read.go

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package build
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"errors"
    11  	"fmt"
    12  	"go/ast"
    13  	"go/parser"
    14  	"go/token"
    15  	"io"
    16  	"strconv"
    17  	"strings"
    18  	"unicode"
    19  	"unicode/utf8"
    20  )
    21  
// importReader scans the beginning of a Go source file one byte at a time.
//
// Fields:
//   - b is the buffered reader over the input.
//   - buf holds the bytes consumed so far by readByte; readByteNoBuf drains
//     it from the front before reading anything more from b.
//   - peek is the byte left pending by peekByte; 0 means nothing is pending.
//   - err is the recorded error (an I/O error, errNUL, or errSyntax); once
//     set it is not overwritten by later failures.
//   - eof is set once b has reported io.EOF.
//   - nerr counts peekByte calls made while err is set; peekByte panics when
//     the count exceeds 10000, treating that as a caller stuck in a loop.
//   - pos is the source position maintained by readByteNoBuf (readByte does
//     not update it).
type importReader struct {
	b    *bufio.Reader
	buf  []byte
	peek byte
	err  error
	eof  bool
	nerr int
	pos  token.Position
}
    31  
// bom is the UTF-8 encoding of the byte order mark U+FEFF.
var bom = []byte{0xef, 0xbb, 0xbf}
    33  
    34  func newImportReader(name string, r io.Reader) *importReader {
    35  	b := bufio.NewReader(r)
    36  	// Remove leading UTF-8 BOM.
    37  	// Per https://golang.org/ref/spec#Source_code_representation:
    38  	// a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
    39  	// if it is the first Unicode code point in the source text.
    40  	if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) {
    41  		b.Discard(3)
    42  	}
    43  	return &importReader{
    44  		b: b,
    45  		pos: token.Position{
    46  			Filename: name,
    47  			Line:     1,
    48  			Column:   1,
    49  		},
    50  	}
    51  }
    52  
    53  func isIdent(c byte) bool {
    54  	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf
    55  }
    56  
// Errors recorded in importReader.err: errSyntax when the input is not
// syntactically what the reader expects, errNUL when a NUL byte is read.
var (
	errSyntax = errors.New("syntax error")
	errNUL    = errors.New("unexpected NUL in input")
)
    61  
    62  // syntaxError records a syntax error, but only if an I/O error has not already been recorded.
    63  func (r *importReader) syntaxError() {
    64  	if r.err == nil {
    65  		r.err = errSyntax
    66  	}
    67  }
    68  
    69  // readByte reads the next byte from the input, saves it in buf, and returns it.
    70  // If an error occurs, readByte records the error in r.err and returns 0.
    71  func (r *importReader) readByte() byte {
    72  	c, err := r.b.ReadByte()
    73  	if err == nil {
    74  		r.buf = append(r.buf, c)
    75  		if c == 0 {
    76  			err = errNUL
    77  		}
    78  	}
    79  	if err != nil {
    80  		if err == io.EOF {
    81  			r.eof = true
    82  		} else if r.err == nil {
    83  			r.err = err
    84  		}
    85  		c = 0
    86  	}
    87  	return c
    88  }
    89  
    90  // readByteNoBuf is like readByte but doesn't buffer the byte.
    91  // It exhausts r.buf before reading from r.b.
    92  func (r *importReader) readByteNoBuf() byte {
    93  	var c byte
    94  	var err error
    95  	if len(r.buf) > 0 {
    96  		c = r.buf[0]
    97  		r.buf = r.buf[1:]
    98  	} else {
    99  		c, err = r.b.ReadByte()
   100  		if err == nil && c == 0 {
   101  			err = errNUL
   102  		}
   103  	}
   104  
   105  	if err != nil {
   106  		if err == io.EOF {
   107  			r.eof = true
   108  		} else if r.err == nil {
   109  			r.err = err
   110  		}
   111  		return 0
   112  	}
   113  	r.pos.Offset++
   114  	if c == '\n' {
   115  		r.pos.Line++
   116  		r.pos.Column = 1
   117  	} else {
   118  		r.pos.Column++
   119  	}
   120  	return c
   121  }
   122  
   123  // peekByte returns the next byte from the input reader but does not advance beyond it.
   124  // If skipSpace is set, peekByte skips leading spaces and comments.
   125  func (r *importReader) peekByte(skipSpace bool) byte {
   126  	if r.err != nil {
   127  		if r.nerr++; r.nerr > 10000 {
   128  			panic("go/build: import reader looping")
   129  		}
   130  		return 0
   131  	}
   132  
   133  	// Use r.peek as first input byte.
   134  	// Don't just return r.peek here: it might have been left by peekByte(false)
   135  	// and this might be peekByte(true).
   136  	c := r.peek
   137  	if c == 0 {
   138  		c = r.readByte()
   139  	}
   140  	for r.err == nil && !r.eof {
   141  		if skipSpace {
   142  			// For the purposes of this reader, semicolons are never necessary to
   143  			// understand the input and are treated as spaces.
   144  			switch c {
   145  			case ' ', '\f', '\t', '\r', '\n', ';':
   146  				c = r.readByte()
   147  				continue
   148  
   149  			case '/':
   150  				c = r.readByte()
   151  				if c == '/' {
   152  					for c != '\n' && r.err == nil && !r.eof {
   153  						c = r.readByte()
   154  					}
   155  				} else if c == '*' {
   156  					var c1 byte
   157  					for (c != '*' || c1 != '/') && r.err == nil {
   158  						if r.eof {
   159  							r.syntaxError()
   160  						}
   161  						c, c1 = c1, r.readByte()
   162  					}
   163  				} else {
   164  					r.syntaxError()
   165  				}
   166  				c = r.readByte()
   167  				continue
   168  			}
   169  		}
   170  		break
   171  	}
   172  	r.peek = c
   173  	return r.peek
   174  }
   175  
   176  // nextByte is like peekByte but advances beyond the returned byte.
   177  func (r *importReader) nextByte(skipSpace bool) byte {
   178  	c := r.peekByte(skipSpace)
   179  	r.peek = 0
   180  	return c
   181  }
   182  
// goEmbed is the directive text that findEmbed expects immediately after
// the "//" of a comment.
var goEmbed = []byte("go:embed")
   184  
// findEmbed advances the input reader to the next //go:embed comment.
// It reports whether it found a comment.
// (Otherwise it found an error or EOF.)
//
// first must be true on the first call and false afterwards; it determines
// whether the reader is assumed to be at the start of a line (see below).
// When findEmbed returns true, the reader is positioned just after the space
// or tab that follows "//go:embed".
// String, rune, and raw string literals and block comments are skipped so
// that their contents are never mistaken for a directive.
func (r *importReader) findEmbed(first bool) bool {
	// The import block scan stopped after a non-space character,
	// so the reader is not at the start of a line on the first call.
	// After that, each //go:embed extraction leaves the reader
	// at the end of a line.
	startLine := !first
	var c byte
	for r.err == nil && !r.eof {
		c = r.readByteNoBuf()
	Reswitch:
		// Reswitch is the re-entry point used when a case has already read
		// the byte following the construct it skipped: that byte is in c and
		// must be classified without reading another one.
		switch c {
		default:
			// Any other byte means the line has non-space content.
			startLine = false

		case '\n':
			startLine = true

		case ' ', '\t':
			// leave startLine alone

		case '"':
			// Interpreted string literal: skip to the closing quote.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '\\' {
					// A backslash escapes the next byte, so consume it
					// without looking at it.
					r.readByteNoBuf()
					if r.err != nil {
						r.syntaxError()
						return false
					}
					continue
				}
				if c == '"' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}
			// Reached only after r.err has been set; c is re-dispatched and
			// the outer loop condition then ends the scan.
			goto Reswitch

		case '`':
			// Raw string literal: no escapes, skip to the closing backquote.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '`' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}

		case '\'':
			// Rune literal: handled like an interpreted string, with
			// backslash escapes.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '\\' {
					r.readByteNoBuf()
					if r.err != nil {
						r.syntaxError()
						return false
					}
					continue
				}
				if c == '\'' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}

		case '/':
			c = r.readByteNoBuf()
			switch c {
			default:
				// A lone '/', not a comment: classify the byte after it.
				startLine = false
				goto Reswitch

			case '*':
				// Block comment: skip to the closing "*/"; reaching EOF
				// first is a syntax error.
				var c1 byte
				for (c != '*' || c1 != '/') && r.err == nil {
					if r.eof {
						r.syntaxError()
					}
					c, c1 = c1, r.readByteNoBuf()
				}
				// Text after a block comment is not at the start of a line.
				startLine = false

			case '/':
				if startLine {
					// Try to read this as a //go:embed comment.
					for i := range goEmbed {
						c = r.readByteNoBuf()
						if c != goEmbed[i] {
							goto SkipSlashSlash
						}
					}
					// The directive must be followed by a space or tab.
					c = r.readByteNoBuf()
					if c == ' ' || c == '\t' {
						// Found one!
						return true
					}
				}
			SkipSlashSlash:
				// Not a directive: discard the rest of the line. If the
				// mismatching byte was itself the newline, nothing more is
				// read.
				for c != '\n' && r.err == nil && !r.eof {
					c = r.readByteNoBuf()
				}
				startLine = true
			}
		}
	}
	return false
}
   306  
   307  // readKeyword reads the given keyword from the input.
   308  // If the keyword is not present, readKeyword records a syntax error.
   309  func (r *importReader) readKeyword(kw string) {
   310  	r.peekByte(true)
   311  	for i := 0; i < len(kw); i++ {
   312  		if r.nextByte(false) != kw[i] {
   313  			r.syntaxError()
   314  			return
   315  		}
   316  	}
   317  	if isIdent(r.peekByte(false)) {
   318  		r.syntaxError()
   319  	}
   320  }
   321  
   322  // readIdent reads an identifier from the input.
   323  // If an identifier is not present, readIdent records a syntax error.
   324  func (r *importReader) readIdent() {
   325  	c := r.peekByte(true)
   326  	if !isIdent(c) {
   327  		r.syntaxError()
   328  		return
   329  	}
   330  	for isIdent(r.peekByte(false)) {
   331  		r.peek = 0
   332  	}
   333  }
   334  
   335  // readString reads a quoted string literal from the input.
   336  // If an identifier is not present, readString records a syntax error.
   337  func (r *importReader) readString() {
   338  	switch r.nextByte(true) {
   339  	case '`':
   340  		for r.err == nil {
   341  			if r.nextByte(false) == '`' {
   342  				break
   343  			}
   344  			if r.eof {
   345  				r.syntaxError()
   346  			}
   347  		}
   348  	case '"':
   349  		for r.err == nil {
   350  			c := r.nextByte(false)
   351  			if c == '"' {
   352  				break
   353  			}
   354  			if r.eof || c == '\n' {
   355  				r.syntaxError()
   356  			}
   357  			if c == '\\' {
   358  				r.nextByte(false)
   359  			}
   360  		}
   361  	default:
   362  		r.syntaxError()
   363  	}
   364  }
   365  
   366  // readImport reads an import clause - optional identifier followed by quoted string -
   367  // from the input.
   368  func (r *importReader) readImport() {
   369  	c := r.peekByte(true)
   370  	if c == '.' {
   371  		r.peek = 0
   372  	} else if isIdent(c) {
   373  		r.readIdent()
   374  	}
   375  	r.readString()
   376  }
   377  
   378  // readComments is like io.ReadAll, except that it only reads the leading
   379  // block of comments in the file.
   380  func readComments(f io.Reader) ([]byte, error) {
   381  	r := newImportReader("", f)
   382  	r.peekByte(true)
   383  	if r.err == nil && !r.eof {
   384  		// Didn't reach EOF, so must have found a non-space byte. Remove it.
   385  		r.buf = r.buf[:len(r.buf)-1]
   386  	}
   387  	return r.buf, r.err
   388  }
   389  
   390  // readGoInfo expects a Go file as input and reads the file up to and including the import section.
   391  // It records what it learned in *info.
   392  // If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr,
   393  // info.imports, info.embeds, and info.embedErr.
   394  //
   395  // It only returns an error if there are problems reading the file,
   396  // not for syntax errors in the file itself.
   397  func readGoInfo(f io.Reader, info *fileInfo) error {
   398  	r := newImportReader(info.name, f)
   399  
   400  	r.readKeyword("package")
   401  	r.readIdent()
   402  	for r.peekByte(true) == 'i' {
   403  		r.readKeyword("import")
   404  		if r.peekByte(true) == '(' {
   405  			r.nextByte(false)
   406  			for r.peekByte(true) != ')' && r.err == nil {
   407  				r.readImport()
   408  			}
   409  			r.nextByte(false)
   410  		} else {
   411  			r.readImport()
   412  		}
   413  	}
   414  
   415  	info.header = r.buf
   416  
   417  	// If we stopped successfully before EOF, we read a byte that told us we were done.
   418  	// Return all but that last byte, which would cause a syntax error if we let it through.
   419  	if r.err == nil && !r.eof {
   420  		info.header = r.buf[:len(r.buf)-1]
   421  	}
   422  
   423  	// If we stopped for a syntax error, consume the whole file so that
   424  	// we are sure we don't change the errors that go/parser returns.
   425  	if r.err == errSyntax {
   426  		r.err = nil
   427  		for r.err == nil && !r.eof {
   428  			r.readByte()
   429  		}
   430  		info.header = r.buf
   431  	}
   432  	if r.err != nil {
   433  		return r.err
   434  	}
   435  
   436  	if info.fset == nil {
   437  		return nil
   438  	}
   439  
   440  	// Parse file header & record imports.
   441  	info.parsed, info.parseErr = parser.ParseFile(info.fset, info.name, info.header, parser.ImportsOnly|parser.ParseComments)
   442  	if info.parseErr != nil {
   443  		return nil
   444  	}
   445  
   446  	hasEmbed := false
   447  	for _, decl := range info.parsed.Decls {
   448  		d, ok := decl.(*ast.GenDecl)
   449  		if !ok {
   450  			continue
   451  		}
   452  		for _, dspec := range d.Specs {
   453  			spec, ok := dspec.(*ast.ImportSpec)
   454  			if !ok {
   455  				continue
   456  			}
   457  			quoted := spec.Path.Value
   458  			path, err := strconv.Unquote(quoted)
   459  			if err != nil {
   460  				return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted)
   461  			}
   462  			if path == "embed" {
   463  				hasEmbed = true
   464  			}
   465  
   466  			doc := spec.Doc
   467  			if doc == nil && len(d.Specs) == 1 {
   468  				doc = d.Doc
   469  			}
   470  			info.imports = append(info.imports, fileImport{path, spec.Pos(), doc})
   471  		}
   472  	}
   473  
   474  	// If the file imports "embed",
   475  	// we have to look for //go:embed comments
   476  	// in the remainder of the file.
   477  	// The compiler will enforce the mapping of comments to
   478  	// declared variables. We just need to know the patterns.
   479  	// If there were //go:embed comments earlier in the file
   480  	// (near the package statement or imports), the compiler
   481  	// will reject them. They can be (and have already been) ignored.
   482  	if hasEmbed {
   483  		var line []byte
   484  		for first := true; r.findEmbed(first); first = false {
   485  			line = line[:0]
   486  			pos := r.pos
   487  			for {
   488  				c := r.readByteNoBuf()
   489  				if c == '\n' || r.err != nil || r.eof {
   490  					break
   491  				}
   492  				line = append(line, c)
   493  			}
   494  			// Add args if line is well-formed.
   495  			// Ignore badly-formed lines - the compiler will report them when it finds them,
   496  			// and we can pretend they are not there to help go list succeed with what it knows.
   497  			embs, err := parseGoEmbed(string(line), pos)
   498  			if err == nil {
   499  				info.embeds = append(info.embeds, embs...)
   500  			}
   501  		}
   502  	}
   503  
   504  	return nil
   505  }
   506  
   507  // parseGoEmbed parses the text following "//go:embed" to extract the glob patterns.
   508  // It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings.
   509  // This is based on a similar function in cmd/compile/internal/gc/noder.go;
   510  // this version calculates position information as well.
   511  func parseGoEmbed(args string, pos token.Position) ([]fileEmbed, error) {
   512  	trimBytes := func(n int) {
   513  		pos.Offset += n
   514  		pos.Column += utf8.RuneCountInString(args[:n])
   515  		args = args[n:]
   516  	}
   517  	trimSpace := func() {
   518  		trim := strings.TrimLeftFunc(args, unicode.IsSpace)
   519  		trimBytes(len(args) - len(trim))
   520  	}
   521  
   522  	var list []fileEmbed
   523  	for trimSpace(); args != ""; trimSpace() {
   524  		var path string
   525  		pathPos := pos
   526  	Switch:
   527  		switch args[0] {
   528  		default:
   529  			i := len(args)
   530  			for j, c := range args {
   531  				if unicode.IsSpace(c) {
   532  					i = j
   533  					break
   534  				}
   535  			}
   536  			path = args[:i]
   537  			trimBytes(i)
   538  
   539  		case '`':
   540  			var ok bool
   541  			path, _, ok = strings.Cut(args[1:], "`")
   542  			if !ok {
   543  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
   544  			}
   545  			trimBytes(1 + len(path) + 1)
   546  
   547  		case '"':
   548  			i := 1
   549  			for ; i < len(args); i++ {
   550  				if args[i] == '\\' {
   551  					i++
   552  					continue
   553  				}
   554  				if args[i] == '"' {
   555  					q, err := strconv.Unquote(args[:i+1])
   556  					if err != nil {
   557  						return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1])
   558  					}
   559  					path = q
   560  					trimBytes(i + 1)
   561  					break Switch
   562  				}
   563  			}
   564  			if i >= len(args) {
   565  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
   566  			}
   567  		}
   568  
   569  		if args != "" {
   570  			r, _ := utf8.DecodeRuneInString(args)
   571  			if !unicode.IsSpace(r) {
   572  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
   573  			}
   574  		}
   575  		list = append(list, fileEmbed{path, pathPos})
   576  	}
   577  	return list, nil
   578  }
   579
View as plain text