read.go

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package modfile
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"fmt"
    11  	"os"
    12  	"strconv"
    13  	"strings"
    14  	"unicode"
    15  	"unicode/utf8"
    16  )
    17  
    18  // A Position describes an arbitrary source position in a file, including the
    19  // file, line, column, and byte offset.
    20  type Position struct {
    21  	Line     int // line in input (starting at 1)
    22  	LineRune int // rune in line (starting at 1)
    23  	Byte     int // byte in input (starting at 0)
    24  }
    25  
    26  // add returns the position at the end of s, assuming it starts at p.
    27  func (p Position) add(s string) Position {
    28  	p.Byte += len(s)
    29  	if n := strings.Count(s, "\n"); n > 0 {
    30  		p.Line += n
    31  		s = s[strings.LastIndex(s, "\n")+1:]
    32  		p.LineRune = 1
    33  	}
    34  	p.LineRune += utf8.RuneCountInString(s)
    35  	return p
    36  }
    37  
// An Expr represents an input element.
// It is implemented by the syntax node types in this file
// (for example *FileSyntax, *CommentBlock, *Line, *LineBlock,
// *LParen and *RParen).
type Expr interface {
	// Span returns the start and end position of the expression,
	// excluding leading or trailing comments.
	Span() (start, end Position)

	// Comment returns the comments attached to the expression.
	// This method would normally be named 'Comments' but that
	// would interfere with embedding a type of the same name.
	Comment() *Comments
}
    49  
// A Comment represents a single // comment.
//
// The parser also uses a zero Comment (empty Token) inside a line block
// to record a blank line that should be preserved.
type Comment struct {
	// Start is the position of the token that holds the comment.
	Start  Position
	Token  string // without trailing newline
	Suffix bool   // an end of line (not whole line) comment
}
    56  
// Comments collects the comments associated with an expression.
type Comments struct {
	Before []Comment // whole-line comments before this expression
	Suffix []Comment // end-of-line comments after this expression

	// For top-level expressions only, After lists whole-line
	// comments following the expression.
	After []Comment
}

// Comment returns the receiver. This isn't useful by itself, but
// a Comments struct is embedded into all the expression
// implementation types, and this gives each of those a Comment
// method to satisfy the Expr interface.
//
// Because the receiver itself is returned, changes made through the
// result modify the comments stored in the embedding expression.
func (c *Comments) Comment() *Comments {
	return c
}
    74  
    75  // A FileSyntax represents an entire go.mod file.
    76  type FileSyntax struct {
    77  	Name string // file path
    78  	Comments
    79  	Stmt []Expr
    80  }
    81  
    82  func (x *FileSyntax) Span() (start, end Position) {
    83  	if len(x.Stmt) == 0 {
    84  		return
    85  	}
    86  	start, _ = x.Stmt[0].Span()
    87  	_, end = x.Stmt[len(x.Stmt)-1].Span()
    88  	return start, end
    89  }
    90  
// addLine adds a line containing the given tokens to the file.
//
// If the first token of the hint matches the first token of the
// line, the new line is added to the block containing hint,
// extracting hint into a new block if it is not yet in one.
// When hint is a line inside a block, the new line goes directly
// after hint; when hint is the block itself (or a line just converted
// to a block), the new line goes at the end of the block.
//
// If the hint is non-nil but its first token does not match,
// the new line is added after the block containing hint
// (or hint itself, if not in a block).
//
// If no hint is provided, addLine appends the line to the end of
// the last block with a matching first token,
// or to the end of the file if no such block exists.
//
// tokens must be non-empty: tokens[0] is compared against the first
// token of existing statements. Lines stored inside a block hold only
// tokens[1:], because the block itself carries the leading token.
// The returned *Line is the newly created line.
func (x *FileSyntax) addLine(hint Expr, tokens ...string) *Line {
	if hint == nil {
		// If no hint given, add to the last statement of the given type.
		// The scan runs backward so the last matching statement wins.
	Loop:
		for i := len(x.Stmt) - 1; i >= 0; i-- {
			stmt := x.Stmt[i]
			switch stmt := stmt.(type) {
			case *Line:
				// A nil Token marks a line removed by markRemoved; skip it.
				if stmt.Token != nil && stmt.Token[0] == tokens[0] {
					hint = stmt
					break Loop
				}
			case *LineBlock:
				if stmt.Token[0] == tokens[0] {
					hint = stmt
					break Loop
				}
			}
		}
	}

	// newLineAfter inserts a new top-level line holding all of tokens
	// immediately after x.Stmt[i] and returns it.
	// Every call below passes an index obtained from ranging over x.Stmt.
	newLineAfter := func(i int) *Line {
		new := &Line{Token: tokens}
		if i == len(x.Stmt) {
			x.Stmt = append(x.Stmt, new)
		} else {
			// Grow by one, shift the tail right, and drop the new line in the gap.
			x.Stmt = append(x.Stmt, nil)
			copy(x.Stmt[i+2:], x.Stmt[i+1:])
			x.Stmt[i+1] = new
		}
		return new
	}

	if hint != nil {
		// Locate hint among the top-level statements and block lines.
		for i, stmt := range x.Stmt {
			switch stmt := stmt.(type) {
			case *Line:
				if stmt == hint {
					if stmt.Token == nil || stmt.Token[0] != tokens[0] {
						return newLineAfter(i)
					}

					// Convert line to line block.
					// The shared first token moves to the block; the existing
					// line and the new line keep only their remaining tokens.
					stmt.InBlock = true
					block := &LineBlock{Token: stmt.Token[:1], Line: []*Line{stmt}}
					stmt.Token = stmt.Token[1:]
					x.Stmt[i] = block
					new := &Line{Token: tokens[1:], InBlock: true}
					block.Line = append(block.Line, new)
					return new
				}

			case *LineBlock:
				if stmt == hint {
					if stmt.Token[0] != tokens[0] {
						return newLineAfter(i)
					}

					// Hint is the block itself: append at the end of the block.
					new := &Line{Token: tokens[1:], InBlock: true}
					stmt.Line = append(stmt.Line, new)
					return new
				}

				for j, line := range stmt.Line {
					if line == hint {
						if stmt.Token[0] != tokens[0] {
							return newLineAfter(i)
						}

						// Add new line after hint within the block.
						stmt.Line = append(stmt.Line, nil)
						copy(stmt.Line[j+2:], stmt.Line[j+1:])
						new := &Line{Token: tokens[1:], InBlock: true}
						stmt.Line[j+1] = new
						return new
					}
				}
			}
		}
	}

	// No usable hint (none given and none found, or hint is not part of
	// this file): append a new top-level line at the end of the file.
	new := &Line{Token: tokens}
	x.Stmt = append(x.Stmt, new)
	return new
}
   189  
   190  func (x *FileSyntax) updateLine(line *Line, tokens ...string) {
   191  	if line.InBlock {
   192  		tokens = tokens[1:]
   193  	}
   194  	line.Token = tokens
   195  }
   196  
   197  // markRemoved modifies line so that it (and its end-of-line comment, if any)
   198  // will be dropped by (*FileSyntax).Cleanup.
   199  func (line *Line) markRemoved() {
   200  	line.Token = nil
   201  	line.Comments.Suffix = nil
   202  }
   203  
   204  // Cleanup cleans up the file syntax x after any edit operations.
   205  // To avoid quadratic behavior, (*Line).markRemoved marks the line as dead
   206  // by setting line.Token = nil but does not remove it from the slice
   207  // in which it appears. After edits have all been indicated,
   208  // calling Cleanup cleans out the dead lines.
   209  func (x *FileSyntax) Cleanup() {
   210  	w := 0
   211  	for _, stmt := range x.Stmt {
   212  		switch stmt := stmt.(type) {
   213  		case *Line:
   214  			if stmt.Token == nil {
   215  				continue
   216  			}
   217  		case *LineBlock:
   218  			ww := 0
   219  			for _, line := range stmt.Line {
   220  				if line.Token != nil {
   221  					stmt.Line[ww] = line
   222  					ww++
   223  				}
   224  			}
   225  			if ww == 0 {
   226  				continue
   227  			}
   228  			if ww == 1 {
   229  				// Collapse block into single line.
   230  				line := &Line{
   231  					Comments: Comments{
   232  						Before: commentsAdd(stmt.Before, stmt.Line[0].Before),
   233  						Suffix: commentsAdd(stmt.Line[0].Suffix, stmt.Suffix),
   234  						After:  commentsAdd(stmt.Line[0].After, stmt.After),
   235  					},
   236  					Token: stringsAdd(stmt.Token, stmt.Line[0].Token),
   237  				}
   238  				x.Stmt[w] = line
   239  				w++
   240  				continue
   241  			}
   242  			stmt.Line = stmt.Line[:ww]
   243  		}
   244  		x.Stmt[w] = stmt
   245  		w++
   246  	}
   247  	x.Stmt = x.Stmt[:w]
   248  }
   249  
   250  func commentsAdd(x, y []Comment) []Comment {
   251  	return append(x[:len(x):len(x)], y...)
   252  }
   253  
   254  func stringsAdd(x, y []string) []string {
   255  	return append(x[:len(x):len(x)], y...)
   256  }
   257  
   258  // A CommentBlock represents a top-level block of comments separate
   259  // from any rule.
   260  type CommentBlock struct {
   261  	Comments
   262  	Start Position
   263  }
   264  
   265  func (x *CommentBlock) Span() (start, end Position) {
   266  	return x.Start, x.Start
   267  }
   268  
   269  // A Line is a single line of tokens.
   270  type Line struct {
   271  	Comments
   272  	Start   Position
   273  	Token   []string
   274  	InBlock bool
   275  	End     Position
   276  }
   277  
   278  func (x *Line) Span() (start, end Position) {
   279  	return x.Start, x.End
   280  }
   281  
   282  // A LineBlock is a factored block of lines, like
   283  //
   284  //	require (
   285  //		"x"
   286  //		"y"
   287  //	)
   288  //
   289  type LineBlock struct {
   290  	Comments
   291  	Start  Position
   292  	LParen LParen
   293  	Token  []string
   294  	Line   []*Line
   295  	RParen RParen
   296  }
   297  
   298  func (x *LineBlock) Span() (start, end Position) {
   299  	return x.Start, x.RParen.Pos.add(")")
   300  }
   301  
   302  // An LParen represents the beginning of a parenthesized line block.
   303  // It is a place to store suffix comments.
   304  type LParen struct {
   305  	Comments
   306  	Pos Position
   307  }
   308  
   309  func (x *LParen) Span() (start, end Position) {
   310  	return x.Pos, x.Pos.add(")")
   311  }
   312  
   313  // An RParen represents the end of a parenthesized line block.
   314  // It is a place to store whole-line (before) comments.
   315  type RParen struct {
   316  	Comments
   317  	Pos Position
   318  }
   319  
   320  func (x *RParen) Span() (start, end Position) {
   321  	return x.Pos, x.Pos.add(")")
   322  }
   323  
// An input represents a single input file being parsed.
// It carries the lexer state, the parser's result and errors, and the
// scratch lists used when attaching comments to the syntax tree.
type input struct {
	// Lexing state.
	filename   string    // name of input file, for errors
	complete   []byte    // entire input
	remaining  []byte    // remaining input
	tokenStart []byte    // token being scanned to end of input
	token      token     // next token to be returned by lex, peek
	pos        Position  // current input position
	comments   []Comment // accumulated comments

	// Parser state.
	file        *FileSyntax // returned top-level syntax tree
	parseErrors ErrorList   // errors encountered during parsing

	// Comment assignment state.
	pre  []Expr // all expressions, in preorder traversal
	post []Expr // all expressions, in postorder traversal
}
   343  
   344  func newInput(filename string, data []byte) *input {
   345  	return &input{
   346  		filename:  filename,
   347  		complete:  data,
   348  		remaining: data,
   349  		pos:       Position{Line: 1, LineRune: 1, Byte: 0},
   350  	}
   351  }
   352  
// parse parses the input file.
//
// file is the name recorded in the returned tree and in error
// positions; data is the file content. On success parse returns the
// syntax tree with comments attached. If any error was recorded,
// parse returns a nil tree and the accumulated error list.
func parse(file string, data []byte) (f *FileSyntax, err error) {
	// The parser panics for both routine errors like syntax errors
	// and for programmer bugs like array index errors.
	// Turn both into error returns. Catching bug panics is
	// especially important when processing many files.
	in := newInput(file, data)
	defer func() {
		// (*input).Error records its error and then panics with
		// &in.parseErrors, so that value needs no further recording.
		// Any other panic value is recorded here as an internal error.
		if e := recover(); e != nil && e != &in.parseErrors {
			in.parseErrors = append(in.parseErrors, Error{
				Filename: in.filename,
				Pos:      in.pos,
				Err:      fmt.Errorf("internal error: %v", e),
			})
		}
		// After a panic the named results still hold their zero values,
		// so publish the collected errors through err.
		if err == nil && len(in.parseErrors) > 0 {
			err = in.parseErrors
		}
	}()

	// Prime the lexer by reading in the first token. It will be available
	// in the next peek() or lex() call.
	in.readToken()

	// Invoke the parser.
	in.parseFile()
	if len(in.parseErrors) > 0 {
		return nil, in.parseErrors
	}
	in.file.Name = in.filename

	// Assign comments to nearby syntax.
	in.assignComments()

	return in.file, nil
}
   389  
   390  // Error is called to report an error.
   391  // Error does not return: it panics.
   392  func (in *input) Error(s string) {
   393  	in.parseErrors = append(in.parseErrors, Error{
   394  		Filename: in.filename,
   395  		Pos:      in.pos,
   396  		Err:      errors.New(s),
   397  	})
   398  	panic(&in.parseErrors)
   399  }
   400  
// eof reports whether the input has reached end of file,
// that is, whether no unread bytes remain.
func (in *input) eof() bool {
	return len(in.remaining) == 0
}
   405  
   406  // peekRune returns the next rune in the input without consuming it.
   407  func (in *input) peekRune() int {
   408  	if len(in.remaining) == 0 {
   409  		return 0
   410  	}
   411  	r, _ := utf8.DecodeRune(in.remaining)
   412  	return int(r)
   413  }
   414  
   415  // peekPrefix reports whether the remaining input begins with the given prefix.
   416  func (in *input) peekPrefix(prefix string) bool {
   417  	// This is like bytes.HasPrefix(in.remaining, []byte(prefix))
   418  	// but without the allocation of the []byte copy of prefix.
   419  	for i := 0; i < len(prefix); i++ {
   420  		if i >= len(in.remaining) || in.remaining[i] != prefix[i] {
   421  			return false
   422  		}
   423  	}
   424  	return true
   425  }
   426  
   427  // readRune consumes and returns the next rune in the input.
   428  func (in *input) readRune() int {
   429  	if len(in.remaining) == 0 {
   430  		in.Error("internal lexer error: readRune at EOF")
   431  	}
   432  	r, size := utf8.DecodeRune(in.remaining)
   433  	in.remaining = in.remaining[size:]
   434  	if r == '\n' {
   435  		in.pos.Line++
   436  		in.pos.LineRune = 1
   437  	} else {
   438  		in.pos.LineRune++
   439  	}
   440  	in.pos.Byte += size
   441  	return int(r)
   442  }
   443  
// A token is a single lexical element produced by readToken.
type token struct {
	kind   tokenKind // token class, or the character itself for newline and punctuation
	pos    Position  // input position when the token was started
	endPos Position  // input position when the token was ended
	text   string    // token text; comment tokens have one trailing newline removed
}
   450  
   451  type tokenKind int
   452  
   453  const (
   454  	_EOF tokenKind = -(iota + 1)
   455  	_EOLCOMMENT
   456  	_IDENT
   457  	_STRING
   458  	_COMMENT
   459  
   460  	// newlines and punctuation tokens are allowed as ASCII codes.
   461  )
   462  
   463  func (k tokenKind) isComment() bool {
   464  	return k == _COMMENT || k == _EOLCOMMENT
   465  }
   466  
   467  // isEOL returns whether a token terminates a line.
   468  func (k tokenKind) isEOL() bool {
   469  	return k == _EOF || k == _EOLCOMMENT || k == '\n'
   470  }
   471  
   472  // startToken marks the beginning of the next input token.
   473  // It must be followed by a call to endToken, once the token's text has
   474  // been consumed using readRune.
   475  func (in *input) startToken() {
   476  	in.tokenStart = in.remaining
   477  	in.token.text = ""
   478  	in.token.pos = in.pos
   479  }
   480  
   481  // endToken marks the end of an input token.
   482  // It records the actual token string in tok.text.
   483  // A single trailing newline (LF or CRLF) will be removed from comment tokens.
   484  func (in *input) endToken(kind tokenKind) {
   485  	in.token.kind = kind
   486  	text := string(in.tokenStart[:len(in.tokenStart)-len(in.remaining)])
   487  	if kind.isComment() {
   488  		if strings.HasSuffix(text, "\r\n") {
   489  			text = text[:len(text)-2]
   490  		} else {
   491  			text = strings.TrimSuffix(text, "\n")
   492  		}
   493  	}
   494  	in.token.text = text
   495  	in.token.endPos = in.pos
   496  }
   497  
// peek returns the kind of the next token returned by lex.
// It does not consume the token.
func (in *input) peek() tokenKind {
	return in.token.kind
}
   502  
   503  // lex is called from the parser to obtain the next input token.
   504  func (in *input) lex() token {
   505  	tok := in.token
   506  	in.readToken()
   507  	return tok
   508  }
   509  
// readToken lexes the next token from the text and stores it in in.token.
//
// Spaces, tabs and carriage returns before the token are skipped.
// The token produced is one of:
//   - _COMMENT: a // comment that is the only thing on its line;
//   - _EOLCOMMENT: a // comment following other text on its line
//     (also appended to in.comments for later attachment);
//   - _EOF: end of input;
//   - a newline or one of ( ) [ ] { } , using the character as the kind;
//   - _STRING: a string quoted with " or `;
//   - _IDENT: a run of identifier runes (see isIdent).
//
// /* */ comments, unterminated strings, and characters that cannot
// start a token are reported through in.Error, which does not return.
func (in *input) readToken() {
	// Skip past spaces, stopping at non-space or EOF.
	for !in.eof() {
		c := in.peekRune()
		if c == ' ' || c == '\t' || c == '\r' {
			in.readRune()
			continue
		}

		// Comment runs to end of line.
		if in.peekPrefix("//") {
			in.startToken()

			// Is this comment the only thing on its line?
			// Find the last \n before this // and see if it's all
			// spaces from there to here.
			i := bytes.LastIndex(in.complete[:in.pos.Byte], []byte("\n"))
			suffix := len(bytes.TrimSpace(in.complete[i+1:in.pos.Byte])) > 0
			in.readRune()
			in.readRune()

			// Consume comment.
			// The terminating newline, if present, is consumed as well;
			// endToken strips it from the token text.
			for len(in.remaining) > 0 && in.readRune() != '\n' {
			}

			// If we are at top level (not in a statement), hand the comment to
			// the parser as a _COMMENT token. The grammar is written
			// to handle top-level comments itself.
			// (The test used here is whether the comment is alone on its line.)
			if !suffix {
				in.endToken(_COMMENT)
				return
			}

			// Otherwise, save comment for later attachment to syntax tree.
			in.endToken(_EOLCOMMENT)
			in.comments = append(in.comments, Comment{in.token.pos, in.token.text, suffix})
			return
		}

		if in.peekPrefix("/*") {
			in.Error("mod files must use // comments (not /* */ comments)")
		}

		// Found non-space non-comment.
		break
	}

	// Found the beginning of the next token.
	in.startToken()

	// End of file.
	if in.eof() {
		in.endToken(_EOF)
		return
	}

	// Punctuation tokens.
	switch c := in.peekRune(); c {
	case '\n', '(', ')', '[', ']', '{', '}', ',':
		in.readRune()
		in.endToken(tokenKind(c))
		return

	case '"', '`': // quoted string
		quote := c
		in.readRune()
		for {
			if in.eof() {
				// Report the error at the start of the string.
				in.pos = in.token.pos
				in.Error("unexpected EOF in string")
			}
			if in.peekRune() == '\n' {
				in.Error("unexpected newline in string")
			}
			c := in.readRune()
			if c == quote {
				break
			}
			// In a "-quoted string a backslash escapes the next rune, so
			// that rune is consumed without being compared to the quote.
			if c == '\\' && quote != '`' {
				if in.eof() {
					in.pos = in.token.pos
					in.Error("unexpected EOF in string")
				}
				in.readRune()
			}
		}
		in.endToken(_STRING)
		return
	}

	// Checked all punctuation. Must be identifier token.
	if c := in.peekRune(); !isIdent(c) {
		in.Error(fmt.Sprintf("unexpected input character %#q", c))
	}

	// Scan over identifier.
	// A "//" inside the run ends the identifier so the comment is lexed
	// as its own token on the next call.
	for isIdent(in.peekRune()) {
		if in.peekPrefix("//") {
			break
		}
		if in.peekPrefix("/*") {
			in.Error("mod files must use // comments (not /* */ comments)")
		}
		in.readRune()
	}
	in.endToken(_IDENT)
}
   618  
   619  // isIdent reports whether c is an identifier rune.
   620  // We treat most printable runes as identifier runes, except for a handful of
   621  // ASCII punctuation characters.
   622  func isIdent(c int) bool {
   623  	switch r := rune(c); r {
   624  	case ' ', '(', ')', '[', ']', '{', '}', ',':
   625  		return false
   626  	default:
   627  		return !unicode.IsSpace(r) && unicode.IsPrint(r)
   628  	}
   629  }
   630  
   631  // Comment assignment.
   632  // We build two lists of all subexpressions, preorder and postorder.
   633  // The preorder list is ordered by start location, with outer expressions first.
   634  // The postorder list is ordered by end location, with outer expressions last.
   635  // We use the preorder list to assign each whole-line comment to the syntax
   636  // immediately following it, and we use the postorder list to assign each
   637  // end-of-line comment to the syntax immediately preceding it.
   638  
   639  // order walks the expression adding it and its subexpressions to the
   640  // preorder and postorder lists.
   641  func (in *input) order(x Expr) {
   642  	if x != nil {
   643  		in.pre = append(in.pre, x)
   644  	}
   645  	switch x := x.(type) {
   646  	default:
   647  		panic(fmt.Errorf("order: unexpected type %T", x))
   648  	case nil:
   649  		// nothing
   650  	case *LParen, *RParen:
   651  		// nothing
   652  	case *CommentBlock:
   653  		// nothing
   654  	case *Line:
   655  		// nothing
   656  	case *FileSyntax:
   657  		for _, stmt := range x.Stmt {
   658  			in.order(stmt)
   659  		}
   660  	case *LineBlock:
   661  		in.order(&x.LParen)
   662  		for _, l := range x.Line {
   663  			in.order(l)
   664  		}
   665  		in.order(&x.RParen)
   666  	}
   667  	if x != nil {
   668  		in.post = append(in.post, x)
   669  	}
   670  }
   671  
// assignComments attaches comments to nearby syntax.
//
// The comments collected in in.comments are split into whole-line and
// suffix comments. Each whole-line comment is added to the Before list
// of the first expression (in preorder) that starts at or after it;
// leftovers go to the file's After list. Each suffix comment is added
// to the Suffix list of an expression (visited in reverse postorder)
// that ends at or before it; leftovers go to the file's Before list.
func (in *input) assignComments() {
	const debug = false

	// Generate preorder and postorder lists.
	in.order(in.file)

	// Split into whole-line comments and suffix comments.
	var line, suffix []Comment
	for _, com := range in.comments {
		if com.Suffix {
			suffix = append(suffix, com)
		} else {
			line = append(line, com)
		}
	}

	if debug {
		for _, c := range line {
			fmt.Fprintf(os.Stderr, "LINE %q :%d:%d #%d\n", c.Token, c.Start.Line, c.Start.LineRune, c.Start.Byte)
		}
	}

	// Assign line comments to syntax immediately following.
	for _, x := range in.pre {
		start, _ := x.Span()
		if debug {
			fmt.Fprintf(os.Stderr, "pre %T :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte)
		}
		xcom := x.Comment()
		// Take every pending comment that begins at or before x's start.
		for len(line) > 0 && start.Byte >= line[0].Start.Byte {
			if debug {
				fmt.Fprintf(os.Stderr, "ASSIGN LINE %q #%d\n", line[0].Token, line[0].Start.Byte)
			}
			xcom.Before = append(xcom.Before, line[0])
			line = line[1:]
		}
	}

	// Remaining line comments go at end of file.
	in.file.After = append(in.file.After, line...)

	if debug {
		for _, c := range suffix {
			fmt.Fprintf(os.Stderr, "SUFFIX %q :%d:%d #%d\n", c.Token, c.Start.Line, c.Start.LineRune, c.Start.Byte)
		}
	}

	// Assign suffix comments to syntax immediately before.
	for i := len(in.post) - 1; i >= 0; i-- {
		x := in.post[i]

		start, end := x.Span()
		if debug {
			fmt.Fprintf(os.Stderr, "post %T :%d:%d #%d :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte, end.Line, end.LineRune, end.Byte)
		}

		// Do not assign suffix comments to end of line block or whole file.
		// Instead assign them to the last element inside.
		switch x.(type) {
		case *FileSyntax:
			continue
		}

		// Do not assign suffix comments to something that starts
		// on an earlier line, so that in
		//
		//	x ( y
		//		z ) // comment
		//
		// we assign the comment to z and not to x ( ... ).
		if start.Line != end.Line {
			continue
		}
		xcom := x.Comment()
		// Take, from the back, every pending comment that begins at or
		// after x's end.
		for len(suffix) > 0 && end.Byte <= suffix[len(suffix)-1].Start.Byte {
			if debug {
				fmt.Fprintf(os.Stderr, "ASSIGN SUFFIX %q #%d\n", suffix[len(suffix)-1].Token, suffix[len(suffix)-1].Start.Byte)
			}
			xcom.Suffix = append(xcom.Suffix, suffix[len(suffix)-1])
			suffix = suffix[:len(suffix)-1]
		}
	}

	// We assigned suffix comments in reverse.
	// If multiple suffix comments were appended to the same
	// expression node, they are now in reverse. Fix that.
	for _, x := range in.post {
		reverseComments(x.Comment().Suffix)
	}

	// Remaining suffix comments go at beginning of file.
	in.file.Before = append(in.file.Before, suffix...)
}
   766  
   767  // reverseComments reverses the []Comment list.
   768  func reverseComments(list []Comment) {
   769  	for i, j := 0, len(list)-1; i < j; i, j = i+1, j-1 {
   770  		list[i], list[j] = list[j], list[i]
   771  	}
   772  }
   773  
   774  func (in *input) parseFile() {
   775  	in.file = new(FileSyntax)
   776  	var cb *CommentBlock
   777  	for {
   778  		switch in.peek() {
   779  		case '\n':
   780  			in.lex()
   781  			if cb != nil {
   782  				in.file.Stmt = append(in.file.Stmt, cb)
   783  				cb = nil
   784  			}
   785  		case _COMMENT:
   786  			tok := in.lex()
   787  			if cb == nil {
   788  				cb = &CommentBlock{Start: tok.pos}
   789  			}
   790  			com := cb.Comment()
   791  			com.Before = append(com.Before, Comment{Start: tok.pos, Token: tok.text})
   792  		case _EOF:
   793  			if cb != nil {
   794  				in.file.Stmt = append(in.file.Stmt, cb)
   795  			}
   796  			return
   797  		default:
   798  			in.parseStmt()
   799  			if cb != nil {
   800  				in.file.Stmt[len(in.file.Stmt)-1].Comment().Before = cb.Before
   801  				cb = nil
   802  			}
   803  		}
   804  	}
   805  }
   806  
// parseStmt parses one top-level statement and appends it to
// in.file.Stmt. The statement is either a single Line or, when an
// opening parenthesis is the last token on its line (or is directly
// followed by a closing parenthesis that ends the line), a LineBlock.
func (in *input) parseStmt() {
	tok := in.lex()
	start := tok.pos
	end := tok.endPos
	tokens := []string{tok.text}
	for {
		tok := in.lex()
		switch {
		case tok.kind.isEOL():
			// End of the line: everything collected forms a single Line.
			in.file.Stmt = append(in.file.Stmt, &Line{
				Start: start,
				Token: tokens,
				End:   end,
			})
			return

		case tok.kind == '(':
			if next := in.peek(); next.isEOL() {
				// Start of block: no more tokens on this line.
				in.file.Stmt = append(in.file.Stmt, in.parseLineBlock(start, tokens, tok))
				return
			} else if next == ')' {
				rparen := in.lex()
				if in.peek().isEOL() {
					// Empty block.
					in.lex()
					in.file.Stmt = append(in.file.Stmt, &LineBlock{
						Start:  start,
						Token:  tokens,
						LParen: LParen{Pos: tok.pos},
						RParen: RParen{Pos: rparen.pos},
					})
					return
				}
				// '( )' in the middle of the line, not a block.
				tokens = append(tokens, tok.text, rparen.text)
			} else {
				// '(' in the middle of the line, not a block.
				tokens = append(tokens, tok.text)
			}

		default:
			// Ordinary token: extend the line and its end position.
			tokens = append(tokens, tok.text)
			end = tok.endPos
		}
	}
}
   854  
// parseLineBlock parses the body of a parenthesized block whose header
// has already been consumed. start is the position of the statement,
// token holds the tokens before the opening parenthesis, and lparen is
// the opening parenthesis token. It returns the completed block after
// consuming the closing parenthesis and the end of its line.
//
// Whole-line comments and preserved blank lines are collected and
// attached as Before comments to the next line, or to the closing
// parenthesis if no line follows. An unterminated block or extra
// tokens after the closing parenthesis are reported through in.Error.
func (in *input) parseLineBlock(start Position, token []string, lparen token) *LineBlock {
	x := &LineBlock{
		Start:  start,
		Token:  token,
		LParen: LParen{Pos: lparen.pos},
	}
	var comments []Comment
	for {
		switch in.peek() {
		case _EOLCOMMENT:
			// Suffix comment, will be attached later by assignComments.
			in.lex()
		case '\n':
			// Blank line. Add an empty comment to preserve it.
			// Blank lines directly after the opening parenthesis and
			// repeated blank lines are not recorded.
			in.lex()
			if len(comments) == 0 && len(x.Line) > 0 || len(comments) > 0 && comments[len(comments)-1].Token != "" {
				comments = append(comments, Comment{})
			}
		case _COMMENT:
			tok := in.lex()
			comments = append(comments, Comment{Start: tok.pos, Token: tok.text})
		case _EOF:
			in.Error(fmt.Sprintf("syntax error (unterminated block started at %s:%d:%d)", in.filename, x.Start.Line, x.Start.LineRune))
		case ')':
			rparen := in.lex()
			x.RParen.Before = comments
			x.RParen.Pos = rparen.pos
			if !in.peek().isEOL() {
				in.Error("syntax error (expected newline after closing paren)")
			}
			in.lex()
			return x
		default:
			l := in.parseLine()
			x.Line = append(x.Line, l)
			l.Comment().Before = comments
			comments = nil
		}
	}
}
   895  
   896  func (in *input) parseLine() *Line {
   897  	tok := in.lex()
   898  	if tok.kind.isEOL() {
   899  		in.Error("internal parse error: parseLine at end of line")
   900  	}
   901  	start := tok.pos
   902  	end := tok.endPos
   903  	tokens := []string{tok.text}
   904  	for {
   905  		tok := in.lex()
   906  		if tok.kind.isEOL() {
   907  			return &Line{
   908  				Start:   start,
   909  				Token:   tokens,
   910  				End:     end,
   911  				InBlock: true,
   912  			}
   913  		}
   914  		tokens = append(tokens, tok.text)
   915  		end = tok.endPos
   916  	}
   917  }
   918  
   919  var (
   920  	slashSlash = []byte("//")
   921  	moduleStr  = []byte("module")
   922  )
   923  
   924  // ModulePath returns the module path from the gomod file text.
   925  // If it cannot find a module path, it returns an empty string.
   926  // It is tolerant of unrelated problems in the go.mod file.
   927  func ModulePath(mod []byte) string {
   928  	for len(mod) > 0 {
   929  		line := mod
   930  		mod = nil
   931  		if i := bytes.IndexByte(line, '\n'); i >= 0 {
   932  			line, mod = line[:i], line[i+1:]
   933  		}
   934  		if i := bytes.Index(line, slashSlash); i >= 0 {
   935  			line = line[:i]
   936  		}
   937  		line = bytes.TrimSpace(line)
   938  		if !bytes.HasPrefix(line, moduleStr) {
   939  			continue
   940  		}
   941  		line = line[len(moduleStr):]
   942  		n := len(line)
   943  		line = bytes.TrimSpace(line)
   944  		if len(line) == n || len(line) == 0 {
   945  			continue
   946  		}
   947  
   948  		if line[0] == '"' || line[0] == '`' {
   949  			p, err := strconv.Unquote(string(line))
   950  			if err != nil {
   951  				return "" // malformed quoted string or multiline module path
   952  			}
   953  			return p
   954  		}
   955  
   956  		return string(line)
   957  	}
   958  	return "" // missing module path
   959  }
   960
View as plain text