1
2
3
4
5 package parse
6
7 import (
8 "fmt"
9 "strings"
10 "unicode"
11 "unicode/utf8"
12 )
13
14
// item is a single token produced by the lexer.
// NOTE: emit constructs it with a positional literal, so the field order
// below must not be changed.
type item struct {
	typ  itemType // Kind of token.
	pos  Pos      // Byte offset in the input where the token starts.
	val  string   // Token text, or the message for an itemError.
	line int      // Line number recorded when the token started.
}
21
22 func (i item) String() string {
23 switch {
24 case i.typ == itemEOF:
25 return "EOF"
26 case i.typ == itemError:
27 return i.val
28 case i.typ > itemKeyword:
29 return fmt.Sprintf("<%s>", i.val)
30 case len(i.val) > 10:
31 return fmt.Sprintf("%.10q...", i.val)
32 }
33 return fmt.Sprintf("%q", i.val)
34 }
35
36
// itemType identifies the type of lex items.
type itemType int

// The values are assigned by iota, so the declaration order is significant:
// every constant after itemKeyword is treated as a keyword.
const (
	itemError        itemType = iota // error occurred; val is the text of the error
	itemBool                         // boolean constant ("true" or "false")
	itemChar                         // printable ASCII character not otherwise recognized
	itemCharConstant                 // character constant delimited by single quotes
	itemComment                      // comment text, including the /* and */ markers
	itemComplex                      // complex constant such as 1+2i
	itemAssign                       // equals ('=')
	itemDeclare                      // colon-equals (':=')
	itemEOF                          // end of input
	itemField                        // alphanumeric identifier starting with '.'
	itemIdentifier                   // alphanumeric identifier not starting with '.'
	itemLeftDelim                    // left action delimiter
	itemLeftParen                    // '(' inside action
	itemNumber                       // simple number
	itemPipe                         // pipe symbol ('|')
	itemRawString                    // raw quoted string (includes the back quotes)
	itemRightDelim                   // right action delimiter
	itemRightParen                   // ')' inside action
	itemSpace                        // run of spaces separating arguments
	itemString                       // quoted string (includes the double quotes)
	itemText                         // plain text outside actions
	itemVariable                     // variable starting with '$'
	// Keywords appear after all the rest.
	itemKeyword  // used only to delimit the keywords
	itemBlock    // block keyword
	itemBreak    // break keyword
	itemContinue // continue keyword
	itemDot      // the cursor, spelled '.'
	itemDefine   // define keyword
	itemElse     // else keyword
	itemEnd      // end keyword
	itemIf       // if keyword
	itemNil      // the untyped nil constant
	itemRange    // range keyword
	itemTemplate // template keyword
	itemWith     // with keyword
)
77
// key maps the spelling of each keyword to its item type. A lookup of any
// other word yields the zero itemType, which is not greater than itemKeyword.
var key = map[string]itemType{
	".":        itemDot,
	"block":    itemBlock,
	"break":    itemBreak,
	"continue": itemContinue,
	"define":   itemDefine,
	"else":     itemElse,
	"end":      itemEnd,
	"if":       itemIf,
	"range":    itemRange,
	"nil":      itemNil,
	"template": itemTemplate,
	"with":     itemWith,
}
92
// eof is the sentinel rune returned by next when the input is exhausted.
const eof = -1
94
95
96
97
98
99
100
101
102
// Constants for trim markers: a '-' next to a delimiter, separated from the
// action body by one space character, requests trimming of adjacent text.
const (
	spaceChars    = " \t\r\n"  // The same characters isSpace accepts.
	trimMarker    = '-'        // Attached to a left/right delimiter, trims neighboring spaces from the text.
	trimMarkerLen = Pos(1 + 1) // Marker plus the one space before or after it.
)
108
109
// stateFn represents the state of the scanner as a function that returns the
// next state; a nil stateFn stops the run loop.
type stateFn func(*lexer) stateFn
111
112
// lexer holds the state of the scanner.
type lexer struct {
	name        string    // Name of the input; stored by lex but not read in this file.
	input       string    // The string being scanned.
	leftDelim   string    // Start-of-action marker.
	rightDelim  string    // End-of-action marker.
	emitComment bool      // When true, comments are emitted as itemComment tokens.
	pos         Pos       // Current byte position in the input.
	start       Pos       // Start position of the item being scanned.
	width       Pos       // Width of the last rune read by next; used by backup.
	items       chan item // Channel on which scanned items are delivered.
	parenDepth  int       // Nesting depth of ( ) expressions inside the current action.
	line        int       // 1 + number of newlines seen so far.
	startLine   int       // Line on which the current item started.
	breakOK     bool      // When false, "break" is lexed as a plain identifier.
	continueOK  bool      // When false, "continue" is lexed as a plain identifier.
}
129
130
131 func (l *lexer) next() rune {
132 if int(l.pos) >= len(l.input) {
133 l.width = 0
134 return eof
135 }
136 r, w := utf8.DecodeRuneInString(l.input[l.pos:])
137 l.width = Pos(w)
138 l.pos += l.width
139 if r == '\n' {
140 l.line++
141 }
142 return r
143 }
144
145
146 func (l *lexer) peek() rune {
147 r := l.next()
148 l.backup()
149 return r
150 }
151
152
153 func (l *lexer) backup() {
154 l.pos -= l.width
155
156 if l.width == 1 && l.input[l.pos] == '\n' {
157 l.line--
158 }
159 }
160
161
162 func (l *lexer) emit(t itemType) {
163 l.items <- item{t, l.start, l.input[l.start:l.pos], l.startLine}
164 l.start = l.pos
165 l.startLine = l.line
166 }
167
168
169 func (l *lexer) ignore() {
170 l.line += strings.Count(l.input[l.start:l.pos], "\n")
171 l.start = l.pos
172 l.startLine = l.line
173 }
174
175
176 func (l *lexer) accept(valid string) bool {
177 if strings.ContainsRune(valid, l.next()) {
178 return true
179 }
180 l.backup()
181 return false
182 }
183
184
185 func (l *lexer) acceptRun(valid string) {
186 for strings.ContainsRune(valid, l.next()) {
187 }
188 l.backup()
189 }
190
191
192
193 func (l *lexer) errorf(format string, args ...any) stateFn {
194 l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine}
195 return nil
196 }
197
198
199
200 func (l *lexer) nextItem() item {
201 return <-l.items
202 }
203
204
205
206 func (l *lexer) drain() {
207 for range l.items {
208 }
209 }
210
211
212 func lex(name, input, left, right string, emitComment bool) *lexer {
213 if left == "" {
214 left = leftDelim
215 }
216 if right == "" {
217 right = rightDelim
218 }
219 l := &lexer{
220 name: name,
221 input: input,
222 leftDelim: left,
223 rightDelim: right,
224 emitComment: emitComment,
225 items: make(chan item),
226 line: 1,
227 startLine: 1,
228 }
229 go l.run()
230 return l
231 }
232
233
234 func (l *lexer) run() {
235 for state := lexText; state != nil; {
236 state = state(l)
237 }
238 close(l.items)
239 }
240
241
242
// Default action delimiters (used by lex when none are supplied) and the
// comment markers recognized immediately inside an action.
const (
	leftDelim    = "{{"
	rightDelim   = "}}"
	leftComment  = "/*"
	rightComment = "*/"
)
249
250
251 func lexText(l *lexer) stateFn {
252 l.width = 0
253 if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 {
254 ldn := Pos(len(l.leftDelim))
255 l.pos += Pos(x)
256 trimLength := Pos(0)
257 if hasLeftTrimMarker(l.input[l.pos+ldn:]) {
258 trimLength = rightTrimLength(l.input[l.start:l.pos])
259 }
260 l.pos -= trimLength
261 if l.pos > l.start {
262 l.line += strings.Count(l.input[l.start:l.pos], "\n")
263 l.emit(itemText)
264 }
265 l.pos += trimLength
266 l.ignore()
267 return lexLeftDelim
268 }
269 l.pos = Pos(len(l.input))
270
271 if l.pos > l.start {
272 l.line += strings.Count(l.input[l.start:l.pos], "\n")
273 l.emit(itemText)
274 }
275 l.emit(itemEOF)
276 return nil
277 }
278
279
280 func rightTrimLength(s string) Pos {
281 return Pos(len(s) - len(strings.TrimRight(s, spaceChars)))
282 }
283
284
285 func (l *lexer) atRightDelim() (delim, trimSpaces bool) {
286 if hasRightTrimMarker(l.input[l.pos:]) && strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim) {
287 return true, true
288 }
289 if strings.HasPrefix(l.input[l.pos:], l.rightDelim) {
290 return true, false
291 }
292 return false, false
293 }
294
295
296 func leftTrimLength(s string) Pos {
297 return Pos(len(s) - len(strings.TrimLeft(s, spaceChars)))
298 }
299
300
301 func lexLeftDelim(l *lexer) stateFn {
302 l.pos += Pos(len(l.leftDelim))
303 trimSpace := hasLeftTrimMarker(l.input[l.pos:])
304 afterMarker := Pos(0)
305 if trimSpace {
306 afterMarker = trimMarkerLen
307 }
308 if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) {
309 l.pos += afterMarker
310 l.ignore()
311 return lexComment
312 }
313 l.emit(itemLeftDelim)
314 l.pos += afterMarker
315 l.ignore()
316 l.parenDepth = 0
317 return lexInsideAction
318 }
319
320
321 func lexComment(l *lexer) stateFn {
322 l.pos += Pos(len(leftComment))
323 i := strings.Index(l.input[l.pos:], rightComment)
324 if i < 0 {
325 return l.errorf("unclosed comment")
326 }
327 l.pos += Pos(i + len(rightComment))
328 delim, trimSpace := l.atRightDelim()
329 if !delim {
330 return l.errorf("comment ends before closing delimiter")
331 }
332 if l.emitComment {
333 l.emit(itemComment)
334 }
335 if trimSpace {
336 l.pos += trimMarkerLen
337 }
338 l.pos += Pos(len(l.rightDelim))
339 if trimSpace {
340 l.pos += leftTrimLength(l.input[l.pos:])
341 }
342 l.ignore()
343 return lexText
344 }
345
346
347 func lexRightDelim(l *lexer) stateFn {
348 trimSpace := hasRightTrimMarker(l.input[l.pos:])
349 if trimSpace {
350 l.pos += trimMarkerLen
351 l.ignore()
352 }
353 l.pos += Pos(len(l.rightDelim))
354 l.emit(itemRightDelim)
355 if trimSpace {
356 l.pos += leftTrimLength(l.input[l.pos:])
357 l.ignore()
358 }
359 return lexText
360 }
361
362
363 func lexInsideAction(l *lexer) stateFn {
364
365
366
367 delim, _ := l.atRightDelim()
368 if delim {
369 if l.parenDepth == 0 {
370 return lexRightDelim
371 }
372 return l.errorf("unclosed left paren")
373 }
374 switch r := l.next(); {
375 case r == eof:
376 return l.errorf("unclosed action")
377 case isSpace(r):
378 l.backup()
379 return lexSpace
380 case r == '=':
381 l.emit(itemAssign)
382 case r == ':':
383 if l.next() != '=' {
384 return l.errorf("expected :=")
385 }
386 l.emit(itemDeclare)
387 case r == '|':
388 l.emit(itemPipe)
389 case r == '"':
390 return lexQuote
391 case r == '`':
392 return lexRawQuote
393 case r == '$':
394 return lexVariable
395 case r == '\'':
396 return lexChar
397 case r == '.':
398
399 if l.pos < Pos(len(l.input)) {
400 r := l.input[l.pos]
401 if r < '0' || '9' < r {
402 return lexField
403 }
404 }
405 fallthrough
406 case r == '+' || r == '-' || ('0' <= r && r <= '9'):
407 l.backup()
408 return lexNumber
409 case isAlphaNumeric(r):
410 l.backup()
411 return lexIdentifier
412 case r == '(':
413 l.emit(itemLeftParen)
414 l.parenDepth++
415 case r == ')':
416 l.emit(itemRightParen)
417 l.parenDepth--
418 if l.parenDepth < 0 {
419 return l.errorf("unexpected right paren %#U", r)
420 }
421 case r <= unicode.MaxASCII && unicode.IsPrint(r):
422 l.emit(itemChar)
423 default:
424 return l.errorf("unrecognized character in action: %#U", r)
425 }
426 return lexInsideAction
427 }
428
429
430
431
432 func lexSpace(l *lexer) stateFn {
433 var r rune
434 var numSpaces int
435 for {
436 r = l.peek()
437 if !isSpace(r) {
438 break
439 }
440 l.next()
441 numSpaces++
442 }
443
444
445 if hasRightTrimMarker(l.input[l.pos-1:]) && strings.HasPrefix(l.input[l.pos-1+trimMarkerLen:], l.rightDelim) {
446 l.backup()
447 if numSpaces == 1 {
448 return lexRightDelim
449 }
450 }
451 l.emit(itemSpace)
452 return lexInsideAction
453 }
454
455
456 func lexIdentifier(l *lexer) stateFn {
457 Loop:
458 for {
459 switch r := l.next(); {
460 case isAlphaNumeric(r):
461
462 default:
463 l.backup()
464 word := l.input[l.start:l.pos]
465 if !l.atTerminator() {
466 return l.errorf("bad character %#U", r)
467 }
468 switch {
469 case key[word] > itemKeyword:
470 item := key[word]
471 if item == itemBreak && !l.breakOK || item == itemContinue && !l.continueOK {
472 l.emit(itemIdentifier)
473 } else {
474 l.emit(item)
475 }
476 case word[0] == '.':
477 l.emit(itemField)
478 case word == "true", word == "false":
479 l.emit(itemBool)
480 default:
481 l.emit(itemIdentifier)
482 }
483 break Loop
484 }
485 }
486 return lexInsideAction
487 }
488
489
490
491 func lexField(l *lexer) stateFn {
492 return lexFieldOrVariable(l, itemField)
493 }
494
495
496
497 func lexVariable(l *lexer) stateFn {
498 if l.atTerminator() {
499 l.emit(itemVariable)
500 return lexInsideAction
501 }
502 return lexFieldOrVariable(l, itemVariable)
503 }
504
505
506
507 func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
508 if l.atTerminator() {
509 if typ == itemVariable {
510 l.emit(itemVariable)
511 } else {
512 l.emit(itemDot)
513 }
514 return lexInsideAction
515 }
516 var r rune
517 for {
518 r = l.next()
519 if !isAlphaNumeric(r) {
520 l.backup()
521 break
522 }
523 }
524 if !l.atTerminator() {
525 return l.errorf("bad character %#U", r)
526 }
527 l.emit(typ)
528 return lexInsideAction
529 }
530
531
532
533
534
535 func (l *lexer) atTerminator() bool {
536 r := l.peek()
537 if isSpace(r) {
538 return true
539 }
540 switch r {
541 case eof, '.', ',', '|', ':', ')', '(':
542 return true
543 }
544
545
546
547 if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r {
548 return true
549 }
550 return false
551 }
552
553
554
555 func lexChar(l *lexer) stateFn {
556 Loop:
557 for {
558 switch l.next() {
559 case '\\':
560 if r := l.next(); r != eof && r != '\n' {
561 break
562 }
563 fallthrough
564 case eof, '\n':
565 return l.errorf("unterminated character constant")
566 case '\'':
567 break Loop
568 }
569 }
570 l.emit(itemCharConstant)
571 return lexInsideAction
572 }
573
574
575
576
577
578 func lexNumber(l *lexer) stateFn {
579 if !l.scanNumber() {
580 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
581 }
582 if sign := l.peek(); sign == '+' || sign == '-' {
583
584 if !l.scanNumber() || l.input[l.pos-1] != 'i' {
585 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
586 }
587 l.emit(itemComplex)
588 } else {
589 l.emit(itemNumber)
590 }
591 return lexInsideAction
592 }
593
594 func (l *lexer) scanNumber() bool {
595
596 l.accept("+-")
597
598 digits := "0123456789_"
599 if l.accept("0") {
600
601 if l.accept("xX") {
602 digits = "0123456789abcdefABCDEF_"
603 } else if l.accept("oO") {
604 digits = "01234567_"
605 } else if l.accept("bB") {
606 digits = "01_"
607 }
608 }
609 l.acceptRun(digits)
610 if l.accept(".") {
611 l.acceptRun(digits)
612 }
613 if len(digits) == 10+1 && l.accept("eE") {
614 l.accept("+-")
615 l.acceptRun("0123456789_")
616 }
617 if len(digits) == 16+6+1 && l.accept("pP") {
618 l.accept("+-")
619 l.acceptRun("0123456789_")
620 }
621
622 l.accept("i")
623
624 if isAlphaNumeric(l.peek()) {
625 l.next()
626 return false
627 }
628 return true
629 }
630
631
632 func lexQuote(l *lexer) stateFn {
633 Loop:
634 for {
635 switch l.next() {
636 case '\\':
637 if r := l.next(); r != eof && r != '\n' {
638 break
639 }
640 fallthrough
641 case eof, '\n':
642 return l.errorf("unterminated quoted string")
643 case '"':
644 break Loop
645 }
646 }
647 l.emit(itemString)
648 return lexInsideAction
649 }
650
651
652 func lexRawQuote(l *lexer) stateFn {
653 Loop:
654 for {
655 switch l.next() {
656 case eof:
657 return l.errorf("unterminated raw quoted string")
658 case '`':
659 break Loop
660 }
661 }
662 l.emit(itemRawString)
663 return lexInsideAction
664 }
665
666
// isSpace reports whether r is a space, tab, carriage return or newline.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}
670
671
// isAlphaNumeric reports whether r is an underscore, a Unicode letter or a
// Unicode digit.
func isAlphaNumeric(r rune) bool {
	if r == '_' {
		return true
	}
	return unicode.IsLetter(r) || unicode.IsDigit(r)
}
675
676 func hasLeftTrimMarker(s string) bool {
677 return len(s) >= 2 && s[0] == trimMarker && isSpace(rune(s[1]))
678 }
679
680 func hasRightTrimMarker(s string) bool {
681 return len(s) >= 2 && isSpace(rune(s[0])) && s[1] == trimMarker
682 }
683
View as plain text