1
2
3
4
5 package syntax
6
7 import (
8 "strconv"
9 "strings"
10 "unicode"
11 "unicode/utf8"
12 )
13
14
15
16
17
18 type Prog struct {
19 Inst []Inst
20 Start int
21 NumCap int
22 }
23
24
25 type InstOp uint8
26
27 const (
28 InstAlt InstOp = iota
29 InstAltMatch
30 InstCapture
31 InstEmptyWidth
32 InstMatch
33 InstFail
34 InstNop
35 InstRune
36 InstRune1
37 InstRuneAny
38 InstRuneAnyNotNL
39 )
40
41 var instOpNames = []string{
42 "InstAlt",
43 "InstAltMatch",
44 "InstCapture",
45 "InstEmptyWidth",
46 "InstMatch",
47 "InstFail",
48 "InstNop",
49 "InstRune",
50 "InstRune1",
51 "InstRuneAny",
52 "InstRuneAnyNotNL",
53 }
54
55 func (i InstOp) String() string {
56 if uint(i) >= uint(len(instOpNames)) {
57 return ""
58 }
59 return instOpNames[i]
60 }
61
62
63 type EmptyOp uint8
64
65 const (
66 EmptyBeginLine EmptyOp = 1 << iota
67 EmptyEndLine
68 EmptyBeginText
69 EmptyEndText
70 EmptyWordBoundary
71 EmptyNoWordBoundary
72 )
73
74
75
76
77
78
79
80 func EmptyOpContext(r1, r2 rune) EmptyOp {
81 var op EmptyOp = EmptyNoWordBoundary
82 var boundary byte
83 switch {
84 case IsWordChar(r1):
85 boundary = 1
86 case r1 == '\n':
87 op |= EmptyBeginLine
88 case r1 < 0:
89 op |= EmptyBeginText | EmptyBeginLine
90 }
91 switch {
92 case IsWordChar(r2):
93 boundary ^= 1
94 case r2 == '\n':
95 op |= EmptyEndLine
96 case r2 < 0:
97 op |= EmptyEndText | EmptyEndLine
98 }
99 if boundary != 0 {
100 op ^= (EmptyWordBoundary | EmptyNoWordBoundary)
101 }
102 return op
103 }
104
105
106
107
108 func IsWordChar(r rune) bool {
109 return 'A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' || '0' <= r && r <= '9' || r == '_'
110 }
111
112
113 type Inst struct {
114 Op InstOp
115 Out uint32
116 Arg uint32
117 Rune []rune
118 }
119
120 func (p *Prog) String() string {
121 var b strings.Builder
122 dumpProg(&b, p)
123 return b.String()
124 }
125
126
127 func (p *Prog) skipNop(pc uint32) *Inst {
128 i := &p.Inst[pc]
129 for i.Op == InstNop || i.Op == InstCapture {
130 i = &p.Inst[i.Out]
131 }
132 return i
133 }
134
135
136 func (i *Inst) op() InstOp {
137 op := i.Op
138 switch op {
139 case InstRune1, InstRuneAny, InstRuneAnyNotNL:
140 op = InstRune
141 }
142 return op
143 }
144
145
146
147
148 func (p *Prog) Prefix() (prefix string, complete bool) {
149 i := p.skipNop(uint32(p.Start))
150
151
152 if i.op() != InstRune || len(i.Rune) != 1 {
153 return "", i.Op == InstMatch
154 }
155
156
157 var buf strings.Builder
158 for i.op() == InstRune && len(i.Rune) == 1 && Flags(i.Arg)&FoldCase == 0 && i.Rune[0] != utf8.RuneError {
159 buf.WriteRune(i.Rune[0])
160 i = p.skipNop(i.Out)
161 }
162 return buf.String(), i.Op == InstMatch
163 }
164
165
166
167 func (p *Prog) StartCond() EmptyOp {
168 var flag EmptyOp
169 pc := uint32(p.Start)
170 i := &p.Inst[pc]
171 Loop:
172 for {
173 switch i.Op {
174 case InstEmptyWidth:
175 flag |= EmptyOp(i.Arg)
176 case InstFail:
177 return ^EmptyOp(0)
178 case InstCapture, InstNop:
179
180 default:
181 break Loop
182 }
183 pc = i.Out
184 i = &p.Inst[pc]
185 }
186 return flag
187 }
188
189 const noMatch = -1
190
191
192
193 func (i *Inst) MatchRune(r rune) bool {
194 return i.MatchRunePos(r) != noMatch
195 }
196
197
198
199
200
201
202 func (i *Inst) MatchRunePos(r rune) int {
203 rune := i.Rune
204
205 switch len(rune) {
206 case 0:
207 return noMatch
208
209 case 1:
210
211 r0 := rune[0]
212 if r == r0 {
213 return 0
214 }
215 if Flags(i.Arg)&FoldCase != 0 {
216 for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
217 if r == r1 {
218 return 0
219 }
220 }
221 }
222 return noMatch
223
224 case 2:
225 if r >= rune[0] && r <= rune[1] {
226 return 0
227 }
228 return noMatch
229
230 case 4, 6, 8:
231
232
233 for j := 0; j < len(rune); j += 2 {
234 if r < rune[j] {
235 return noMatch
236 }
237 if r <= rune[j+1] {
238 return j / 2
239 }
240 }
241 return noMatch
242 }
243
244
245 lo := 0
246 hi := len(rune) / 2
247 for lo < hi {
248 m := lo + (hi-lo)/2
249 if c := rune[2*m]; c <= r {
250 if r <= rune[2*m+1] {
251 return m
252 }
253 lo = m + 1
254 } else {
255 hi = m
256 }
257 }
258 return noMatch
259 }
260
261
262
263
264 func (i *Inst) MatchEmptyWidth(before rune, after rune) bool {
265 switch EmptyOp(i.Arg) {
266 case EmptyBeginLine:
267 return before == '\n' || before == -1
268 case EmptyEndLine:
269 return after == '\n' || after == -1
270 case EmptyBeginText:
271 return before == -1
272 case EmptyEndText:
273 return after == -1
274 case EmptyWordBoundary:
275 return IsWordChar(before) != IsWordChar(after)
276 case EmptyNoWordBoundary:
277 return IsWordChar(before) == IsWordChar(after)
278 }
279 panic("unknown empty width arg")
280 }
281
282 func (i *Inst) String() string {
283 var b strings.Builder
284 dumpInst(&b, i)
285 return b.String()
286 }
287
288 func bw(b *strings.Builder, args ...string) {
289 for _, s := range args {
290 b.WriteString(s)
291 }
292 }
293
294 func dumpProg(b *strings.Builder, p *Prog) {
295 for j := range p.Inst {
296 i := &p.Inst[j]
297 pc := strconv.Itoa(j)
298 if len(pc) < 3 {
299 b.WriteString(" "[len(pc):])
300 }
301 if j == p.Start {
302 pc += "*"
303 }
304 bw(b, pc, "\t")
305 dumpInst(b, i)
306 bw(b, "\n")
307 }
308 }
309
310 func u32(i uint32) string {
311 return strconv.FormatUint(uint64(i), 10)
312 }
313
314 func dumpInst(b *strings.Builder, i *Inst) {
315 switch i.Op {
316 case InstAlt:
317 bw(b, "alt -> ", u32(i.Out), ", ", u32(i.Arg))
318 case InstAltMatch:
319 bw(b, "altmatch -> ", u32(i.Out), ", ", u32(i.Arg))
320 case InstCapture:
321 bw(b, "cap ", u32(i.Arg), " -> ", u32(i.Out))
322 case InstEmptyWidth:
323 bw(b, "empty ", u32(i.Arg), " -> ", u32(i.Out))
324 case InstMatch:
325 bw(b, "match")
326 case InstFail:
327 bw(b, "fail")
328 case InstNop:
329 bw(b, "nop -> ", u32(i.Out))
330 case InstRune:
331 if i.Rune == nil {
332
333 bw(b, "rune <nil>")
334 }
335 bw(b, "rune ", strconv.QuoteToASCII(string(i.Rune)))
336 if Flags(i.Arg)&FoldCase != 0 {
337 bw(b, "/i")
338 }
339 bw(b, " -> ", u32(i.Out))
340 case InstRune1:
341 bw(b, "rune1 ", strconv.QuoteToASCII(string(i.Rune)), " -> ", u32(i.Out))
342 case InstRuneAny:
343 bw(b, "any -> ", u32(i.Out))
344 case InstRuneAnyNotNL:
345 bw(b, "anynotnl -> ", u32(i.Out))
346 }
347 }
348
View as plain text