Source file
src/regexp/backtrack.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package regexp
16
17 import (
18 "regexp/syntax"
19 "sync"
20 )
21
22
23
24 type job struct {
25 pc uint32
26 arg bool
27 pos int
28 }
29
30 const (
31 visitedBits = 32
32 maxBacktrackProg = 500
33 maxBacktrackVector = 256 * 1024
34 )
35
36
37 type bitState struct {
38 end int
39 cap []int
40 matchcap []int
41 jobs []job
42 visited []uint32
43
44 inputs inputs
45 }
46
47 var bitStatePool sync.Pool
48
49 func newBitState() *bitState {
50 b, ok := bitStatePool.Get().(*bitState)
51 if !ok {
52 b = new(bitState)
53 }
54 return b
55 }
56
57 func freeBitState(b *bitState) {
58 b.inputs.clear()
59 bitStatePool.Put(b)
60 }
61
62
63
64 func maxBitStateLen(prog *syntax.Prog) int {
65 if !shouldBacktrack(prog) {
66 return 0
67 }
68 return maxBacktrackVector / len(prog.Inst)
69 }
70
71
72
73 func shouldBacktrack(prog *syntax.Prog) bool {
74 return len(prog.Inst) <= maxBacktrackProg
75 }
76
77
78
79
80 func (b *bitState) reset(prog *syntax.Prog, end int, ncap int) {
81 b.end = end
82
83 if cap(b.jobs) == 0 {
84 b.jobs = make([]job, 0, 256)
85 } else {
86 b.jobs = b.jobs[:0]
87 }
88
89 visitedSize := (len(prog.Inst)*(end+1) + visitedBits - 1) / visitedBits
90 if cap(b.visited) < visitedSize {
91 b.visited = make([]uint32, visitedSize, maxBacktrackVector/visitedBits)
92 } else {
93 b.visited = b.visited[:visitedSize]
94 for i := range b.visited {
95 b.visited[i] = 0
96 }
97 }
98
99 if cap(b.cap) < ncap {
100 b.cap = make([]int, ncap)
101 } else {
102 b.cap = b.cap[:ncap]
103 }
104 for i := range b.cap {
105 b.cap[i] = -1
106 }
107
108 if cap(b.matchcap) < ncap {
109 b.matchcap = make([]int, ncap)
110 } else {
111 b.matchcap = b.matchcap[:ncap]
112 }
113 for i := range b.matchcap {
114 b.matchcap[i] = -1
115 }
116 }
117
118
119
120 func (b *bitState) shouldVisit(pc uint32, pos int) bool {
121 n := uint(int(pc)*(b.end+1) + pos)
122 if b.visited[n/visitedBits]&(1<<(n&(visitedBits-1))) != 0 {
123 return false
124 }
125 b.visited[n/visitedBits] |= 1 << (n & (visitedBits - 1))
126 return true
127 }
128
129
130
131 func (b *bitState) push(re *Regexp, pc uint32, pos int, arg bool) {
132
133
134 if re.prog.Inst[pc].Op != syntax.InstFail && (arg || b.shouldVisit(pc, pos)) {
135 b.jobs = append(b.jobs, job{pc: pc, arg: arg, pos: pos})
136 }
137 }
138
139
140 func (re *Regexp) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
141 longest := re.longest
142
143 b.push(re, pc, pos, false)
144 for len(b.jobs) > 0 {
145 l := len(b.jobs) - 1
146
147 pc := b.jobs[l].pc
148 pos := b.jobs[l].pos
149 arg := b.jobs[l].arg
150 b.jobs = b.jobs[:l]
151
152
153
154
155
156
157
158
159 goto Skip
160 CheckAndLoop:
161 if !b.shouldVisit(pc, pos) {
162 continue
163 }
164 Skip:
165
166 inst := re.prog.Inst[pc]
167
168 switch inst.Op {
169 default:
170 panic("bad inst")
171 case syntax.InstFail:
172 panic("unexpected InstFail")
173 case syntax.InstAlt:
174
175
176
177
178
179
180
181
182 if arg {
183
184 arg = false
185 pc = inst.Arg
186 goto CheckAndLoop
187 } else {
188 b.push(re, pc, pos, true)
189 pc = inst.Out
190 goto CheckAndLoop
191 }
192
193 case syntax.InstAltMatch:
194
195 switch re.prog.Inst[inst.Out].Op {
196 case syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
197
198 b.push(re, inst.Arg, pos, false)
199 pc = inst.Arg
200 pos = b.end
201 goto CheckAndLoop
202 }
203
204 b.push(re, inst.Out, b.end, false)
205 pc = inst.Out
206 goto CheckAndLoop
207
208 case syntax.InstRune:
209 r, width := i.step(pos)
210 if !inst.MatchRune(r) {
211 continue
212 }
213 pos += width
214 pc = inst.Out
215 goto CheckAndLoop
216
217 case syntax.InstRune1:
218 r, width := i.step(pos)
219 if r != inst.Rune[0] {
220 continue
221 }
222 pos += width
223 pc = inst.Out
224 goto CheckAndLoop
225
226 case syntax.InstRuneAnyNotNL:
227 r, width := i.step(pos)
228 if r == '\n' || r == endOfText {
229 continue
230 }
231 pos += width
232 pc = inst.Out
233 goto CheckAndLoop
234
235 case syntax.InstRuneAny:
236 r, width := i.step(pos)
237 if r == endOfText {
238 continue
239 }
240 pos += width
241 pc = inst.Out
242 goto CheckAndLoop
243
244 case syntax.InstCapture:
245 if arg {
246
247 b.cap[inst.Arg] = pos
248 continue
249 } else {
250 if inst.Arg < uint32(len(b.cap)) {
251
252 b.push(re, pc, b.cap[inst.Arg], true)
253 b.cap[inst.Arg] = pos
254 }
255 pc = inst.Out
256 goto CheckAndLoop
257 }
258
259 case syntax.InstEmptyWidth:
260 flag := i.context(pos)
261 if !flag.match(syntax.EmptyOp(inst.Arg)) {
262 continue
263 }
264 pc = inst.Out
265 goto CheckAndLoop
266
267 case syntax.InstNop:
268 pc = inst.Out
269 goto CheckAndLoop
270
271 case syntax.InstMatch:
272
273
274 if len(b.cap) == 0 {
275 return true
276 }
277
278
279
280
281 if len(b.cap) > 1 {
282 b.cap[1] = pos
283 }
284 if old := b.matchcap[1]; old == -1 || (longest && pos > 0 && pos > old) {
285 copy(b.matchcap, b.cap)
286 }
287
288
289 if !longest {
290 return true
291 }
292
293
294 if pos == b.end {
295 return true
296 }
297
298
299 continue
300 }
301 }
302
303 return longest && len(b.matchcap) > 1 && b.matchcap[1] >= 0
304 }
305
306
307 func (re *Regexp) backtrack(ib []byte, is string, pos int, ncap int, dstCap []int) []int {
308 startCond := re.cond
309 if startCond == ^syntax.EmptyOp(0) {
310 return nil
311 }
312 if startCond&syntax.EmptyBeginText != 0 && pos != 0 {
313
314 return nil
315 }
316
317 b := newBitState()
318 i, end := b.inputs.init(nil, ib, is)
319 b.reset(re.prog, end, ncap)
320
321
322 if startCond&syntax.EmptyBeginText != 0 {
323 if len(b.cap) > 0 {
324 b.cap[0] = pos
325 }
326 if !re.tryBacktrack(b, i, uint32(re.prog.Start), pos) {
327 freeBitState(b)
328 return nil
329 }
330 } else {
331
332
333
334
335
336
337
338 width := -1
339 for ; pos <= end && width != 0; pos += width {
340 if len(re.prefix) > 0 {
341
342 advance := i.index(re, pos)
343 if advance < 0 {
344 freeBitState(b)
345 return nil
346 }
347 pos += advance
348 }
349
350 if len(b.cap) > 0 {
351 b.cap[0] = pos
352 }
353 if re.tryBacktrack(b, i, uint32(re.prog.Start), pos) {
354
355 goto Match
356 }
357 _, width = i.step(pos)
358 }
359 freeBitState(b)
360 return nil
361 }
362
363 Match:
364 dstCap = append(dstCap, b.matchcap...)
365 freeBitState(b)
366 return dstCap
367 }
368
View as plain text