Source file
src/regexp/exec_test.go
1
2
3
4
5 package regexp
6
7 import (
8 "bufio"
9 "compress/bzip2"
10 "fmt"
11 "internal/testenv"
12 "io"
13 "os"
14 "path/filepath"
15 "regexp/syntax"
16 "strconv"
17 "strings"
18 "testing"
19 "unicode/utf8"
20 )
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66 func TestRE2Search(t *testing.T) {
67 testRE2(t, "testdata/re2-search.txt")
68 }
69
70 func testRE2(t *testing.T, file string) {
71 f, err := os.Open(file)
72 if err != nil {
73 t.Fatal(err)
74 }
75 defer f.Close()
76 var txt io.Reader
77 if strings.HasSuffix(file, ".bz2") {
78 z := bzip2.NewReader(f)
79 txt = z
80 file = file[:len(file)-len(".bz2")]
81 } else {
82 txt = f
83 }
84 lineno := 0
85 scanner := bufio.NewScanner(txt)
86 var (
87 str []string
88 input []string
89 inStrings bool
90 re *Regexp
91 refull *Regexp
92 nfail int
93 ncase int
94 )
95 for lineno := 1; scanner.Scan(); lineno++ {
96 line := scanner.Text()
97 switch {
98 case line == "":
99 t.Fatalf("%s:%d: unexpected blank line", file, lineno)
100 case line[0] == '#':
101 continue
102 case 'A' <= line[0] && line[0] <= 'Z':
103
104 t.Logf("%s\n", line)
105 continue
106 case line == "strings":
107 str = str[:0]
108 inStrings = true
109 case line == "regexps":
110 inStrings = false
111 case line[0] == '"':
112 q, err := strconv.Unquote(line)
113 if err != nil {
114
115 t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
116 }
117 if inStrings {
118 str = append(str, q)
119 continue
120 }
121
122 if len(input) != 0 {
123 t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
124 }
125 re, err = tryCompile(q)
126 if err != nil {
127 if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" {
128
129 continue
130 }
131 t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
132 if nfail++; nfail >= 100 {
133 t.Fatalf("stopping after %d errors", nfail)
134 }
135 continue
136 }
137 full := `\A(?:` + q + `)\z`
138 refull, err = tryCompile(full)
139 if err != nil {
140
141 t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
142 }
143 input = str
144 case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
145
146 ncase++
147 if re == nil {
148
149 continue
150 }
151 if len(input) == 0 {
152 t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
153 }
154 var text string
155 text, input = input[0], input[1:]
156 if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
157
158
159
160
161
162 continue
163 }
164 res := strings.Split(line, ";")
165 if len(res) != len(run) {
166 t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
167 }
168 for i := range res {
169 have, suffix := run[i](re, refull, text)
170 want := parseResult(t, file, lineno, res[i])
171 if !same(have, want) {
172 t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
173 if nfail++; nfail >= 100 {
174 t.Fatalf("stopping after %d errors", nfail)
175 }
176 continue
177 }
178 b, suffix := match[i](re, refull, text)
179 if b != (want != nil) {
180 t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
181 if nfail++; nfail >= 100 {
182 t.Fatalf("stopping after %d errors", nfail)
183 }
184 continue
185 }
186 }
187
188 default:
189 t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
190 }
191 }
192 if err := scanner.Err(); err != nil {
193 t.Fatalf("%s:%d: %v", file, lineno, err)
194 }
195 if len(input) != 0 {
196 t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
197 }
198 t.Logf("%d cases tested", ncase)
199 }
200
201 var run = []func(*Regexp, *Regexp, string) ([]int, string){
202 runFull,
203 runPartial,
204 runFullLongest,
205 runPartialLongest,
206 }
207
208 func runFull(re, refull *Regexp, text string) ([]int, string) {
209 refull.longest = false
210 return refull.FindStringSubmatchIndex(text), "[full]"
211 }
212
213 func runPartial(re, refull *Regexp, text string) ([]int, string) {
214 re.longest = false
215 return re.FindStringSubmatchIndex(text), ""
216 }
217
218 func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
219 refull.longest = true
220 return refull.FindStringSubmatchIndex(text), "[full,longest]"
221 }
222
223 func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
224 re.longest = true
225 return re.FindStringSubmatchIndex(text), "[longest]"
226 }
227
228 var match = []func(*Regexp, *Regexp, string) (bool, string){
229 matchFull,
230 matchPartial,
231 matchFullLongest,
232 matchPartialLongest,
233 }
234
235 func matchFull(re, refull *Regexp, text string) (bool, string) {
236 refull.longest = false
237 return refull.MatchString(text), "[full]"
238 }
239
240 func matchPartial(re, refull *Regexp, text string) (bool, string) {
241 re.longest = false
242 return re.MatchString(text), ""
243 }
244
245 func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
246 refull.longest = true
247 return refull.MatchString(text), "[full,longest]"
248 }
249
250 func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
251 re.longest = true
252 return re.MatchString(text), "[longest]"
253 }
254
// isSingleBytes reports whether every byte of s is below utf8.RuneSelf,
// that is, whether s consists solely of single-byte (ASCII) characters.
// The empty string qualifies.
func isSingleBytes(s string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
263
264 func tryCompile(s string) (re *Regexp, err error) {
265
266 defer func() {
267 if r := recover(); r != nil {
268 err = fmt.Errorf("panic: %v", r)
269 }
270 }()
271 return Compile(s)
272 }
273
// parseResult converts one expected-result entry into a flat list of index
// pairs.
//
// The entry "-" yields nil. Any other entry is a space-separated list of
// items, each either "-" (recorded as the pair -1, -1) or "lo-hi" with
// decimal lo <= hi. A malformed item aborts the test with a message that
// names file and lineno.
func parseResult(t *testing.T, file string, lineno int, res string) []int {
	if res == "-" {
		return nil
	}

	pairs := strings.Split(res, " ")
	out := make([]int, 0, 2*len(pairs))
	for _, pair := range pairs {
		if pair == "-" {
			out = append(out, -1, -1)
			continue
		}
		// A missing "-" leaves hiStr empty, which Atoi rejects below.
		loStr, hiStr, _ := strings.Cut(pair, "-")
		lo, errLo := strconv.Atoi(loStr)
		hi, errHi := strconv.Atoi(hiStr)
		if errLo != nil || errHi != nil || lo > hi {
			t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
		}
		out = append(out, lo, hi)
	}
	return out
}
312
// same reports whether x and y have the same length and equal elements at
// every position. A nil slice and an empty slice compare as equal.
func same(x, y []int) bool {
	n := len(x)
	if n != len(y) {
		return false
	}
	for i := 0; i < n; i++ {
		if x[i] != y[i] {
			return false
		}
	}
	return true
}
324
325
326
327
328 func TestFowler(t *testing.T) {
329 files, err := filepath.Glob("testdata/*.dat")
330 if err != nil {
331 t.Fatal(err)
332 }
333 for _, file := range files {
334 t.Log(file)
335 testFowler(t, file)
336 }
337 }
338
339 var notab = MustCompilePOSIX(`[^\t]+`)
340
341 func testFowler(t *testing.T, file string) {
342 f, err := os.Open(file)
343 if err != nil {
344 t.Error(err)
345 return
346 }
347 defer f.Close()
348 b := bufio.NewReader(f)
349 lineno := 0
350 lastRegexp := ""
351 Reading:
352 for {
353 lineno++
354 line, err := b.ReadString('\n')
355 if err != nil {
356 if err != io.EOF {
357 t.Errorf("%s:%d: %v", file, lineno, err)
358 }
359 break Reading
360 }
361
362
363
364
365
366
367
368
369 if line[0] == '#' || line[0] == '\n' {
370 continue Reading
371 }
372 line = line[:len(line)-1]
373 field := notab.FindAllString(line, -1)
374 for i, f := range field {
375 if f == "NULL" {
376 field[i] = ""
377 }
378 if f == "NIL" {
379 t.Logf("%s:%d: skip: %s", file, lineno, line)
380 continue Reading
381 }
382 }
383 if len(field) == 0 {
384 continue Reading
385 }
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447 flag := field[0]
448 switch flag[0] {
449 case '?', '&', '|', ';', '{', '}':
450
451
452 flag = flag[1:]
453 if flag == "" {
454 continue Reading
455 }
456 case ':':
457 var ok bool
458 if _, flag, ok = strings.Cut(flag[1:], ":"); !ok {
459 t.Logf("skip: %s", line)
460 continue Reading
461 }
462 case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
463 t.Logf("skip: %s", line)
464 continue Reading
465 }
466
467
468 if len(field) < 4 {
469 t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
470 continue Reading
471 }
472
473
474 if strings.Contains(flag, "$") {
475 f := `"` + field[1] + `"`
476 if field[1], err = strconv.Unquote(f); err != nil {
477 t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
478 }
479 f = `"` + field[2] + `"`
480 if field[2], err = strconv.Unquote(f); err != nil {
481 t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
482 }
483 }
484
485
486
487
488 if field[1] == "SAME" {
489 field[1] = lastRegexp
490 }
491 lastRegexp = field[1]
492
493
494 text := field[2]
495
496
497 ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
498 if !ok {
499 t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
500 continue Reading
501 }
502
503
504
505 Testing:
506
507 for _, c := range flag {
508 pattern := field[1]
509 syn := syntax.POSIX | syntax.ClassNL
510 switch c {
511 default:
512 continue Testing
513 case 'E':
514
515 case 'L':
516
517 pattern = QuoteMeta(pattern)
518 }
519
520 for _, c := range flag {
521 switch c {
522 case 'i':
523 syn |= syntax.FoldCase
524 }
525 }
526
527 re, err := compile(pattern, syn, true)
528 if err != nil {
529 if shouldCompile {
530 t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
531 }
532 continue Testing
533 }
534 if !shouldCompile {
535 t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
536 continue Testing
537 }
538 match := re.MatchString(text)
539 if match != shouldMatch {
540 t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
541 continue Testing
542 }
543 have := re.FindStringSubmatchIndex(text)
544 if (len(have) > 0) != match {
545 t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
546 continue Testing
547 }
548 if len(have) > len(pos) {
549 have = have[:len(pos)]
550 }
551 if !same(have, pos) {
552 t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
553 }
554 }
555 }
556 }
557
// parseFowlerResult decodes the expected-outcome field of a test line.
//
// Recognized forms:
//
//   - "": the pattern compiles and matches; no positions are given.
//   - "NOMATCH": the pattern compiles but does not match.
//   - anything else starting with an upper-case ASCII letter: the pattern
//     is expected not to compile.
//   - "(a,b)(c,d)...": the pattern compiles and matches; pos receives
//     a, b, c, d, ... where each value is a decimal integer, or "?" which is
//     recorded as -1.
//
// ok is false when s fits none of these forms. In that case compiled is
// still true, matched is false and pos is nil.
func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
	switch {
	case s == "":
		return true, true, true, nil
	case s == "NOMATCH":
		return true, true, false, nil
	case 'A' <= s[0] && s[0] <= 'Z':
		return true, false, false, nil
	}

	var idx []int
	for rest := s; rest != ""; {
		// The first value of a pair follows '(' and ends at ','; the second
		// one ends at ')'.
		term := byte(')')
		if len(idx)%2 == 0 {
			if rest[0] != '(' {
				return false, true, false, nil
			}
			rest = rest[1:]
			term = ','
		}

		// The value must be non-empty and must be followed by term.
		end := strings.IndexByte(rest, term)
		if end <= 0 {
			return false, true, false, nil
		}

		v := -1
		if tok := rest[:end]; tok != "?" {
			n, err := strconv.Atoi(tok)
			if err != nil {
				return false, true, false, nil
			}
			v = n
		}
		idx = append(idx, v)
		rest = rest[end+1:]
	}

	// The input ended in the middle of a pair.
	if len(idx)%2 != 0 {
		return false, true, false, nil
	}
	return true, true, true, idx
}
634
// text caches the largest buffer produced by makeText so far.
var text []byte

// makeText returns n bytes of deterministic pseudo-random text made of
// newlines and bytes in the range 0x20 through 0x7E.
//
// The sequence always restarts from the same seed, so a shorter result is a
// prefix of a longer one. If the cached buffer is long enough, a prefix of
// it is returned; otherwise a new buffer of length n is generated and cached.
// The returned slice shares storage with the cache.
func makeText(n int) []byte {
	if n <= len(text) {
		return text[:n]
	}

	const (
		first = 0x20             // lowest byte value produced, besides '\n'
		span  = 0x7E + 1 - first // number of distinct non-newline byte values
	)

	buf := make([]byte, n)
	state := ^uint32(0)
	for i := 0; i < n; i++ {
		// Shift-and-xor generator: double the state, flip the low bit and
		// fold in a constant whenever the top bit is set.
		state <<= 1
		state ^= 1
		if state&(1<<31) != 0 {
			state ^= 0x88888eef
		}
		switch {
		case state%31 == 0:
			buf[i] = '\n'
		default:
			buf[i] = byte(state%span + first)
		}
	}
	text = buf
	return text
}
657
658 func BenchmarkMatch(b *testing.B) {
659 isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
660
661 for _, data := range benchData {
662 r := MustCompile(data.re)
663 for _, size := range benchSizes {
664 if (isRaceBuilder || testing.Short()) && size.n > 1<<10 {
665 continue
666 }
667 t := makeText(size.n)
668 b.Run(data.name+"/"+size.name, func(b *testing.B) {
669 b.SetBytes(int64(size.n))
670 for i := 0; i < b.N; i++ {
671 if r.Match(t) {
672 b.Fatal("match!")
673 }
674 }
675 })
676 }
677 }
678 }
679
680 func BenchmarkMatch_onepass_regex(b *testing.B) {
681 isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
682 r := MustCompile(`(?s)\A.*\z`)
683 if r.onepass == nil {
684 b.Fatalf("want onepass regex, but %q is not onepass", r)
685 }
686 for _, size := range benchSizes {
687 if (isRaceBuilder || testing.Short()) && size.n > 1<<10 {
688 continue
689 }
690 t := makeText(size.n)
691 b.Run(size.name, func(b *testing.B) {
692 b.SetBytes(int64(size.n))
693 b.ReportAllocs()
694 for i := 0; i < b.N; i++ {
695 if !r.Match(t) {
696 b.Fatal("not match!")
697 }
698 }
699 })
700 }
701 }
702
// benchData lists the patterns exercised by BenchmarkMatch together with the
// names used for their sub-benchmarks.
var benchData = []struct{ name, re string }{
	{name: "Easy0", re: "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
	{name: "Easy0i", re: "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"},
	{name: "Easy1", re: "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"},
	{name: "Medium", re: "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
	{name: "Hard", re: "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
	{name: "Hard1", re: "ABCD|CDEF|EFGH|GHIJ|IJKL|KLMN|MNOP|OPQR|QRST|STUV|UVWX|WXYZ"},
}
711
// benchSizes lists the input lengths, in bytes, used by the Match
// benchmarks, together with the names used for their sub-benchmarks.
var benchSizes = []struct {
	name string
	n    int
}{
	{name: "16", n: 16},
	{name: "32", n: 32},
	{name: "1K", n: 1 << 10},
	{name: "32K", n: 32 << 10},
	{name: "1M", n: 1 << 20},
	{name: "32M", n: 32 << 20},
}
723
724 func TestLongest(t *testing.T) {
725 re, err := Compile(`a(|b)`)
726 if err != nil {
727 t.Fatal(err)
728 }
729 if g, w := re.FindString("ab"), "a"; g != w {
730 t.Errorf("first match was %q, want %q", g, w)
731 }
732 re.Longest()
733 if g, w := re.FindString("ab"), "ab"; g != w {
734 t.Errorf("longest match was %q, want %q", g, w)
735 }
736 }
737
738
739
740 func TestProgramTooLongForBacktrack(t *testing.T) {
741 longRegex := MustCompile(`(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|twentyone|twentytwo|twentythree|twentyfour|twentyfive|twentysix|twentyseven|twentyeight|twentynine|thirty|thirtyone|thirtytwo|thirtythree|thirtyfour|thirtyfive|thirtysix|thirtyseven|thirtyeight|thirtynine|forty|fortyone|fortytwo|fortythree|fortyfour|fortyfive|fortysix|fortyseven|fortyeight|fortynine|fifty|fiftyone|fiftytwo|fiftythree|fiftyfour|fiftyfive|fiftysix|fiftyseven|fiftyeight|fiftynine|sixty|sixtyone|sixtytwo|sixtythree|sixtyfour|sixtyfive|sixtysix|sixtyseven|sixtyeight|sixtynine|seventy|seventyone|seventytwo|seventythree|seventyfour|seventyfive|seventysix|seventyseven|seventyeight|seventynine|eighty|eightyone|eightytwo|eightythree|eightyfour|eightyfive|eightysix|eightyseven|eightyeight|eightynine|ninety|ninetyone|ninetytwo|ninetythree|ninetyfour|ninetyfive|ninetysix|ninetyseven|ninetyeight|ninetynine|onehundred)`)
742 if !longRegex.MatchString("two") {
743 t.Errorf("longRegex.MatchString(\"two\") was false, want true")
744 }
745 if longRegex.MatchString("xxx") {
746 t.Errorf("longRegex.MatchString(\"xxx\") was true, want false")
747 }
748 }
749
View as plain text