Source file src/regexp/exec_test.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package regexp
     6  
     7  import (
     8  	"bufio"
     9  	"compress/bzip2"
    10  	"fmt"
    11  	"internal/testenv"
    12  	"io"
    13  	"os"
    14  	"path/filepath"
    15  	"regexp/syntax"
    16  	"strconv"
    17  	"strings"
    18  	"testing"
    19  	"unicode/utf8"
    20  )
    21  
    22  // TestRE2 tests this package's regexp API against test cases
    23  // considered during RE2's exhaustive tests, which run all possible
    24  // regexps over a given set of atoms and operators, up to a given
    25  // complexity, over all possible strings over a given alphabet,
    26  // up to a given size. Rather than try to link with RE2, we read a
    27  // log file containing the test cases and the expected matches.
    28  // The log file, re2-exhaustive.txt, is generated by running 'make log'
    29  // in the open source RE2 distribution https://github.com/google/re2/.
    30  //
    31  // The test file format is a sequence of stanzas like:
    32  //
    33  //	strings
    34  //	"abc"
    35  //	"123x"
    36  //	regexps
    37  //	"[a-z]+"
    38  //	0-3;0-3
    39  //	-;-
    40  //	"([0-9])([0-9])([0-9])"
    41  //	-;-
    42  //	-;0-3 0-1 1-2 2-3
    43  //
    44  // The stanza begins by defining a set of strings, quoted
    45  // using Go double-quote syntax, one per line. Then the
    46  // regexps section gives a sequence of regexps to run on
    47  // the strings. In the block that follows a regexp, each line
    48  // gives the semicolon-separated match results of running
    49  // the regexp on the corresponding string.
    50  // Each match result is either a single -, meaning no match, or a
    51  // space-separated sequence of pairs giving the match and
    52  // submatch indices. An unmatched subexpression formats
    53  // its pair as a single - (not illustrated above).  For now
    54  // each regexp run produces two match results, one for a
    55  // ``full match'' that restricts the regexp to matching the entire
    56  // string or nothing, and one for a ``partial match'' that gives
    57  // the leftmost first match found in the string.
    58  //
    59  // Lines beginning with # are comments. Lines beginning with
    60  // a capital letter are test names printed during RE2's test suite
    61  // and are echoed into t but otherwise ignored.
    62  //
    63  // At time of writing, re2-exhaustive.txt is 59 MB but compresses to 385 kB,
    64  // so we store re2-exhaustive.txt.bz2 in the repository and decompress it on the fly.
    65  //
    66  func TestRE2Search(t *testing.T) {
    67  	testRE2(t, "testdata/re2-search.txt")
    68  }
    69  
    70  func testRE2(t *testing.T, file string) {
    71  	f, err := os.Open(file)
    72  	if err != nil {
    73  		t.Fatal(err)
    74  	}
    75  	defer f.Close()
    76  	var txt io.Reader
    77  	if strings.HasSuffix(file, ".bz2") {
    78  		z := bzip2.NewReader(f)
    79  		txt = z
    80  		file = file[:len(file)-len(".bz2")] // for error messages
    81  	} else {
    82  		txt = f
    83  	}
    84  	lineno := 0
    85  	scanner := bufio.NewScanner(txt)
    86  	var (
    87  		str       []string
    88  		input     []string
    89  		inStrings bool
    90  		re        *Regexp
    91  		refull    *Regexp
    92  		nfail     int
    93  		ncase     int
    94  	)
    95  	for lineno := 1; scanner.Scan(); lineno++ {
    96  		line := scanner.Text()
    97  		switch {
    98  		case line == "":
    99  			t.Fatalf("%s:%d: unexpected blank line", file, lineno)
   100  		case line[0] == '#':
   101  			continue
   102  		case 'A' <= line[0] && line[0] <= 'Z':
   103  			// Test name.
   104  			t.Logf("%s\n", line)
   105  			continue
   106  		case line == "strings":
   107  			str = str[:0]
   108  			inStrings = true
   109  		case line == "regexps":
   110  			inStrings = false
   111  		case line[0] == '"':
   112  			q, err := strconv.Unquote(line)
   113  			if err != nil {
   114  				// Fatal because we'll get out of sync.
   115  				t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
   116  			}
   117  			if inStrings {
   118  				str = append(str, q)
   119  				continue
   120  			}
   121  			// Is a regexp.
   122  			if len(input) != 0 {
   123  				t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
   124  			}
   125  			re, err = tryCompile(q)
   126  			if err != nil {
   127  				if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" {
   128  					// We don't and likely never will support \C; keep going.
   129  					continue
   130  				}
   131  				t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
   132  				if nfail++; nfail >= 100 {
   133  					t.Fatalf("stopping after %d errors", nfail)
   134  				}
   135  				continue
   136  			}
   137  			full := `\A(?:` + q + `)\z`
   138  			refull, err = tryCompile(full)
   139  			if err != nil {
   140  				// Fatal because q worked, so this should always work.
   141  				t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
   142  			}
   143  			input = str
   144  		case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
   145  			// A sequence of match results.
   146  			ncase++
   147  			if re == nil {
   148  				// Failed to compile: skip results.
   149  				continue
   150  			}
   151  			if len(input) == 0 {
   152  				t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
   153  			}
   154  			var text string
   155  			text, input = input[0], input[1:]
   156  			if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
   157  				// RE2's \B considers every byte position,
   158  				// so it sees 'not word boundary' in the
   159  				// middle of UTF-8 sequences. This package
   160  				// only considers the positions between runes,
   161  				// so it disagrees. Skip those cases.
   162  				continue
   163  			}
   164  			res := strings.Split(line, ";")
   165  			if len(res) != len(run) {
   166  				t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
   167  			}
   168  			for i := range res {
   169  				have, suffix := run[i](re, refull, text)
   170  				want := parseResult(t, file, lineno, res[i])
   171  				if !same(have, want) {
   172  					t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
   173  					if nfail++; nfail >= 100 {
   174  						t.Fatalf("stopping after %d errors", nfail)
   175  					}
   176  					continue
   177  				}
   178  				b, suffix := match[i](re, refull, text)
   179  				if b != (want != nil) {
   180  					t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
   181  					if nfail++; nfail >= 100 {
   182  						t.Fatalf("stopping after %d errors", nfail)
   183  					}
   184  					continue
   185  				}
   186  			}
   187  
   188  		default:
   189  			t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
   190  		}
   191  	}
   192  	if err := scanner.Err(); err != nil {
   193  		t.Fatalf("%s:%d: %v", file, lineno, err)
   194  	}
   195  	if len(input) != 0 {
   196  		t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
   197  	}
   198  	t.Logf("%d cases tested", ncase)
   199  }
   200  
// run lists the search modes exercised for every result line of the RE2
// log. testRE2 requires each result line to carry exactly len(run)
// semicolon-separated results and compares entry i with run[i]. Each
// function returns the submatch indices plus a suffix naming the mode
// for use in failure messages.
var run = []func(*Regexp, *Regexp, string) ([]int, string){
	runFull,
	runPartial,
	runFullLongest,
	runPartialLongest,
}
   207  
   208  func runFull(re, refull *Regexp, text string) ([]int, string) {
   209  	refull.longest = false
   210  	return refull.FindStringSubmatchIndex(text), "[full]"
   211  }
   212  
   213  func runPartial(re, refull *Regexp, text string) ([]int, string) {
   214  	re.longest = false
   215  	return re.FindStringSubmatchIndex(text), ""
   216  }
   217  
   218  func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
   219  	refull.longest = true
   220  	return refull.FindStringSubmatchIndex(text), "[full,longest]"
   221  }
   222  
   223  func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
   224  	re.longest = true
   225  	return re.FindStringSubmatchIndex(text), "[longest]"
   226  }
   227  
// match parallels run, index for index: match[i] reports only whether
// the regexp matches in the same mode as run[i]. testRE2 uses it to
// check that MatchString agrees with the expected result once the
// submatch indices have been verified.
var match = []func(*Regexp, *Regexp, string) (bool, string){
	matchFull,
	matchPartial,
	matchFullLongest,
	matchPartialLongest,
}
   234  
   235  func matchFull(re, refull *Regexp, text string) (bool, string) {
   236  	refull.longest = false
   237  	return refull.MatchString(text), "[full]"
   238  }
   239  
   240  func matchPartial(re, refull *Regexp, text string) (bool, string) {
   241  	re.longest = false
   242  	return re.MatchString(text), ""
   243  }
   244  
   245  func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
   246  	refull.longest = true
   247  	return refull.MatchString(text), "[full,longest]"
   248  }
   249  
   250  func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
   251  	re.longest = true
   252  	return re.MatchString(text), "[longest]"
   253  }
   254  
   255  func isSingleBytes(s string) bool {
   256  	for _, c := range s {
   257  		if c >= utf8.RuneSelf {
   258  			return false
   259  		}
   260  	}
   261  	return true
   262  }
   263  
   264  func tryCompile(s string) (re *Regexp, err error) {
   265  	// Protect against panic during Compile.
   266  	defer func() {
   267  		if r := recover(); r != nil {
   268  			err = fmt.Errorf("panic: %v", r)
   269  		}
   270  	}()
   271  	return Compile(s)
   272  }
   273  
   274  func parseResult(t *testing.T, file string, lineno int, res string) []int {
   275  	// A single - indicates no match.
   276  	if res == "-" {
   277  		return nil
   278  	}
   279  	// Otherwise, a space-separated list of pairs.
   280  	n := 1
   281  	for j := 0; j < len(res); j++ {
   282  		if res[j] == ' ' {
   283  			n++
   284  		}
   285  	}
   286  	out := make([]int, 2*n)
   287  	i := 0
   288  	n = 0
   289  	for j := 0; j <= len(res); j++ {
   290  		if j == len(res) || res[j] == ' ' {
   291  			// Process a single pair.  - means no submatch.
   292  			pair := res[i:j]
   293  			if pair == "-" {
   294  				out[n] = -1
   295  				out[n+1] = -1
   296  			} else {
   297  				loStr, hiStr, _ := strings.Cut(pair, "-")
   298  				lo, err1 := strconv.Atoi(loStr)
   299  				hi, err2 := strconv.Atoi(hiStr)
   300  				if err1 != nil || err2 != nil || lo > hi {
   301  					t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
   302  				}
   303  				out[n] = lo
   304  				out[n+1] = hi
   305  			}
   306  			n += 2
   307  			i = j + 1
   308  		}
   309  	}
   310  	return out
   311  }
   312  
   313  func same(x, y []int) bool {
   314  	if len(x) != len(y) {
   315  		return false
   316  	}
   317  	for i, xi := range x {
   318  		if xi != y[i] {
   319  			return false
   320  		}
   321  	}
   322  	return true
   323  }
   324  
   325  // TestFowler runs this package's regexp API against the
   326  // POSIX regular expression tests collected by Glenn Fowler
   327  // at http://www2.research.att.com/~astopen/testregex/testregex.html.
   328  func TestFowler(t *testing.T) {
   329  	files, err := filepath.Glob("testdata/*.dat")
   330  	if err != nil {
   331  		t.Fatal(err)
   332  	}
   333  	for _, file := range files {
   334  		t.Log(file)
   335  		testFowler(t, file)
   336  	}
   337  }
   338  
// notab matches a maximal run of non-tab characters. testFowler uses it
// to split a test specification line into its tab-separated fields;
// because runs of tabs never match, consecutive tabs act as a single
// separator.
var notab = MustCompilePOSIX(`[^\t]+`)
   340  
// testFowler runs the tests in one Fowler-format .dat file.
//
// Each specification line is split into tab-separated fields: flags,
// pattern, text, expected outcome and an optional comment. For every
// 'E' (extended) or 'L' (literal) mode letter in the flags the pattern
// is compiled with POSIX syntax, honoring the 'i' (ignore case) flag,
// and the results of MatchString and FindStringSubmatchIndex are
// compared with the expected outcome. Other mode letters are skipped,
// as are control lines and lines containing a NIL field.
//
// Failures are reported with t.Errorf so that the remaining lines of the
// file still run.
func testFowler(t *testing.T, file string) {
	f, err := os.Open(file)
	if err != nil {
		t.Error(err)
		return
	}
	defer f.Close()
	b := bufio.NewReader(f)
	lineno := 0
	lastRegexp := "" // most recent pattern, substituted for a pattern of SAME
Reading:
	for {
		lineno++
		line, err := b.ReadString('\n')
		if err != nil {
			// NOTE(review): ReadString also returns io.EOF together with a
			// final line that lacks a trailing newline; such a line is
			// dropped here. Presumably the data files always end with a
			// newline; confirm.
			if err != io.EOF {
				t.Errorf("%s:%d: %v", file, lineno, err)
			}
			break Reading
		}

		// http://www2.research.att.com/~astopen/man/man1/testregex.html
		//
		// INPUT FORMAT
		//   Input lines may be blank, a comment beginning with #, or a test
		//   specification. A specification is five fields separated by one
		//   or more tabs. NULL denotes the empty string and NIL denotes the
		//   0 pointer.
		if line[0] == '#' || line[0] == '\n' {
			continue Reading
		}
		line = line[:len(line)-1] // drop the trailing newline
		field := notab.FindAllString(line, -1)
		for i, f := range field {
			if f == "NULL" {
				field[i] = ""
			}
			if f == "NIL" {
				t.Logf("%s:%d: skip: %s", file, lineno, line)
				continue Reading
			}
		}
		if len(field) == 0 {
			continue Reading
		}

		//   Field 1: the regex(3) flags to apply, one character per REG_feature
		//   flag. The test is skipped if REG_feature is not supported by the
		//   implementation. If the first character is not [BEASKLP] then the
		//   specification is a global control line. One or more of [BEASKLP] may be
		//   specified; the test will be repeated for each mode.
		//
		//     B 	basic			BRE	(grep, ed, sed)
		//     E 	REG_EXTENDED		ERE	(egrep)
		//     A	REG_AUGMENTED		ARE	(egrep with negation)
		//     S	REG_SHELL		SRE	(sh glob)
		//     K	REG_SHELL|REG_AUGMENTED	KRE	(ksh glob)
		//     L	REG_LITERAL		LRE	(fgrep)
		//
		//     a	REG_LEFT|REG_RIGHT	implicit ^...$
		//     b	REG_NOTBOL		lhs does not match ^
		//     c	REG_COMMENT		ignore space and #...\n
		//     d	REG_SHELL_DOT		explicit leading . match
		//     e	REG_NOTEOL		rhs does not match $
		//     f	REG_MULTIPLE		multiple \n separated patterns
		//     g	FNM_LEADING_DIR		testfnmatch only -- match until /
		//     h	REG_MULTIREF		multiple digit backref
		//     i	REG_ICASE		ignore case
		//     j	REG_SPAN		. matches \n
		//     k	REG_ESCAPE		\ to escape [...] delimiter
		//     l	REG_LEFT		implicit ^...
		//     m	REG_MINIMAL		minimal match
		//     n	REG_NEWLINE		explicit \n match
		//     o	REG_ENCLOSED		(|&) magic inside [@|&](...)
		//     p	REG_SHELL_PATH		explicit / match
		//     q	REG_DELIMITED		delimited pattern
		//     r	REG_RIGHT		implicit ...$
		//     s	REG_SHELL_ESCAPED	\ not special
		//     t	REG_MUSTDELIM		all delimiters must be specified
		//     u	standard unspecified behavior -- errors not counted
		//     v	REG_CLASS_ESCAPE	\ special inside [...]
		//     w	REG_NOSUB		no subexpression match array
		//     x	REG_LENIENT		let some errors slide
		//     y	REG_LEFT		regexec() implicit ^...
		//     z	REG_NULL		NULL subexpressions ok
		//     $	                        expand C \c escapes in fields 2 and 3
		//     /	                        field 2 is a regsubcomp() expression
		//     =	                        field 3 is a regdecomp() expression
		//
		//   Field 1 control lines:
		//
		//     C		set LC_COLLATE and LC_CTYPE to locale in field 2
		//
		//     ?test ...	output field 5 if passed and != EXPECTED, silent otherwise
		//     &test ...	output field 5 if current and previous passed
		//     |test ...	output field 5 if current passed and previous failed
		//     ; ...	output field 2 if previous failed
		//     {test ...	skip if failed until }
		//     }		end of skip
		//
		//     : comment		comment copied as output NOTE
		//     :comment:test	:comment: ignored
		//     N[OTE] comment	comment copied as output NOTE
		//     T[EST] comment	comment
		//
		//     number		use number for nmatch (20 by default)
		flag := field[0]
		switch flag[0] {
		case '?', '&', '|', ';', '{', '}':
			// Ignore all the control operators.
			// Just run everything.
			flag = flag[1:]
			if flag == "" {
				continue Reading
			}
		case ':':
			// ":comment:test" keeps only the part after the second colon;
			// a lone ": comment" has no second colon and is skipped.
			var ok bool
			if _, flag, ok = strings.Cut(flag[1:], ":"); !ok {
				t.Logf("skip: %s", line)
				continue Reading
			}
		case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			t.Logf("skip: %s", line)
			continue Reading
		}

		// Can check field count now that we've handled the myriad comment formats.
		if len(field) < 4 {
			t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
			continue Reading
		}

		// Expand C escapes (a.k.a. Go escapes).
		// A field that fails to unquote is reported and left holding
		// whatever strconv.Unquote returned; the test still runs.
		if strings.Contains(flag, "$") {
			f := `"` + field[1] + `"`
			if field[1], err = strconv.Unquote(f); err != nil {
				t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
			}
			f = `"` + field[2] + `"`
			if field[2], err = strconv.Unquote(f); err != nil {
				t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
			}
		}

		//   Field 2: the regular expression pattern; SAME uses the pattern from
		//     the previous specification.
		//
		if field[1] == "SAME" {
			field[1] = lastRegexp
		}
		lastRegexp = field[1]

		//   Field 3: the string to match.
		text := field[2]

		//   Field 4: the test outcome...
		ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
		if !ok {
			t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
			continue Reading
		}

		//   Field 5: optional comment appended to the report.

	Testing:
		// Run test once for each specified capital letter mode that we support.
		for _, c := range flag {
			pattern := field[1]
			syn := syntax.POSIX | syntax.ClassNL
			switch c {
			default:
				continue Testing
			case 'E':
				// extended regexp (what we support)
			case 'L':
				// literal
				pattern = QuoteMeta(pattern)
			}

			// Modifier flags apply to every mode, so scan the whole flag
			// string again rather than just the current character.
			for _, c := range flag {
				switch c {
				case 'i':
					syn |= syntax.FoldCase
				}
			}

			re, err := compile(pattern, syn, true)
			if err != nil {
				if shouldCompile {
					t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
				}
				continue Testing
			}
			if !shouldCompile {
				t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
				continue Testing
			}
			match := re.MatchString(text)
			if match != shouldMatch {
				t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
				continue Testing
			}
			have := re.FindStringSubmatchIndex(text)
			if (len(have) > 0) != match {
				t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
				continue Testing
			}
			// The expected outcome may list fewer positions than the regexp
			// has subexpressions; compare only the listed prefix.
			if len(have) > len(pos) {
				have = have[:len(pos)]
			}
			if !same(have, pos) {
				t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
			}
		}
	}
}
   557  
   558  func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
   559  	//   Field 4: the test outcome. This is either one of the posix error
   560  	//     codes (with REG_ omitted) or the match array, a list of (m,n)
   561  	//     entries with m and n being first and last+1 positions in the
   562  	//     field 3 string, or NULL if REG_NOSUB is in effect and success
   563  	//     is expected. BADPAT is acceptable in place of any regcomp(3)
   564  	//     error code. The match[] array is initialized to (-2,-2) before
   565  	//     each test. All array elements from 0 to nmatch-1 must be specified
   566  	//     in the outcome. Unspecified endpoints (offset -1) are denoted by ?.
   567  	//     Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a
   568  	//     matched (?{...}) expression, where x is the text enclosed by {...},
   569  	//     o is the expression ordinal counting from 1, and n is the length of
   570  	//     the unmatched portion of the subject string. If x starts with a
   571  	//     number then that is the return value of re_execf(), otherwise 0 is
   572  	//     returned.
   573  	switch {
   574  	case s == "":
   575  		// Match with no position information.
   576  		ok = true
   577  		compiled = true
   578  		matched = true
   579  		return
   580  	case s == "NOMATCH":
   581  		// Match failure.
   582  		ok = true
   583  		compiled = true
   584  		matched = false
   585  		return
   586  	case 'A' <= s[0] && s[0] <= 'Z':
   587  		// All the other error codes are compile errors.
   588  		ok = true
   589  		compiled = false
   590  		return
   591  	}
   592  	compiled = true
   593  
   594  	var x []int
   595  	for s != "" {
   596  		var end byte = ')'
   597  		if len(x)%2 == 0 {
   598  			if s[0] != '(' {
   599  				ok = false
   600  				return
   601  			}
   602  			s = s[1:]
   603  			end = ','
   604  		}
   605  		i := 0
   606  		for i < len(s) && s[i] != end {
   607  			i++
   608  		}
   609  		if i == 0 || i == len(s) {
   610  			ok = false
   611  			return
   612  		}
   613  		var v = -1
   614  		var err error
   615  		if s[:i] != "?" {
   616  			v, err = strconv.Atoi(s[:i])
   617  			if err != nil {
   618  				ok = false
   619  				return
   620  			}
   621  		}
   622  		x = append(x, v)
   623  		s = s[i+1:]
   624  	}
   625  	if len(x)%2 != 0 {
   626  		ok = false
   627  		return
   628  	}
   629  	ok = true
   630  	matched = true
   631  	pos = x
   632  	return
   633  }
   634  
   635  var text []byte
   636  
   637  func makeText(n int) []byte {
   638  	if len(text) >= n {
   639  		return text[:n]
   640  	}
   641  	text = make([]byte, n)
   642  	x := ^uint32(0)
   643  	for i := range text {
   644  		x += x
   645  		x ^= 1
   646  		if int32(x) < 0 {
   647  			x ^= 0x88888eef
   648  		}
   649  		if x%31 == 0 {
   650  			text[i] = '\n'
   651  		} else {
   652  			text[i] = byte(x%(0x7E+1-0x20) + 0x20)
   653  		}
   654  	}
   655  	return text
   656  }
   657  
   658  func BenchmarkMatch(b *testing.B) {
   659  	isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
   660  
   661  	for _, data := range benchData {
   662  		r := MustCompile(data.re)
   663  		for _, size := range benchSizes {
   664  			if (isRaceBuilder || testing.Short()) && size.n > 1<<10 {
   665  				continue
   666  			}
   667  			t := makeText(size.n)
   668  			b.Run(data.name+"/"+size.name, func(b *testing.B) {
   669  				b.SetBytes(int64(size.n))
   670  				for i := 0; i < b.N; i++ {
   671  					if r.Match(t) {
   672  						b.Fatal("match!")
   673  					}
   674  				}
   675  			})
   676  		}
   677  	}
   678  }
   679  
   680  func BenchmarkMatch_onepass_regex(b *testing.B) {
   681  	isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
   682  	r := MustCompile(`(?s)\A.*\z`)
   683  	if r.onepass == nil {
   684  		b.Fatalf("want onepass regex, but %q is not onepass", r)
   685  	}
   686  	for _, size := range benchSizes {
   687  		if (isRaceBuilder || testing.Short()) && size.n > 1<<10 {
   688  			continue
   689  		}
   690  		t := makeText(size.n)
   691  		b.Run(size.name, func(b *testing.B) {
   692  			b.SetBytes(int64(size.n))
   693  			b.ReportAllocs()
   694  			for i := 0; i < b.N; i++ {
   695  				if !r.Match(t) {
   696  					b.Fatal("not match!")
   697  				}
   698  			}
   699  		})
   700  	}
   701  }
   702  
// benchData lists the patterns exercised by BenchmarkMatch. name is used
// as the first component of the sub-benchmark name and re is the regexp
// source. BenchmarkMatch treats a match as a fatal error, so none of
// these patterns is expected to match the text produced by makeText.
var benchData = []struct{ name, re string }{
	{"Easy0", "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
	{"Easy0i", "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"},
	{"Easy1", "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"},
	{"Medium", "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
	{"Hard", "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
	{"Hard1", "ABCD|CDEF|EFGH|GHIJ|IJKL|KLMN|MNOP|OPQR|QRST|STUV|UVWX|WXYZ"},
}
   711  
// benchSizes lists the input sizes used by the match benchmarks. name
// labels the sub-benchmark and n is the input length in bytes. The
// benchmarks skip sizes above 1 KiB on race builders and in short mode.
var benchSizes = []struct {
	name string
	n    int
}{
	{"16", 16},
	{"32", 32},
	{"1K", 1 << 10},
	{"32K", 32 << 10},
	{"1M", 1 << 20},
	{"32M", 32 << 20},
}
   723  
   724  func TestLongest(t *testing.T) {
   725  	re, err := Compile(`a(|b)`)
   726  	if err != nil {
   727  		t.Fatal(err)
   728  	}
   729  	if g, w := re.FindString("ab"), "a"; g != w {
   730  		t.Errorf("first match was %q, want %q", g, w)
   731  	}
   732  	re.Longest()
   733  	if g, w := re.FindString("ab"), "ab"; g != w {
   734  		t.Errorf("longest match was %q, want %q", g, w)
   735  	}
   736  }
   737  
   738  // TestProgramTooLongForBacktrack tests that a regex which is too long
   739  // for the backtracker still executes properly.
   740  func TestProgramTooLongForBacktrack(t *testing.T) {
   741  	longRegex := MustCompile(`(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|twentyone|twentytwo|twentythree|twentyfour|twentyfive|twentysix|twentyseven|twentyeight|twentynine|thirty|thirtyone|thirtytwo|thirtythree|thirtyfour|thirtyfive|thirtysix|thirtyseven|thirtyeight|thirtynine|forty|fortyone|fortytwo|fortythree|fortyfour|fortyfive|fortysix|fortyseven|fortyeight|fortynine|fifty|fiftyone|fiftytwo|fiftythree|fiftyfour|fiftyfive|fiftysix|fiftyseven|fiftyeight|fiftynine|sixty|sixtyone|sixtytwo|sixtythree|sixtyfour|sixtyfive|sixtysix|sixtyseven|sixtyeight|sixtynine|seventy|seventyone|seventytwo|seventythree|seventyfour|seventyfive|seventysix|seventyseven|seventyeight|seventynine|eighty|eightyone|eightytwo|eightythree|eightyfour|eightyfive|eightysix|eightyseven|eightyeight|eightynine|ninety|ninetyone|ninetytwo|ninetythree|ninetyfour|ninetyfive|ninetysix|ninetyseven|ninetyeight|ninetynine|onehundred)`)
   742  	if !longRegex.MatchString("two") {
   743  		t.Errorf("longRegex.MatchString(\"two\") was false, want true")
   744  	}
   745  	if longRegex.MatchString("xxx") {
   746  		t.Errorf("longRegex.MatchString(\"xxx\") was true, want false")
   747  	}
   748  }
   749  

View as plain text