Source file
src/strings/replace_test.go
1
2
3
4
5 package strings_test
6
7 import (
8 "bytes"
9 "fmt"
10 . "strings"
11 "testing"
12 )
13
14 var htmlEscaper = NewReplacer(
15 "&", "&",
16 "<", "<",
17 ">", ">",
18 `"`, """,
19 "'", "'",
20 )
21
22 var htmlUnescaper = NewReplacer(
23 "&", "&",
24 "<", "<",
25 ">", ">",
26 """, `"`,
27 "'", "'",
28 )
29
30
31 func oldHTMLEscape(s string) string {
32 s = Replace(s, "&", "&", -1)
33 s = Replace(s, "<", "<", -1)
34 s = Replace(s, ">", ">", -1)
35 s = Replace(s, `"`, """, -1)
36 s = Replace(s, "'", "'", -1)
37 return s
38 }
39
40 var capitalLetters = NewReplacer("a", "A", "b", "B")
41
42
43 func TestReplacer(t *testing.T) {
44 type testCase struct {
45 r *Replacer
46 in, out string
47 }
48 var testCases []testCase
49
50
51 str := func(b byte) string {
52 return string([]byte{b})
53 }
54 var s []string
55
56
57 s = nil
58 for i := 0; i < 256; i++ {
59 s = append(s, str(byte(i)), str(byte(i+1)))
60 }
61 inc := NewReplacer(s...)
62
63
64 testCases = append(testCases,
65 testCase{capitalLetters, "brad", "BrAd"},
66 testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
67 testCase{capitalLetters, "", ""},
68
69 testCase{inc, "brad", "csbe"},
70 testCase{inc, "\x00\xff", "\x01\x00"},
71 testCase{inc, "", ""},
72
73 testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"},
74 )
75
76
77 s = nil
78 for i := 0; i < 256; i++ {
79 n := i + 1 - 'a'
80 if n < 1 {
81 n = 1
82 }
83 s = append(s, str(byte(i)), Repeat(str(byte(i)), n))
84 }
85 repeat := NewReplacer(s...)
86
87
88 testCases = append(testCases,
89 testCase{htmlEscaper, "No changes", "No changes"},
90 testCase{htmlEscaper, "I <3 escaping & stuff", "I <3 escaping & stuff"},
91 testCase{htmlEscaper, "&&&", "&&&"},
92 testCase{htmlEscaper, "", ""},
93
94 testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
95 testCase{repeat, "abba", "abbbba"},
96 testCase{repeat, "", ""},
97
98 testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"},
99 )
100
101
102
103 testCases = append(testCases,
104 testCase{htmlUnescaper, "&amp;", "&"},
105 testCase{htmlUnescaper, "<b>HTML's neat</b>", "<b>HTML's neat</b>"},
106 testCase{htmlUnescaper, "", ""},
107
108 testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
109
110 testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
111
112 testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
113 )
114
115
116
117 gen1 := NewReplacer(
118 "aaa", "3[aaa]",
119 "aa", "2[aa]",
120 "a", "1[a]",
121 "i", "i",
122 "longerst", "most long",
123 "longer", "medium",
124 "long", "short",
125 "xx", "xx",
126 "x", "X",
127 "X", "Y",
128 "Y", "Z",
129 )
130 testCases = append(testCases,
131 testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
132 testCase{gen1, "long, longerst, longer", "short, most long, medium"},
133 testCase{gen1, "xxxxx", "xxxxX"},
134 testCase{gen1, "XiX", "YiY"},
135 testCase{gen1, "", ""},
136 )
137
138
139 gen2 := NewReplacer(
140 "roses", "red",
141 "violets", "blue",
142 "sugar", "sweet",
143 )
144 testCases = append(testCases,
145 testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
146 testCase{gen2, "", ""},
147 )
148
149
150 gen3 := NewReplacer(
151 "abracadabra", "poof",
152 "abracadabrakazam", "splat",
153 "abraham", "lincoln",
154 "abrasion", "scrape",
155 "abraham", "isaac",
156 )
157 testCases = append(testCases,
158 testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
159 testCase{gen3, "abrasion abracad", "scrape abracad"},
160 testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
161 testCase{gen3, "", ""},
162 )
163
164
165
166 foo1 := NewReplacer(
167 "foo1", "A",
168 "foo2", "B",
169 "foo3", "C",
170 )
171 foo2 := NewReplacer(
172 "foo1", "A",
173 "foo2", "B",
174 "foo31", "C",
175 "foo32", "D",
176 )
177 foo3 := NewReplacer(
178 "foo11", "A",
179 "foo12", "B",
180 "foo31", "C",
181 "foo32", "D",
182 )
183 foo4 := NewReplacer(
184 "foo12", "B",
185 "foo32", "D",
186 )
187 testCases = append(testCases,
188 testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
189 testCase{foo1, "", ""},
190
191 testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
192 testCase{foo2, "", ""},
193
194 testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
195 testCase{foo3, "", ""},
196
197 testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
198 testCase{foo4, "", ""},
199 )
200
201
202 allBytes := make([]byte, 256)
203 for i := range allBytes {
204 allBytes[i] = byte(i)
205 }
206 allString := string(allBytes)
207 genAll := NewReplacer(
208 allString, "[all]",
209 "\xff", "[ff]",
210 "\x00", "[00]",
211 )
212 testCases = append(testCases,
213 testCase{genAll, allString, "[all]"},
214 testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
215 testCase{genAll, "", ""},
216 )
217
218
219
220 blankToX1 := NewReplacer("", "X")
221 blankToX2 := NewReplacer("", "X", "", "")
222 blankHighPriority := NewReplacer("", "X", "o", "O")
223 blankLowPriority := NewReplacer("o", "O", "", "X")
224 blankNoOp1 := NewReplacer("", "")
225 blankNoOp2 := NewReplacer("", "", "", "A")
226 blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z")
227 testCases = append(testCases,
228 testCase{blankToX1, "foo", "XfXoXoX"},
229 testCase{blankToX1, "", "X"},
230
231 testCase{blankToX2, "foo", "XfXoXoX"},
232 testCase{blankToX2, "", "X"},
233
234 testCase{blankHighPriority, "oo", "XOXOX"},
235 testCase{blankHighPriority, "ii", "XiXiX"},
236 testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
237 testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
238 testCase{blankHighPriority, "", "X"},
239
240 testCase{blankLowPriority, "oo", "OOX"},
241 testCase{blankLowPriority, "ii", "XiXiX"},
242 testCase{blankLowPriority, "oiio", "OXiXiOX"},
243 testCase{blankLowPriority, "iooi", "XiOOXiX"},
244 testCase{blankLowPriority, "", "X"},
245
246 testCase{blankNoOp1, "foo", "foo"},
247 testCase{blankNoOp1, "", ""},
248
249 testCase{blankNoOp2, "foo", "foo"},
250 testCase{blankNoOp2, "", ""},
251
252 testCase{blankFoo, "foobarfoobaz", "XRXZX"},
253 testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
254 testCase{blankFoo, "", "X"},
255 )
256
257
258
259 abcMatcher := NewReplacer("abc", "[match]")
260
261 testCases = append(testCases,
262 testCase{abcMatcher, "", ""},
263 testCase{abcMatcher, "ab", "ab"},
264 testCase{abcMatcher, "abc", "[match]"},
265 testCase{abcMatcher, "abcd", "[match]d"},
266 testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
267 )
268
269
270
271 noHello := NewReplacer("Hello", "")
272 testCases = append(testCases,
273 testCase{noHello, "Hello", ""},
274 testCase{noHello, "Hellox", "x"},
275 testCase{noHello, "xHello", "x"},
276 testCase{noHello, "xHellox", "xx"},
277 )
278
279
280
281 nop := NewReplacer()
282 testCases = append(testCases,
283 testCase{nop, "abc", "abc"},
284 testCase{nop, "", ""},
285 )
286
287
288
289 for i, tc := range testCases {
290 if s := tc.r.Replace(tc.in); s != tc.out {
291 t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out)
292 }
293 var buf bytes.Buffer
294 n, err := tc.r.WriteString(&buf, tc.in)
295 if err != nil {
296 t.Errorf("%d. WriteString: %v", i, err)
297 continue
298 }
299 got := buf.String()
300 if got != tc.out {
301 t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out)
302 continue
303 }
304 if n != len(tc.out) {
305 t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)",
306 i, tc.in, n, len(tc.out), tc.out)
307 }
308 }
309 }
310
311 var algorithmTestCases = []struct {
312 r *Replacer
313 want string
314 }{
315 {capitalLetters, "*strings.byteReplacer"},
316 {htmlEscaper, "*strings.byteStringReplacer"},
317 {NewReplacer("12", "123"), "*strings.singleStringReplacer"},
318 {NewReplacer("1", "12"), "*strings.byteStringReplacer"},
319 {NewReplacer("", "X"), "*strings.genericReplacer"},
320 {NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"},
321 }
322
323
324 func TestPickAlgorithm(t *testing.T) {
325 for i, tc := range algorithmTestCases {
326 got := fmt.Sprintf("%T", tc.r.Replacer())
327 if got != tc.want {
328 t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want)
329 }
330 }
331 }
332
333 type errWriter struct{}
334
335 func (errWriter) Write(p []byte) (n int, err error) {
336 return 0, fmt.Errorf("unwritable")
337 }
338
339
340
341 func TestWriteStringError(t *testing.T) {
342 for i, tc := range algorithmTestCases {
343 n, err := tc.r.WriteString(errWriter{}, "abc")
344 if n != 0 || err == nil || err.Error() != "unwritable" {
345 t.Errorf("%d. WriteStringError = %d, %v, want 0, unwritable", i, n, err)
346 }
347 }
348 }
349
350
351
352
353 func TestGenericTrieBuilding(t *testing.T) {
354 testCases := []struct{ in, out string }{
355 {"abc;abdef;abdefgh;xx;xy;z", `-
356 a-
357 .b-
358 ..c+
359 ..d-
360 ...ef+
361 .....gh+
362 x-
363 .x+
364 .y+
365 z+
366 `},
367 {"abracadabra;abracadabrakazam;abraham;abrasion", `-
368 a-
369 .bra-
370 ....c-
371 .....adabra+
372 ...........kazam+
373 ....h-
374 .....am+
375 ....s-
376 .....ion+
377 `},
378 {"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `-
379 X+
380 Y+
381 a+
382 .a+
383 ..a+
384 i+
385 l-
386 .ong+
387 ....er+
388 ......st+
389 x+
390 .x+
391 `},
392 {"foo;;foo;foo1", `+
393 f-
394 .oo+
395 ...1+
396 `},
397 }
398
399 for _, tc := range testCases {
400 keys := Split(tc.in, ";")
401 args := make([]string, len(keys)*2)
402 for i, key := range keys {
403 args[i*2] = key
404 }
405
406 got := NewReplacer(args...).PrintTrie()
407
408 wantbuf := make([]byte, 0, len(tc.out))
409 for i := 0; i < len(tc.out); i++ {
410 if tc.out[i] != '\t' {
411 wantbuf = append(wantbuf, tc.out[i])
412 }
413 }
414 want := string(wantbuf)
415
416 if got != want {
417 t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want)
418 }
419 }
420 }
421
422 func BenchmarkGenericNoMatch(b *testing.B) {
423 str := Repeat("A", 100) + Repeat("B", 100)
424 generic := NewReplacer("a", "A", "b", "B", "12", "123")
425 for i := 0; i < b.N; i++ {
426 generic.Replace(str)
427 }
428 }
429
430 func BenchmarkGenericMatch1(b *testing.B) {
431 str := Repeat("a", 100) + Repeat("b", 100)
432 generic := NewReplacer("a", "A", "b", "B", "12", "123")
433 for i := 0; i < b.N; i++ {
434 generic.Replace(str)
435 }
436 }
437
438 func BenchmarkGenericMatch2(b *testing.B) {
439 str := Repeat("It's <b>HTML</b>!", 100)
440 for i := 0; i < b.N; i++ {
441 htmlUnescaper.Replace(str)
442 }
443 }
444
445 func benchmarkSingleString(b *testing.B, pattern, text string) {
446 r := NewReplacer(pattern, "[match]")
447 b.SetBytes(int64(len(text)))
448 b.ResetTimer()
449 for i := 0; i < b.N; i++ {
450 r.Replace(text)
451 }
452 }
453
454 func BenchmarkSingleMaxSkipping(b *testing.B) {
455 benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000))
456 }
457
458 func BenchmarkSingleLongSuffixFail(b *testing.B) {
459 benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002))
460 }
461
462 func BenchmarkSingleMatch(b *testing.B) {
463 benchmarkSingleString(b, "abcdef", Repeat("abcdefghijklmno", 1000))
464 }
465
466 func BenchmarkByteByteNoMatch(b *testing.B) {
467 str := Repeat("A", 100) + Repeat("B", 100)
468 for i := 0; i < b.N; i++ {
469 capitalLetters.Replace(str)
470 }
471 }
472
473 func BenchmarkByteByteMatch(b *testing.B) {
474 str := Repeat("a", 100) + Repeat("b", 100)
475 for i := 0; i < b.N; i++ {
476 capitalLetters.Replace(str)
477 }
478 }
479
480 func BenchmarkByteStringMatch(b *testing.B) {
481 str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">"
482 for i := 0; i < b.N; i++ {
483 htmlEscaper.Replace(str)
484 }
485 }
486
487 func BenchmarkHTMLEscapeNew(b *testing.B) {
488 str := "I <3 to escape HTML & other text too."
489 for i := 0; i < b.N; i++ {
490 htmlEscaper.Replace(str)
491 }
492 }
493
494 func BenchmarkHTMLEscapeOld(b *testing.B) {
495 str := "I <3 to escape HTML & other text too."
496 for i := 0; i < b.N; i++ {
497 oldHTMLEscape(str)
498 }
499 }
500
501 func BenchmarkByteStringReplacerWriteString(b *testing.B) {
502 str := Repeat("I <3 to escape HTML & other text too.", 100)
503 buf := new(bytes.Buffer)
504 for i := 0; i < b.N; i++ {
505 htmlEscaper.WriteString(buf, str)
506 buf.Reset()
507 }
508 }
509
510 func BenchmarkByteReplacerWriteString(b *testing.B) {
511 str := Repeat("abcdefghijklmnopqrstuvwxyz", 100)
512 buf := new(bytes.Buffer)
513 for i := 0; i < b.N; i++ {
514 capitalLetters.WriteString(buf, str)
515 buf.Reset()
516 }
517 }
518
519
520 func BenchmarkByteByteReplaces(b *testing.B) {
521 str := Repeat("a", 100) + Repeat("b", 100)
522 for i := 0; i < b.N; i++ {
523 Replace(Replace(str, "a", "A", -1), "b", "B", -1)
524 }
525 }
526
527
528 func BenchmarkByteByteMap(b *testing.B) {
529 str := Repeat("a", 100) + Repeat("b", 100)
530 fn := func(r rune) rune {
531 switch r {
532 case 'a':
533 return 'A'
534 case 'b':
535 return 'B'
536 }
537 return r
538 }
539 for i := 0; i < b.N; i++ {
540 Map(fn, str)
541 }
542 }
543
544 var mapdata = []struct{ name, data string }{
545 {"ASCII", "a b c d e f g h i j k l m n o p q r s t u v w x y z"},
546 {"Greek", "α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ ς σ τ υ φ χ ψ ω"},
547 }
548
549 func BenchmarkMap(b *testing.B) {
550 mapidentity := func(r rune) rune {
551 return r
552 }
553
554 b.Run("identity", func(b *testing.B) {
555 for _, md := range mapdata {
556 b.Run(md.name, func(b *testing.B) {
557 for i := 0; i < b.N; i++ {
558 Map(mapidentity, md.data)
559 }
560 })
561 }
562 })
563
564 mapchange := func(r rune) rune {
565 if 'a' <= r && r <= 'z' {
566 return r + 'A' - 'a'
567 }
568 if 'α' <= r && r <= 'ω' {
569 return r + 'Α' - 'α'
570 }
571 return r
572 }
573
574 b.Run("change", func(b *testing.B) {
575 for _, md := range mapdata {
576 b.Run(md.name, func(b *testing.B) {
577 for i := 0; i < b.N; i++ {
578 Map(mapchange, md.data)
579 }
580 })
581 }
582 })
583 }
584
View as plain text