1
2
3
4
5 package utf8_test
6
7 import (
8 "bytes"
9 "testing"
10 "unicode"
11 . "unicode/utf8"
12 )
13
14
// init cross-checks the constants that utf8 redefines against their
// counterparts in package unicode. It panics during package initialization
// on the first mismatch it finds.
func init() {
	invariants := []struct {
		holds bool
		msg   string
	}{
		{MaxRune == unicode.MaxRune, "utf8.MaxRune is wrong"},
		{RuneError == unicode.ReplacementChar, "utf8.RuneError is wrong"},
	}
	for _, inv := range invariants {
		if !inv.holds {
			panic(inv.msg)
		}
	}
}
23
24
// TestConstants reports, as ordinary test failures, any disagreement between
// the constants utf8 redefines and the originals in package unicode.
func TestConstants(t *testing.T) {
	if got, want := MaxRune, unicode.MaxRune; got != want {
		t.Errorf("utf8.MaxRune is wrong: %x should be %x", got, want)
	}
	if got, want := RuneError, unicode.ReplacementChar; got != want {
		t.Errorf("utf8.RuneError is wrong: %x should be %x", got, want)
	}
}
33
// Utf8Map pairs a rune with the byte sequence the tests expect for it.
type Utf8Map struct {
	r   rune   // code point
	str string // expected UTF-8 bytes for r, held in a string
}
38
// utf8map lists runes together with their expected UTF-8 encodings. The
// encode, decode and FullRune tests iterate over it in both directions.
var utf8map = []Utf8Map{
	// One-byte encodings.
	{0x0000, "\x00"},
	{0x0001, "\x01"},
	{0x007e, "\x7e"},
	{0x007f, "\x7f"},
	// Two-byte encodings.
	{0x0080, "\xc2\x80"},
	{0x0081, "\xc2\x81"},
	{0x00bf, "\xc2\xbf"},
	{0x00c0, "\xc3\x80"},
	{0x00c1, "\xc3\x81"},
	{0x00c8, "\xc3\x88"},
	{0x00d0, "\xc3\x90"},
	{0x00e0, "\xc3\xa0"},
	{0x00f0, "\xc3\xb0"},
	{0x00f8, "\xc3\xb8"},
	{0x00ff, "\xc3\xbf"},
	{0x0100, "\xc4\x80"},
	{0x07ff, "\xdf\xbf"},
	{0x0400, "\xd0\x80"},
	// Three-byte encodings; 0xd7ff and 0xe000 bracket the surrogate range.
	{0x0800, "\xe0\xa0\x80"},
	{0x0801, "\xe0\xa0\x81"},
	{0x1000, "\xe1\x80\x80"},
	{0xd000, "\xed\x80\x80"},
	{0xd7ff, "\xed\x9f\xbf"},
	{0xe000, "\xee\x80\x80"},
	{0xfffe, "\xef\xbf\xbe"},
	{0xffff, "\xef\xbf\xbf"},
	// Four-byte encodings.
	{0x10000, "\xf0\x90\x80\x80"},
	{0x10001, "\xf0\x90\x80\x81"},
	{0x40000, "\xf1\x80\x80\x80"},
	{0x10fffe, "\xf4\x8f\xbf\xbe"},
	{0x10ffff, "\xf4\x8f\xbf\xbf"},
	// The replacement character, encoded as itself.
	{0xFFFD, "\xef\xbf\xbd"},
}
73
// surrogateMap holds the first and last surrogate code points with the
// three-byte sequences that would encode them. TestDecodeSurrogateRune
// expects the decoders to reject these sequences.
var surrogateMap = []Utf8Map{
	{0xd800, "\xed\xa0\x80"},
	{0xdfff, "\xed\xbf\xbf"},
}
78
// testStrings are the sample inputs used by TestSequencing and
// TestRuntimeConversion: empty, ASCII only, multi-byte only, mixed widths,
// a longer mixed string, and a run of bare continuation bytes.
var testStrings = []string{
	"",
	"abcd",
	"☺☻☹",
	"日a本b語ç日ð本Ê語þ日¥本¼語i日©",
	"日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©",
	"\x80\x80\x80\x80",
}
87
88 func TestFullRune(t *testing.T) {
89 for _, m := range utf8map {
90 b := []byte(m.str)
91 if !FullRune(b) {
92 t.Errorf("FullRune(%q) (%U) = false, want true", b, m.r)
93 }
94 s := m.str
95 if !FullRuneInString(s) {
96 t.Errorf("FullRuneInString(%q) (%U) = false, want true", s, m.r)
97 }
98 b1 := b[0 : len(b)-1]
99 if FullRune(b1) {
100 t.Errorf("FullRune(%q) = true, want false", b1)
101 }
102 s1 := string(b1)
103 if FullRuneInString(s1) {
104 t.Errorf("FullRune(%q) = true, want false", s1)
105 }
106 }
107 for _, s := range []string{"\xc0", "\xc1"} {
108 b := []byte(s)
109 if !FullRune(b) {
110 t.Errorf("FullRune(%q) = false, want true", s)
111 }
112 if !FullRuneInString(s) {
113 t.Errorf("FullRuneInString(%q) = false, want true", s)
114 }
115 }
116 }
117
118 func TestEncodeRune(t *testing.T) {
119 for _, m := range utf8map {
120 b := []byte(m.str)
121 var buf [10]byte
122 n := EncodeRune(buf[0:], m.r)
123 b1 := buf[0:n]
124 if !bytes.Equal(b, b1) {
125 t.Errorf("EncodeRune(%#04x) = %q want %q", m.r, b1, b)
126 }
127 }
128 }
129
130 func TestAppendRune(t *testing.T) {
131 for _, m := range utf8map {
132 if buf := AppendRune(nil, m.r); string(buf) != m.str {
133 t.Errorf("AppendRune(nil, %#04x) = %s, want %s", m.r, buf, m.str)
134 }
135 if buf := AppendRune([]byte("init"), m.r); string(buf) != "init"+m.str {
136 t.Errorf("AppendRune(init, %#04x) = %s, want %s", m.r, buf, "init"+m.str)
137 }
138 }
139 }
140
// TestDecodeRune runs DecodeRune and DecodeRuneInString over utf8map and
// checks four situations per entry: the exact encoding, the encoding
// followed by extra data, the encoding with its last byte missing, and the
// encoding with one byte corrupted.
func TestDecodeRune(t *testing.T) {
	for _, m := range utf8map {
		b := []byte(m.str)
		r, size := DecodeRune(b)
		if r != m.r || size != len(b) {
			t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, r, size, m.r, len(b))
		}
		s := m.str
		r, size = DecodeRuneInString(s)
		if r != m.r || size != len(b) {
			t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, r, size, m.r, len(b))
		}

		// Trailing data must not change the result. The byte slice is
		// extended to its full capacity (NOTE(review): how many spare bytes
		// that exposes depends on the allocation; confirm), and the string
		// gets an explicit NUL appended.
		r, size = DecodeRune(b[0:cap(b)])
		if r != m.r || size != len(b) {
			t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, r, size, m.r, len(b))
		}
		s = m.str + "\x00"
		r, size = DecodeRuneInString(s)
		if r != m.r || size != len(b) {
			t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, r, size, m.r, len(b))
		}

		// A missing final byte must fail. The expected size is 1, or 0 when
		// removing the byte leaves nothing to decode.
		wantsize := 1
		if wantsize >= len(b) {
			wantsize = 0
		}
		r, size = DecodeRune(b[0 : len(b)-1])
		if r != RuneError || size != wantsize {
			t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b[0:len(b)-1], r, size, RuneError, wantsize)
		}
		s = m.str[0 : len(m.str)-1]
		r, size = DecodeRuneInString(s)
		if r != RuneError || size != wantsize {
			t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, r, size, RuneError, wantsize)
		}

		// A corrupted sequence must fail with size 1. b is modified in
		// place: a one-byte encoding becomes a bare 0x80, and a longer one
		// has its last byte replaced by 0x7F.
		if len(b) == 1 {
			b[0] = 0x80
		} else {
			b[len(b)-1] = 0x7F
		}
		r, size = DecodeRune(b)
		if r != RuneError || size != 1 {
			t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, r, size, RuneError, 1)
		}
		s = string(b)
		r, size = DecodeRuneInString(s)
		if r != RuneError || size != 1 {
			t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, r, size, RuneError, 1)
		}

	}
}
198
199 func TestDecodeSurrogateRune(t *testing.T) {
200 for _, m := range surrogateMap {
201 b := []byte(m.str)
202 r, size := DecodeRune(b)
203 if r != RuneError || size != 1 {
204 t.Errorf("DecodeRune(%q) = %x, %d want %x, %d", b, r, size, RuneError, 1)
205 }
206 s := m.str
207 r, size = DecodeRuneInString(s)
208 if r != RuneError || size != 1 {
209 t.Errorf("DecodeRuneInString(%q) = %x, %d want %x, %d", b, r, size, RuneError, 1)
210 }
211 }
212 }
213
214
215
216 func TestSequencing(t *testing.T) {
217 for _, ts := range testStrings {
218 for _, m := range utf8map {
219 for _, s := range []string{ts + m.str, m.str + ts, ts + m.str + ts} {
220 testSequence(t, s)
221 }
222 }
223 }
224 }
225
// runtimeRuneCount returns the number of runes in s as determined by the
// language's string-to-[]rune conversion rather than by package utf8.
func runtimeRuneCount(s string) int {
	converted := []rune(s)
	return len(converted)
}
229
230
231
232
233
234 func TestRuntimeConversion(t *testing.T) {
235 for _, ts := range testStrings {
236 count := RuneCountInString(ts)
237 if n := runtimeRuneCount(ts); n != count {
238 t.Errorf("%q: len([]rune()) counted %d runes; got %d from RuneCountInString", ts, n, count)
239 break
240 }
241
242 runes := []rune(ts)
243 if n := len(runes); n != count {
244 t.Errorf("%q: []rune() has length %d; got %d from RuneCountInString", ts, n, count)
245 break
246 }
247 i := 0
248 for _, r := range ts {
249 if r != runes[i] {
250 t.Errorf("%q[%d]: expected %c (%U); got %c (%U)", ts, i, runes[i], runes[i], r, r)
251 }
252 i++
253 }
254 }
255 }
256
// invalidSequenceTests are four-byte inputs whose first rune must decode to
// RuneError. They are grouped by leading byte; within a group each entry
// breaks the sequence at a different position.
var invalidSequenceTests = []string{
	// Encodings of the surrogate bounds (see surrogateMap) plus one byte.
	"\xed\xa0\x80\x80",
	"\xed\xbf\xbf\x80",

	// Input that starts with a continuation byte.
	"\x91\x80\x80\x80",

	// Two-byte leads (C2, DF) with a second byte outside 0x80-0xBF.
	"\xC2\x7F\x80\x80",
	"\xC2\xC0\x80\x80",
	"\xDF\x7F\x80\x80",
	"\xDF\xC0\x80\x80",

	// Three-byte lead E0: second byte below 0xA0, or a bad later byte.
	"\xE0\x9F\xBF\x80",
	"\xE0\xA0\x7F\x80",
	"\xE0\xBF\xC0\x80",
	"\xE0\xC0\x80\x80",

	// Three-byte lead E1: a byte outside 0x80-0xBF in second or third place.
	"\xE1\x7F\xBF\x80",
	"\xE1\x80\x7F\x80",
	"\xE1\xBF\xC0\x80",
	"\xE1\xC0\x80\x80",

	// Three-byte lead ED: second byte above 0x9F, or a bad later byte.
	"\xED\x7F\xBF\x80",
	"\xED\x80\x7F\x80",
	"\xED\x9F\xC0\x80",
	"\xED\xA0\x80\x80",

	// Four-byte lead F0: second byte below 0x90, or a bad later byte.
	"\xF0\x8F\xBF\xBF",
	"\xF0\x90\x7F\xBF",
	"\xF0\x90\x80\x7F",
	"\xF0\xBF\xBF\xC0",
	"\xF0\xBF\xC0\x80",
	"\xF0\xC0\x80\x80",

	// Four-byte lead F1: a byte outside 0x80-0xBF in any trailing place.
	"\xF1\x7F\xBF\xBF",
	"\xF1\x80\x7F\xBF",
	"\xF1\x80\x80\x7F",
	"\xF1\xBF\xBF\xC0",
	"\xF1\xBF\xC0\x80",
	"\xF1\xC0\x80\x80",

	// Four-byte lead F4: second byte above 0x8F, or a bad later byte.
	"\xF4\x7F\xBF\xBF",
	"\xF4\x80\x7F\xBF",
	"\xF4\x80\x80\x7F",
	"\xF4\x8F\xBF\xC0",
	"\xF4\x8F\xC0\x80",
	"\xF4\x90\x80\x80",
}
312
// runtimeDecodeRune returns the first rune that a range loop over s yields,
// or -1 when s is empty.
func runtimeDecodeRune(s string) rune {
	if s == "" {
		return -1
	}
	var first rune
	for _, first = range s {
		break
	}
	return first
}
319
320 func TestDecodeInvalidSequence(t *testing.T) {
321 for _, s := range invalidSequenceTests {
322 r1, _ := DecodeRune([]byte(s))
323 if want := RuneError; r1 != want {
324 t.Errorf("DecodeRune(%#x) = %#04x, want %#04x", s, r1, want)
325 return
326 }
327 r2, _ := DecodeRuneInString(s)
328 if want := RuneError; r2 != want {
329 t.Errorf("DecodeRuneInString(%q) = %#04x, want %#04x", s, r2, want)
330 return
331 }
332 if r1 != r2 {
333 t.Errorf("DecodeRune(%#x) = %#04x mismatch with DecodeRuneInString(%q) = %#04x", s, r1, s, r2)
334 return
335 }
336 r3 := runtimeDecodeRune(s)
337 if r2 != r3 {
338 t.Errorf("DecodeRuneInString(%q) = %#04x mismatch with runtime.decoderune(%q) = %#04x", s, r2, s, r3)
339 return
340 }
341 }
342 }
343
// testSequence checks that the decoders agree on s in both directions.
//
// The forward pass ranges over s and requires DecodeRune and
// DecodeRuneInString to return the same rune as the range loop, with equal
// sizes that advance exactly to the next range index. Each rune and its byte
// offset is recorded. The backward pass then walks from the end with
// DecodeLastRune and DecodeLastRuneInString and requires them to reproduce
// the recorded runes and offsets in reverse. The first discrepancy is
// reported and ends the check.
func testSequence(t *testing.T, s string) {
	type runePos struct {
		offset int
		r      rune
	}
	raw := []byte(s)
	seen := make([]runePos, len(s)) // a string has at most one rune per byte
	count := 0
	next := 0 // offset where the decoders say the next rune starts
	for off, want := range s {
		if next != off {
			t.Errorf("Sequence(%q) mismatched index %d, want %d", s, next, off)
			return
		}
		seen[count] = runePos{off, want}
		count++
		tail := s[off:]
		gotBytes, nBytes := DecodeRune(raw[off:])
		if gotBytes != want {
			t.Errorf("DecodeRune(%q) = %#04x, want %#04x", tail, gotBytes, want)
			return
		}
		gotStr, nStr := DecodeRuneInString(tail)
		if gotStr != want {
			t.Errorf("DecodeRuneInString(%q) = %#04x, want %#04x", tail, gotStr, want)
			return
		}
		if nBytes != nStr {
			t.Errorf("DecodeRune/DecodeRuneInString(%q) size mismatch %d/%d", tail, nBytes, nStr)
			return
		}
		next += nBytes
	}

	end := len(s)
	for k := count - 1; end > 0; k-- {
		gotBytes, nBytes := DecodeLastRune(raw[:end])
		gotStr, nStr := DecodeLastRuneInString(s[:end])
		if nBytes != nStr {
			t.Errorf("DecodeLastRune/DecodeLastRuneInString(%q, %d) size mismatch %d/%d", s, end, nBytes, nStr)
			return
		}
		rec := seen[k]
		if gotBytes != rec.r {
			t.Errorf("DecodeLastRune(%q, %d) = %#04x, want %#04x", s, end, gotBytes, rec.r)
			return
		}
		if gotStr != rec.r {
			t.Errorf("DecodeLastRuneInString(%q, %d) = %#04x, want %#04x", s, end, gotStr, rec.r)
			return
		}
		end -= nBytes
		if end != rec.offset {
			t.Errorf("DecodeLastRune(%q) index mismatch at %d, want %d", s, end, rec.offset)
			return
		}
	}
	if end != 0 {
		t.Errorf("DecodeLastRune(%q) finished at %d, not 0", s, end)
	}
}
403
404
// TestNegativeRune checks that encoding the rune -1 writes the same bytes as
// encoding RuneError.
func TestNegativeRune(t *testing.T) {
	want := make([]byte, UTFMax)
	want = want[:EncodeRune(want, RuneError)]

	got := make([]byte, UTFMax)
	got = got[:EncodeRune(got, -1)]

	if bytes.Equal(got, want) {
		return
	}
	t.Errorf("incorrect encoding [% x] for -1; expected [% x]", got, want)
}
414
// RuneCountTest is one case for TestRuneCount.
type RuneCountTest struct {
	in  string // input to count
	out int    // expected number of runes
}
419
// runecounttests lists inputs with their expected rune counts. In the last
// three entries the lead byte \xe2 is not followed by a complete sequence,
// and the expected count equals the number of bytes.
var runecounttests = []RuneCountTest{
	{"abcd", 4},
	{"☺☻☹", 3},
	{"1,2,3,4", 7},
	{"\xe2\x00", 2},
	{"\xe2\x80", 2},
	{"a\xe2\x80", 3},
}
428
429 func TestRuneCount(t *testing.T) {
430 for _, tt := range runecounttests {
431 if out := RuneCountInString(tt.in); out != tt.out {
432 t.Errorf("RuneCountInString(%q) = %d, want %d", tt.in, out, tt.out)
433 }
434 if out := RuneCount([]byte(tt.in)); out != tt.out {
435 t.Errorf("RuneCount(%q) = %d, want %d", tt.in, out, tt.out)
436 }
437 }
438 }
439
// RuneLenTest is one case for TestRuneLen.
type RuneLenTest struct {
	r    rune // rune passed to RuneLen
	size int  // expected result; the table uses -1 for runes it treats as invalid
}
444
// runelentests lists runes with the result expected from RuneLen: lengths 1
// to 4 for encodable runes, and -1 for the surrogate bounds, a value above
// MaxRune, and a negative value.
var runelentests = []RuneLenTest{
	{0, 1},
	{'e', 1},
	{'é', 2},
	{'☺', 3},
	{RuneError, 3},
	{MaxRune, 4},
	{0xD800, -1},
	{0xDFFF, -1},
	{MaxRune + 1, -1},
	{-1, -1},
}
457
458 func TestRuneLen(t *testing.T) {
459 for _, tt := range runelentests {
460 if size := RuneLen(tt.r); size != tt.size {
461 t.Errorf("RuneLen(%#U) = %d, want %d", tt.r, size, tt.size)
462 }
463 }
464 }
465
// ValidTest is one case for TestValid.
type ValidTest struct {
	in  string // input to validate
	out bool   // expected result of Valid and ValidString
}
470
// validTests lists inputs with the result expected from Valid and
// ValidString.
var validTests = []ValidTest{
	{"", true},
	{"a", true},
	{"abc", true},
	{"Ж", true},
	{"ЖЖ", true},
	{"брэд-ЛГТМ", true},
	{"☺☻☹", true},
	{"aa\xe2", false},
	{string([]byte{66, 250}), false},
	{string([]byte{66, 250, 67}), false},
	{"a\uFFFDb", true}, // an encoded U+FFFD is itself valid
	{string("\xF4\x8F\xBF\xBF"), true},      // same bytes as 0x10ffff in utf8map
	{string("\xF4\x90\x80\x80"), false},     // one past the sequence above
	{string("\xF7\xBF\xBF\xBF"), false},     // lead byte F7
	{string("\xFB\xBF\xBF\xBF\xBF"), false}, // five-byte form
	{string("\xc0\x80"), false},             // lead byte C0
	{string("\xed\xa0\x80"), false},         // surrogateMap entry for 0xd800
	{string("\xed\xbf\xbf"), false},         // surrogateMap entry for 0xdfff
}
491
492 func TestValid(t *testing.T) {
493 for _, tt := range validTests {
494 if Valid([]byte(tt.in)) != tt.out {
495 t.Errorf("Valid(%q) = %v; want %v", tt.in, !tt.out, tt.out)
496 }
497 if ValidString(tt.in) != tt.out {
498 t.Errorf("ValidString(%q) = %v; want %v", tt.in, !tt.out, tt.out)
499 }
500 }
501 }
502
// ValidRuneTest is one case for TestValidRune.
type ValidRuneTest struct {
	r  rune // rune passed to ValidRune
	ok bool // expected result
}
507
// validrunetests lists runes with the result expected from ValidRune. The
// entries 0xD7FF/0xD800 and 0xDFFF/0xE000 sit on either side of the
// surrogate range; MaxRune+1 and -1 are out of range.
var validrunetests = []ValidRuneTest{
	{0, true},
	{'e', true},
	{'é', true},
	{'☺', true},
	{RuneError, true},
	{MaxRune, true},
	{0xD7FF, true},
	{0xD800, false},
	{0xDFFF, false},
	{0xE000, true},
	{MaxRune + 1, false},
	{-1, false},
}
522
523 func TestValidRune(t *testing.T) {
524 for _, tt := range validrunetests {
525 if ok := ValidRune(tt.r); ok != tt.ok {
526 t.Errorf("ValidRune(%#U) = %t, want %t", tt.r, ok, tt.ok)
527 }
528 }
529 }
530
// BenchmarkRuneCountTenASCIIChars times RuneCount on a ten-byte ASCII slice.
func BenchmarkRuneCountTenASCIIChars(b *testing.B) {
	input := []byte("0123456789")
	for n := 0; n < b.N; n++ {
		RuneCount(input)
	}
}
537
// BenchmarkRuneCountTenJapaneseChars times RuneCount on a slice holding ten
// multi-byte characters.
func BenchmarkRuneCountTenJapaneseChars(b *testing.B) {
	input := []byte("日本語日本語日本語日")
	for n := 0; n < b.N; n++ {
		RuneCount(input)
	}
}
544
// BenchmarkRuneCountInStringTenASCIIChars times RuneCountInString on a
// ten-character ASCII string.
func BenchmarkRuneCountInStringTenASCIIChars(b *testing.B) {
	const input = "0123456789"
	for n := 0; n < b.N; n++ {
		RuneCountInString(input)
	}
}
550
// BenchmarkRuneCountInStringTenJapaneseChars times RuneCountInString on a
// string of ten multi-byte characters.
func BenchmarkRuneCountInStringTenJapaneseChars(b *testing.B) {
	const input = "日本語日本語日本語日"
	for n := 0; n < b.N; n++ {
		RuneCountInString(input)
	}
}
556
// BenchmarkValidTenASCIIChars times Valid on a ten-byte ASCII slice.
func BenchmarkValidTenASCIIChars(b *testing.B) {
	input := []byte("0123456789")
	for n := 0; n < b.N; n++ {
		Valid(input)
	}
}
563
// BenchmarkValidTenJapaneseChars times Valid on a slice holding ten
// multi-byte characters.
func BenchmarkValidTenJapaneseChars(b *testing.B) {
	input := []byte("日本語日本語日本語日")
	for n := 0; n < b.N; n++ {
		Valid(input)
	}
}
570
// BenchmarkValidStringTenASCIIChars times ValidString on a ten-character
// ASCII string.
func BenchmarkValidStringTenASCIIChars(b *testing.B) {
	const input = "0123456789"
	for n := 0; n < b.N; n++ {
		ValidString(input)
	}
}
576
// BenchmarkValidStringTenJapaneseChars times ValidString on a string of ten
// multi-byte characters.
func BenchmarkValidStringTenJapaneseChars(b *testing.B) {
	const input = "日本語日本語日本語日"
	for n := 0; n < b.N; n++ {
		ValidString(input)
	}
}
582
// BenchmarkEncodeASCIIRune times EncodeRune writing 'a' into a reused
// UTFMax-byte buffer.
func BenchmarkEncodeASCIIRune(b *testing.B) {
	dst := make([]byte, UTFMax)
	for n := 0; n < b.N; n++ {
		EncodeRune(dst, 'a')
	}
}
589
// BenchmarkEncodeJapaneseRune times EncodeRune writing '本' into a reused
// UTFMax-byte buffer.
func BenchmarkEncodeJapaneseRune(b *testing.B) {
	dst := make([]byte, UTFMax)
	for n := 0; n < b.N; n++ {
		EncodeRune(dst, '本')
	}
}
596
// BenchmarkAppendASCIIRune times AppendRune appending 'a' to the empty
// prefix of a preallocated UTFMax-byte buffer.
func BenchmarkAppendASCIIRune(b *testing.B) {
	backing := make([]byte, UTFMax)
	for n := 0; n < b.N; n++ {
		AppendRune(backing[:0], 'a')
	}
}
603
// BenchmarkAppendJapaneseRune times AppendRune appending '本' to the empty
// prefix of a preallocated UTFMax-byte buffer.
func BenchmarkAppendJapaneseRune(b *testing.B) {
	backing := make([]byte, UTFMax)
	for n := 0; n < b.N; n++ {
		AppendRune(backing[:0], '本')
	}
}
610
// BenchmarkDecodeASCIIRune times DecodeRune on the single byte 'a'.
func BenchmarkDecodeASCIIRune(b *testing.B) {
	input := []byte{'a'}
	for n := 0; n < b.N; n++ {
		DecodeRune(input)
	}
}
617
// BenchmarkDecodeJapaneseRune times DecodeRune on the encoding of '本'.
func BenchmarkDecodeJapaneseRune(b *testing.B) {
	input := []byte("本")
	for n := 0; n < b.N; n++ {
		DecodeRune(input)
	}
}
624
625
626
// boolSink receives the result of FullRune in BenchmarkFullRune.
// NOTE(review): presumably a package-level sink so the compiler cannot
// discard the benchmarked call; confirm.
var boolSink bool
628
629 func BenchmarkFullRune(b *testing.B) {
630 benchmarks := []struct {
631 name string
632 data []byte
633 }{
634 {"ASCII", []byte("a")},
635 {"Incomplete", []byte("\xf0\x90\x80")},
636 {"Japanese", []byte("本")},
637 }
638 for _, bm := range benchmarks {
639 b.Run(bm.name, func(b *testing.B) {
640 for i := 0; i < b.N; i++ {
641 boolSink = FullRune(bm.data)
642 }
643 })
644 }
645 }
646
View as plain text