1
2
3
4
5
6
7
8
9
10 package norm
11
12 import (
13 "unicode/utf8"
14
15 "golang.org/x/text/transform"
16 )
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 type Form int
35
36 const (
37 NFC Form = iota
38 NFD
39 NFKC
40 NFKD
41 )
42
43
44 func (f Form) Bytes(b []byte) []byte {
45 src := inputBytes(b)
46 ft := formTable[f]
47 n, ok := ft.quickSpan(src, 0, len(b), true)
48 if ok {
49 return b
50 }
51 out := make([]byte, n, len(b))
52 copy(out, b[0:n])
53 rb := reorderBuffer{f: *ft, src: src, nsrc: len(b), out: out, flushF: appendFlush}
54 return doAppendInner(&rb, n)
55 }
56
57
58 func (f Form) String(s string) string {
59 src := inputString(s)
60 ft := formTable[f]
61 n, ok := ft.quickSpan(src, 0, len(s), true)
62 if ok {
63 return s
64 }
65 out := make([]byte, n, len(s))
66 copy(out, s[0:n])
67 rb := reorderBuffer{f: *ft, src: src, nsrc: len(s), out: out, flushF: appendFlush}
68 return string(doAppendInner(&rb, n))
69 }
70
71
72 func (f Form) IsNormal(b []byte) bool {
73 src := inputBytes(b)
74 ft := formTable[f]
75 bp, ok := ft.quickSpan(src, 0, len(b), true)
76 if ok {
77 return true
78 }
79 rb := reorderBuffer{f: *ft, src: src, nsrc: len(b)}
80 rb.setFlusher(nil, cmpNormalBytes)
81 for bp < len(b) {
82 rb.out = b[bp:]
83 if bp = decomposeSegment(&rb, bp, true); bp < 0 {
84 return false
85 }
86 bp, _ = rb.f.quickSpan(rb.src, bp, len(b), true)
87 }
88 return true
89 }
90
91 func cmpNormalBytes(rb *reorderBuffer) bool {
92 b := rb.out
93 for i := 0; i < rb.nrune; i++ {
94 info := rb.rune[i]
95 if int(info.size) > len(b) {
96 return false
97 }
98 p := info.pos
99 pe := p + info.size
100 for ; p < pe; p++ {
101 if b[0] != rb.byte[p] {
102 return false
103 }
104 b = b[1:]
105 }
106 }
107 return true
108 }
109
110
111 func (f Form) IsNormalString(s string) bool {
112 src := inputString(s)
113 ft := formTable[f]
114 bp, ok := ft.quickSpan(src, 0, len(s), true)
115 if ok {
116 return true
117 }
118 rb := reorderBuffer{f: *ft, src: src, nsrc: len(s)}
119 rb.setFlusher(nil, func(rb *reorderBuffer) bool {
120 for i := 0; i < rb.nrune; i++ {
121 info := rb.rune[i]
122 if bp+int(info.size) > len(s) {
123 return false
124 }
125 p := info.pos
126 pe := p + info.size
127 for ; p < pe; p++ {
128 if s[bp] != rb.byte[p] {
129 return false
130 }
131 bp++
132 }
133 }
134 return true
135 })
136 for bp < len(s) {
137 if bp = decomposeSegment(&rb, bp, true); bp < 0 {
138 return false
139 }
140 bp, _ = rb.f.quickSpan(rb.src, bp, len(s), true)
141 }
142 return true
143 }
144
145
146
147
148 func patchTail(rb *reorderBuffer) bool {
149 info, p := lastRuneStart(&rb.f, rb.out)
150 if p == -1 || info.size == 0 {
151 return true
152 }
153 end := p + int(info.size)
154 extra := len(rb.out) - end
155 if extra > 0 {
156
157
158 x := make([]byte, 0)
159 x = append(x, rb.out[len(rb.out)-extra:]...)
160 rb.out = rb.out[:end]
161 decomposeToLastBoundary(rb)
162 rb.doFlush()
163 rb.out = append(rb.out, x...)
164 return false
165 }
166 buf := rb.out[p:]
167 rb.out = rb.out[:p]
168 decomposeToLastBoundary(rb)
169 if s := rb.ss.next(info); s == ssStarter {
170 rb.doFlush()
171 rb.ss.first(info)
172 } else if s == ssOverflow {
173 rb.doFlush()
174 rb.insertCGJ()
175 rb.ss = 0
176 }
177 rb.insertUnsafe(inputBytes(buf), 0, info)
178 return true
179 }
180
181 func appendQuick(rb *reorderBuffer, i int) int {
182 if rb.nsrc == i {
183 return i
184 }
185 end, _ := rb.f.quickSpan(rb.src, i, rb.nsrc, true)
186 rb.out = rb.src.appendSlice(rb.out, i, end)
187 return end
188 }
189
190
191
192 func (f Form) Append(out []byte, src ...byte) []byte {
193 return f.doAppend(out, inputBytes(src), len(src))
194 }
195
196 func (f Form) doAppend(out []byte, src input, n int) []byte {
197 if n == 0 {
198 return out
199 }
200 ft := formTable[f]
201
202 if len(out) == 0 {
203 p, _ := ft.quickSpan(src, 0, n, true)
204 out = src.appendSlice(out, 0, p)
205 if p == n {
206 return out
207 }
208 rb := reorderBuffer{f: *ft, src: src, nsrc: n, out: out, flushF: appendFlush}
209 return doAppendInner(&rb, p)
210 }
211 rb := reorderBuffer{f: *ft, src: src, nsrc: n}
212 return doAppend(&rb, out, 0)
213 }
214
215 func doAppend(rb *reorderBuffer, out []byte, p int) []byte {
216 rb.setFlusher(out, appendFlush)
217 src, n := rb.src, rb.nsrc
218 doMerge := len(out) > 0
219 if q := src.skipContinuationBytes(p); q > p {
220
221 rb.out = src.appendSlice(rb.out, p, q)
222 p = q
223 doMerge = patchTail(rb)
224 }
225 fd := &rb.f
226 if doMerge {
227 var info Properties
228 if p < n {
229 info = fd.info(src, p)
230 if !info.BoundaryBefore() || info.nLeadingNonStarters() > 0 {
231 if p == 0 {
232 decomposeToLastBoundary(rb)
233 }
234 p = decomposeSegment(rb, p, true)
235 }
236 }
237 if info.size == 0 {
238 rb.doFlush()
239
240 return src.appendSlice(rb.out, p, n)
241 }
242 if rb.nrune > 0 {
243 return doAppendInner(rb, p)
244 }
245 }
246 p = appendQuick(rb, p)
247 return doAppendInner(rb, p)
248 }
249
250 func doAppendInner(rb *reorderBuffer, p int) []byte {
251 for n := rb.nsrc; p < n; {
252 p = decomposeSegment(rb, p, true)
253 p = appendQuick(rb, p)
254 }
255 return rb.out
256 }
257
258
259
260 func (f Form) AppendString(out []byte, src string) []byte {
261 return f.doAppend(out, inputString(src), len(src))
262 }
263
264
265
266 func (f Form) QuickSpan(b []byte) int {
267 n, _ := formTable[f].quickSpan(inputBytes(b), 0, len(b), true)
268 return n
269 }
270
271
272
273 func (f Form) Span(b []byte, atEOF bool) (n int, err error) {
274 n, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), atEOF)
275 if n < len(b) {
276 if !ok {
277 err = transform.ErrEndOfSpan
278 } else {
279 err = transform.ErrShortSrc
280 }
281 }
282 return n, err
283 }
284
285
286
287 func (f Form) SpanString(s string, atEOF bool) (n int, err error) {
288 n, ok := formTable[f].quickSpan(inputString(s), 0, len(s), atEOF)
289 if n < len(s) {
290 if !ok {
291 err = transform.ErrEndOfSpan
292 } else {
293 err = transform.ErrShortSrc
294 }
295 }
296 return n, err
297 }
298
299
300
301
302
303 func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) {
304 var lastCC uint8
305 ss := streamSafe(0)
306 lastSegStart := i
307 for n = end; i < n; {
308 if j := src.skipASCII(i, n); i != j {
309 i = j
310 lastSegStart = i - 1
311 lastCC = 0
312 ss = 0
313 continue
314 }
315 info := f.info(src, i)
316 if info.size == 0 {
317 if atEOF {
318
319 return n, true
320 }
321 return lastSegStart, true
322 }
323
324
325 switch ss.next(info) {
326 case ssStarter:
327 lastSegStart = i
328 case ssOverflow:
329 return lastSegStart, false
330 case ssSuccess:
331 if lastCC > info.ccc {
332 return lastSegStart, false
333 }
334 }
335 if f.composing {
336 if !info.isYesC() {
337 break
338 }
339 } else {
340 if !info.isYesD() {
341 break
342 }
343 }
344 lastCC = info.ccc
345 i += int(info.size)
346 }
347 if i == n {
348 if !atEOF {
349 n = lastSegStart
350 }
351 return n, true
352 }
353 return lastSegStart, false
354 }
355
356
357
358 func (f Form) QuickSpanString(s string) int {
359 n, _ := formTable[f].quickSpan(inputString(s), 0, len(s), true)
360 return n
361 }
362
363
364
365 func (f Form) FirstBoundary(b []byte) int {
366 return f.firstBoundary(inputBytes(b), len(b))
367 }
368
369 func (f Form) firstBoundary(src input, nsrc int) int {
370 i := src.skipContinuationBytes(0)
371 if i >= nsrc {
372 return -1
373 }
374 fd := formTable[f]
375 ss := streamSafe(0)
376
377
378
379 for {
380 info := fd.info(src, i)
381 if info.size == 0 {
382 return -1
383 }
384 if s := ss.next(info); s != ssSuccess {
385 return i
386 }
387 i += int(info.size)
388 if i >= nsrc {
389 if !info.BoundaryAfter() && !ss.isMax() {
390 return -1
391 }
392 return nsrc
393 }
394 }
395 }
396
397
398
399 func (f Form) FirstBoundaryInString(s string) int {
400 return f.firstBoundary(inputString(s), len(s))
401 }
402
403
404
405
406 func (f Form) NextBoundary(b []byte, atEOF bool) int {
407 return f.nextBoundary(inputBytes(b), len(b), atEOF)
408 }
409
410
411
412
413 func (f Form) NextBoundaryInString(s string, atEOF bool) int {
414 return f.nextBoundary(inputString(s), len(s), atEOF)
415 }
416
417 func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int {
418 if nsrc == 0 {
419 if atEOF {
420 return 0
421 }
422 return -1
423 }
424 fd := formTable[f]
425 info := fd.info(src, 0)
426 if info.size == 0 {
427 if atEOF {
428 return 1
429 }
430 return -1
431 }
432 ss := streamSafe(0)
433 ss.first(info)
434
435 for i := int(info.size); i < nsrc; i += int(info.size) {
436 info = fd.info(src, i)
437 if info.size == 0 {
438 if atEOF {
439 return i
440 }
441 return -1
442 }
443
444
445 if s := ss.next(info); s != ssSuccess {
446 return i
447 }
448 }
449 if !atEOF && !info.BoundaryAfter() && !ss.isMax() {
450 return -1
451 }
452 return nsrc
453 }
454
455
456
457 func (f Form) LastBoundary(b []byte) int {
458 return lastBoundary(formTable[f], b)
459 }
460
461 func lastBoundary(fd *formInfo, b []byte) int {
462 i := len(b)
463 info, p := lastRuneStart(fd, b)
464 if p == -1 {
465 return -1
466 }
467 if info.size == 0 {
468 if p == 0 {
469 return -1
470 }
471 i = p
472 info, p = lastRuneStart(fd, b[:i])
473 if p == -1 {
474 return i
475 }
476 }
477 if p+int(info.size) != i {
478 return i
479 }
480 if info.BoundaryAfter() {
481 return i
482 }
483 ss := streamSafe(0)
484 v := ss.backwards(info)
485 for i = p; i >= 0 && v != ssStarter; i = p {
486 info, p = lastRuneStart(fd, b[:i])
487 if v = ss.backwards(info); v == ssOverflow {
488 break
489 }
490 if p+int(info.size) != i {
491 if p == -1 {
492 return -1
493 }
494 return i
495 }
496 }
497 return i
498 }
499
500
501
502
503 func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
504
505 info := rb.f.info(rb.src, sp)
506 if info.size == 0 {
507 return 0
508 }
509 if s := rb.ss.next(info); s == ssStarter {
510
511 if rb.nrune > 0 {
512 goto end
513 }
514 } else if s == ssOverflow {
515 rb.insertCGJ()
516 goto end
517 }
518 if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
519 return int(err)
520 }
521 for {
522 sp += int(info.size)
523 if sp >= rb.nsrc {
524 if !atEOF && !info.BoundaryAfter() {
525 return int(iShortSrc)
526 }
527 break
528 }
529 info = rb.f.info(rb.src, sp)
530 if info.size == 0 {
531 if !atEOF {
532 return int(iShortSrc)
533 }
534 break
535 }
536 if s := rb.ss.next(info); s == ssStarter {
537 break
538 } else if s == ssOverflow {
539 rb.insertCGJ()
540 break
541 }
542 if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
543 return int(err)
544 }
545 }
546 end:
547 if !rb.doFlush() {
548 return int(iShortDst)
549 }
550 return sp
551 }
552
553
554
555 func lastRuneStart(fd *formInfo, buf []byte) (Properties, int) {
556 p := len(buf) - 1
557 for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- {
558 }
559 if p < 0 {
560 return Properties{}, -1
561 }
562 return fd.info(inputBytes(buf), p), p
563 }
564
565
566
567 func decomposeToLastBoundary(rb *reorderBuffer) {
568 fd := &rb.f
569 info, i := lastRuneStart(fd, rb.out)
570 if int(info.size) != len(rb.out)-i {
571
572 return
573 }
574 if info.BoundaryAfter() {
575 return
576 }
577 var add [maxNonStarters + 1]Properties
578 padd := 0
579 ss := streamSafe(0)
580 p := len(rb.out)
581 for {
582 add[padd] = info
583 v := ss.backwards(info)
584 if v == ssOverflow {
585
586
587 break
588 }
589 padd++
590 p -= int(info.size)
591 if v == ssStarter || p < 0 {
592 break
593 }
594 info, i = lastRuneStart(fd, rb.out[:p])
595 if int(info.size) != p-i {
596 break
597 }
598 }
599 rb.ss = ss
600
601 var buf [maxBufferSize * utf8.UTFMax]byte
602 cp := buf[:copy(buf[:], rb.out[p:])]
603 rb.out = rb.out[:p]
604 for padd--; padd >= 0; padd-- {
605 info = add[padd]
606 rb.insertUnsafe(inputBytes(cp), 0, info)
607 cp = cp[info.size:]
608 }
609 }
610
View as plain text