1
2
3
4
5 package bidi
6
7 import (
8 "fmt"
9 "log"
10 )
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56 type level int8
57
58 const implicitLevel level = -1
59
60
61 func (c Class) in(set ...Class) bool {
62 for _, s := range set {
63 if c == s {
64 return true
65 }
66 }
67 return false
68 }
69
70
71 type paragraph struct {
72 initialTypes []Class
73
74
75 pairTypes []bracketType
76 pairValues []rune
77
78 embeddingLevel level
79
80
81 resultTypes []Class
82 resultLevels []level
83
84
85
86
87
88 matchingPDI []int
89
90
91
92
93 matchingIsolateInitiator []int
94 }
95
96
97
98
99
100
101
102
103 func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) (*paragraph, error) {
104 var err error
105 if err = validateTypes(types); err != nil {
106 return nil, err
107 }
108 if err = validatePbTypes(pairTypes); err != nil {
109 return nil, err
110 }
111 if err = validatePbValues(pairValues, pairTypes); err != nil {
112 return nil, err
113 }
114 if err = validateParagraphEmbeddingLevel(levels); err != nil {
115 return nil, err
116 }
117
118 p := ¶graph{
119 initialTypes: append([]Class(nil), types...),
120 embeddingLevel: levels,
121
122 pairTypes: pairTypes,
123 pairValues: pairValues,
124
125 resultTypes: append([]Class(nil), types...),
126 }
127 p.run()
128 return p, nil
129 }
130
131 func (p *paragraph) Len() int { return len(p.initialTypes) }
132
133
134
135 func (p *paragraph) run() {
136 p.determineMatchingIsolates()
137
138
139
140
141
142 if p.embeddingLevel == implicitLevel {
143 p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len())
144 }
145
146
147 p.resultLevels = make([]level, p.Len())
148 setLevels(p.resultLevels, p.embeddingLevel)
149
150
151
152 p.determineExplicitEmbeddingLevels()
153
154
155
156
157
158
159
160
161
162 for _, seq := range p.determineIsolatingRunSequences() {
163
164
165 seq.resolveWeakTypes()
166
167
168
169 resolvePairedBrackets(seq)
170
171
172
173 seq.resolveNeutralTypes()
174
175
176
177 seq.resolveImplicitLevels()
178
179
180 seq.applyLevelsAndTypes()
181 }
182
183
184
185
186 p.assignLevelsToCharactersRemovedByX9()
187 }
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204 func (p *paragraph) determineMatchingIsolates() {
205 p.matchingPDI = make([]int, p.Len())
206 p.matchingIsolateInitiator = make([]int, p.Len())
207
208 for i := range p.matchingIsolateInitiator {
209 p.matchingIsolateInitiator[i] = -1
210 }
211
212 for i := range p.matchingPDI {
213 p.matchingPDI[i] = -1
214
215 if t := p.resultTypes[i]; t.in(LRI, RLI, FSI) {
216 depthCounter := 1
217 for j := i + 1; j < p.Len(); j++ {
218 if u := p.resultTypes[j]; u.in(LRI, RLI, FSI) {
219 depthCounter++
220 } else if u == PDI {
221 if depthCounter--; depthCounter == 0 {
222 p.matchingPDI[i] = j
223 p.matchingIsolateInitiator[j] = i
224 break
225 }
226 }
227 }
228 if p.matchingPDI[i] == -1 {
229 p.matchingPDI[i] = p.Len()
230 }
231 }
232 }
233 }
234
235
236
237
238
239
240 func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level {
241 var strongType Class = unknownClass
242
243
244 for i := start; i < end; i++ {
245 if t := p.resultTypes[i]; t.in(L, AL, R) {
246 strongType = t
247 break
248 } else if t.in(FSI, LRI, RLI) {
249 i = p.matchingPDI[i]
250 if i > end {
251 log.Panic("assert (i <= end)")
252 }
253 }
254 }
255
256 switch strongType {
257 case unknownClass:
258
259 return 0
260 case L:
261 return 0
262 default:
263 return 1
264 }
265 }
266
267 const maxDepth = 125
268
269
270
271 type directionalStatusStack struct {
272 stackCounter int
273 embeddingLevelStack [maxDepth + 1]level
274 overrideStatusStack [maxDepth + 1]Class
275 isolateStatusStack [maxDepth + 1]bool
276 }
277
278 func (s *directionalStatusStack) empty() { s.stackCounter = 0 }
279 func (s *directionalStatusStack) pop() { s.stackCounter-- }
280 func (s *directionalStatusStack) depth() int { return s.stackCounter }
281
282 func (s *directionalStatusStack) push(level level, overrideStatus Class, isolateStatus bool) {
283 s.embeddingLevelStack[s.stackCounter] = level
284 s.overrideStatusStack[s.stackCounter] = overrideStatus
285 s.isolateStatusStack[s.stackCounter] = isolateStatus
286 s.stackCounter++
287 }
288
289 func (s *directionalStatusStack) lastEmbeddingLevel() level {
290 return s.embeddingLevelStack[s.stackCounter-1]
291 }
292
293 func (s *directionalStatusStack) lastDirectionalOverrideStatus() Class {
294 return s.overrideStatusStack[s.stackCounter-1]
295 }
296
297 func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool {
298 return s.isolateStatusStack[s.stackCounter-1]
299 }
300
301
302 func (p *paragraph) determineExplicitEmbeddingLevels() {
303 var stack directionalStatusStack
304 var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int
305
306
307 stack.push(p.embeddingLevel, ON, false)
308
309 for i, t := range p.resultTypes {
310
311 switch t {
312 case RLE, LRE, RLO, LRO, RLI, LRI, FSI:
313 isIsolate := t.in(RLI, LRI, FSI)
314 isRTL := t.in(RLE, RLO, RLI)
315
316
317 if t == FSI {
318 isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1)
319 }
320 if isIsolate {
321 p.resultLevels[i] = stack.lastEmbeddingLevel()
322 if stack.lastDirectionalOverrideStatus() != ON {
323 p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
324 }
325 }
326
327 var newLevel level
328 if isRTL {
329
330 newLevel = (stack.lastEmbeddingLevel() + 1) | 1
331 } else {
332
333 newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1
334 }
335
336 if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 {
337 if isIsolate {
338 validIsolateCount++
339 }
340
341
342
343
344 switch t {
345 case LRO:
346 stack.push(newLevel, L, isIsolate)
347 case RLO:
348 stack.push(newLevel, R, isIsolate)
349 default:
350 stack.push(newLevel, ON, isIsolate)
351 }
352
353 if !isIsolate {
354 p.resultLevels[i] = newLevel
355 }
356 } else {
357
358
359 if isIsolate {
360 overflowIsolateCount++
361 } else {
362 if overflowIsolateCount == 0 {
363 overflowEmbeddingCount++
364 }
365 }
366 }
367
368
369 case PDI:
370 if overflowIsolateCount > 0 {
371 overflowIsolateCount--
372 } else if validIsolateCount == 0 {
373
374 } else {
375 overflowEmbeddingCount = 0
376 for !stack.lastDirectionalIsolateStatus() {
377 stack.pop()
378 }
379 stack.pop()
380 validIsolateCount--
381 }
382 p.resultLevels[i] = stack.lastEmbeddingLevel()
383
384
385 case PDF:
386
387 p.resultLevels[i] = stack.lastEmbeddingLevel()
388
389 if overflowIsolateCount > 0 {
390
391 } else if overflowEmbeddingCount > 0 {
392 overflowEmbeddingCount--
393 } else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 {
394 stack.pop()
395 }
396
397 case B:
398
399
400
401
402 stack.empty()
403 overflowIsolateCount = 0
404 overflowEmbeddingCount = 0
405 validIsolateCount = 0
406 p.resultLevels[i] = p.embeddingLevel
407
408 default:
409 p.resultLevels[i] = stack.lastEmbeddingLevel()
410 if stack.lastDirectionalOverrideStatus() != ON {
411 p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
412 }
413 }
414 }
415 }
416
417 type isolatingRunSequence struct {
418 p *paragraph
419
420 indexes []int
421
422 types []Class
423 resolvedLevels []level
424 level level
425 sos, eos Class
426 }
427
428 func (i *isolatingRunSequence) Len() int { return len(i.indexes) }
429
430 func maxLevel(a, b level) level {
431 if a > b {
432 return a
433 }
434 return b
435 }
436
437
438
439 func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence {
440 length := len(indexes)
441 types := make([]Class, length)
442 for i, x := range indexes {
443 types[i] = p.resultTypes[x]
444 }
445
446
447 prevChar := indexes[0] - 1
448 for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) {
449 prevChar--
450 }
451 prevLevel := p.embeddingLevel
452 if prevChar >= 0 {
453 prevLevel = p.resultLevels[prevChar]
454 }
455
456 var succLevel level
457 lastType := types[length-1]
458 if lastType.in(LRI, RLI, FSI) {
459 succLevel = p.embeddingLevel
460 } else {
461
462 limit := indexes[length-1] + 1
463 for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ {
464
465 }
466 succLevel = p.embeddingLevel
467 if limit < p.Len() {
468 succLevel = p.resultLevels[limit]
469 }
470 }
471 level := p.resultLevels[indexes[0]]
472 return &isolatingRunSequence{
473 p: p,
474 indexes: indexes,
475 types: types,
476 level: level,
477 sos: typeForLevel(maxLevel(prevLevel, level)),
478 eos: typeForLevel(maxLevel(succLevel, level)),
479 }
480 }
481
482
483
484
485
486 func (s *isolatingRunSequence) resolveWeakTypes() {
487
488
489 s.assertOnly(L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM, LRI, RLI, FSI, PDI)
490
491
492
493 precedingCharacterType := s.sos
494 for i, t := range s.types {
495 if t == NSM {
496 s.types[i] = precedingCharacterType
497 } else {
498
499
500
501 precedingCharacterType = t
502 }
503 }
504
505
506
507 for i, t := range s.types {
508 if t == EN {
509 for j := i - 1; j >= 0; j-- {
510 if t := s.types[j]; t.in(L, R, AL) {
511 if t == AL {
512 s.types[i] = AN
513 }
514 break
515 }
516 }
517 }
518 }
519
520
521 for i, t := range s.types {
522 if t == AL {
523 s.types[i] = R
524 }
525 }
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540 for i := 1; i < s.Len()-1; i++ {
541 t := s.types[i]
542 if t == ES || t == CS {
543 prevSepType := s.types[i-1]
544 succSepType := s.types[i+1]
545 if prevSepType == EN && succSepType == EN {
546 s.types[i] = EN
547 } else if s.types[i] == CS && prevSepType == AN && succSepType == AN {
548 s.types[i] = AN
549 }
550 }
551 }
552
553
554 for i, t := range s.types {
555 if t == ET {
556
557 runStart := i
558 runEnd := s.findRunLimit(runStart, ET)
559
560
561 t := s.sos
562 if runStart > 0 {
563 t = s.types[runStart-1]
564 }
565 if t != EN {
566 t = s.eos
567 if runEnd < len(s.types) {
568 t = s.types[runEnd]
569 }
570 }
571 if t == EN {
572 setTypes(s.types[runStart:runEnd], EN)
573 }
574
575 i = runEnd
576 }
577 }
578
579
580 for i, t := range s.types {
581 if t.in(ES, ET, CS) {
582 s.types[i] = ON
583 }
584 }
585
586
587 for i, t := range s.types {
588 if t == EN {
589
590 prevStrongType := s.sos
591 for j := i - 1; j >= 0; j-- {
592 t = s.types[j]
593 if t == L || t == R {
594 prevStrongType = t
595 break
596 }
597 }
598 if prevStrongType == L {
599 s.types[i] = L
600 }
601 }
602 }
603 }
604
605
606 func (s *isolatingRunSequence) resolveNeutralTypes() {
607
608
609 s.assertOnly(L, R, EN, AN, B, S, WS, ON, RLI, LRI, FSI, PDI)
610
611 for i, t := range s.types {
612 switch t {
613 case WS, ON, B, S, RLI, LRI, FSI, PDI:
614
615 runStart := i
616 runEnd := s.findRunLimit(runStart, B, S, WS, ON, RLI, LRI, FSI, PDI)
617
618
619 var leadType, trailType Class
620
621
622
623 if runStart == 0 {
624 leadType = s.sos
625 } else {
626 leadType = s.types[runStart-1]
627 if leadType.in(AN, EN) {
628 leadType = R
629 }
630 }
631 if runEnd == len(s.types) {
632 trailType = s.eos
633 } else {
634 trailType = s.types[runEnd]
635 if trailType.in(AN, EN) {
636 trailType = R
637 }
638 }
639
640 var resolvedType Class
641 if leadType == trailType {
642
643 resolvedType = leadType
644 } else {
645
646
647
648 resolvedType = typeForLevel(s.level)
649 }
650
651 setTypes(s.types[runStart:runEnd], resolvedType)
652
653
654 i = runEnd
655 }
656 }
657 }
658
659 func setLevels(levels []level, newLevel level) {
660 for i := range levels {
661 levels[i] = newLevel
662 }
663 }
664
665 func setTypes(types []Class, newType Class) {
666 for i := range types {
667 types[i] = newType
668 }
669 }
670
671
672 func (s *isolatingRunSequence) resolveImplicitLevels() {
673
674
675 s.assertOnly(L, R, EN, AN)
676
677 s.resolvedLevels = make([]level, len(s.types))
678 setLevels(s.resolvedLevels, s.level)
679
680 if (s.level & 1) == 0 {
681 for i, t := range s.types {
682
683 if t == L {
684
685 } else if t == R {
686 s.resolvedLevels[i] += 1
687 } else {
688 s.resolvedLevels[i] += 2
689 }
690 }
691 } else {
692 for i, t := range s.types {
693
694 if t == R {
695
696 } else {
697 s.resolvedLevels[i] += 1
698 }
699 }
700 }
701 }
702
703
704
705 func (s *isolatingRunSequence) applyLevelsAndTypes() {
706 for i, x := range s.indexes {
707 s.p.resultTypes[x] = s.types[i]
708 s.p.resultLevels[x] = s.resolvedLevels[i]
709 }
710 }
711
712
713
714
715 func (s *isolatingRunSequence) findRunLimit(index int, validSet ...Class) int {
716 loop:
717 for ; index < len(s.types); index++ {
718 t := s.types[index]
719 for _, valid := range validSet {
720 if t == valid {
721 continue loop
722 }
723 }
724 return index
725 }
726 return len(s.types)
727 }
728
729
730
731 func (s *isolatingRunSequence) assertOnly(codes ...Class) {
732 loop:
733 for i, t := range s.types {
734 for _, c := range codes {
735 if t == c {
736 continue loop
737 }
738 }
739 log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i])
740 }
741 }
742
743
744
745
746
747
748
749 func (p *paragraph) determineLevelRuns() [][]int {
750 run := []int{}
751 allRuns := [][]int{}
752 currentLevel := implicitLevel
753
754 for i := range p.initialTypes {
755 if !isRemovedByX9(p.initialTypes[i]) {
756 if p.resultLevels[i] != currentLevel {
757
758 if currentLevel >= 0 {
759 allRuns = append(allRuns, run)
760 run = nil
761 }
762
763 currentLevel = p.resultLevels[i]
764 }
765 run = append(run, i)
766 }
767 }
768
769 if len(run) > 0 {
770 allRuns = append(allRuns, run)
771 }
772 return allRuns
773 }
774
775
776 func (p *paragraph) determineIsolatingRunSequences() []*isolatingRunSequence {
777 levelRuns := p.determineLevelRuns()
778
779
780 runForCharacter := make([]int, p.Len())
781 for i, run := range levelRuns {
782 for _, index := range run {
783 runForCharacter[index] = i
784 }
785 }
786
787 sequences := []*isolatingRunSequence{}
788
789 var currentRunSequence []int
790
791 for _, run := range levelRuns {
792 first := run[0]
793 if p.initialTypes[first] != PDI || p.matchingIsolateInitiator[first] == -1 {
794 currentRunSequence = nil
795
796 for {
797
798 currentRunSequence = append(currentRunSequence, run...)
799
800 last := currentRunSequence[len(currentRunSequence)-1]
801 lastT := p.initialTypes[last]
802 if lastT.in(LRI, RLI, FSI) && p.matchingPDI[last] != p.Len() {
803 run = levelRuns[runForCharacter[p.matchingPDI[last]]]
804 } else {
805 break
806 }
807 }
808 sequences = append(sequences, p.isolatingRunSequence(currentRunSequence))
809 }
810 }
811 return sequences
812 }
813
814
815
816
817
818 func (p *paragraph) assignLevelsToCharactersRemovedByX9() {
819 for i, t := range p.initialTypes {
820 if t.in(LRE, RLE, LRO, RLO, PDF, BN) {
821 p.resultTypes[i] = t
822 p.resultLevels[i] = -1
823 }
824 }
825
826
827
828
829 if p.resultLevels[0] == -1 {
830 p.resultLevels[0] = p.embeddingLevel
831 }
832 for i := 1; i < len(p.initialTypes); i++ {
833 if p.resultLevels[i] == -1 {
834 p.resultLevels[i] = p.resultLevels[i-1]
835 }
836 }
837
838
839 }
840
841
842
843
844
845
846
847
848
849
850
851 func (p *paragraph) getLevels(linebreaks []int) []level {
852
853
854
855
856
857
858
859
860
861
862
863 validateLineBreaks(linebreaks, p.Len())
864
865 result := append([]level(nil), p.resultLevels...)
866
867
868
869
870 for i, t := range p.initialTypes {
871 if t.in(B, S) {
872
873 result[i] = p.embeddingLevel
874
875
876 for j := i - 1; j >= 0; j-- {
877 if isWhitespace(p.initialTypes[j]) {
878 result[j] = p.embeddingLevel
879 } else {
880 break
881 }
882 }
883 }
884 }
885
886
887 start := 0
888 for _, limit := range linebreaks {
889 for j := limit - 1; j >= start; j-- {
890 if isWhitespace(p.initialTypes[j]) {
891 result[j] = p.embeddingLevel
892 } else {
893 break
894 }
895 }
896 start = limit
897 }
898
899 return result
900 }
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917 func (p *paragraph) getReordering(linebreaks []int) []int {
918 validateLineBreaks(linebreaks, p.Len())
919
920 return computeMultilineReordering(p.getLevels(linebreaks), linebreaks)
921 }
922
923
924
925 func computeMultilineReordering(levels []level, linebreaks []int) []int {
926 result := make([]int, len(levels))
927
928 start := 0
929 for _, limit := range linebreaks {
930 tempLevels := make([]level, limit-start)
931 copy(tempLevels, levels[start:])
932
933 for j, order := range computeReordering(tempLevels) {
934 result[start+j] = order + start
935 }
936 start = limit
937 }
938 return result
939 }
940
941
942
943
944 func computeReordering(levels []level) []int {
945 result := make([]int, len(levels))
946
947 for i := range result {
948 result[i] = i
949 }
950
951
952
953
954 highestLevel := level(0)
955 lowestOddLevel := level(maxDepth + 2)
956 for _, level := range levels {
957 if level > highestLevel {
958 highestLevel = level
959 }
960 if level&1 != 0 && level < lowestOddLevel {
961 lowestOddLevel = level
962 }
963 }
964
965 for level := highestLevel; level >= lowestOddLevel; level-- {
966 for i := 0; i < len(levels); i++ {
967 if levels[i] >= level {
968
969 start := i
970 limit := i + 1
971 for limit < len(levels) && levels[limit] >= level {
972 limit++
973 }
974
975 for j, k := start, limit-1; j < k; j, k = j+1, k-1 {
976 result[j], result[k] = result[k], result[j]
977 }
978
979 i = limit
980 }
981 }
982 }
983
984 return result
985 }
986
987
988
989 func isWhitespace(c Class) bool {
990 switch c {
991 case LRE, RLE, LRO, RLO, PDF, LRI, RLI, FSI, PDI, BN, WS:
992 return true
993 }
994 return false
995 }
996
997
998 func isRemovedByX9(c Class) bool {
999 switch c {
1000 case LRE, RLE, LRO, RLO, PDF, BN:
1001 return true
1002 }
1003 return false
1004 }
1005
1006
1007 func typeForLevel(level level) Class {
1008 if (level & 0x1) == 0 {
1009 return L
1010 }
1011 return R
1012 }
1013
1014 func validateTypes(types []Class) error {
1015 if len(types) == 0 {
1016 return fmt.Errorf("types is null")
1017 }
1018 for i, t := range types[:len(types)-1] {
1019 if t == B {
1020 return fmt.Errorf("B type before end of paragraph at index: %d", i)
1021 }
1022 }
1023 return nil
1024 }
1025
1026 func validateParagraphEmbeddingLevel(embeddingLevel level) error {
1027 if embeddingLevel != implicitLevel &&
1028 embeddingLevel != 0 &&
1029 embeddingLevel != 1 {
1030 return fmt.Errorf("illegal paragraph embedding level: %d", embeddingLevel)
1031 }
1032 return nil
1033 }
1034
1035 func validateLineBreaks(linebreaks []int, textLength int) error {
1036 prev := 0
1037 for i, next := range linebreaks {
1038 if next <= prev {
1039 return fmt.Errorf("bad linebreak: %d at index: %d", next, i)
1040 }
1041 prev = next
1042 }
1043 if prev != textLength {
1044 return fmt.Errorf("last linebreak was %d, want %d", prev, textLength)
1045 }
1046 return nil
1047 }
1048
1049 func validatePbTypes(pairTypes []bracketType) error {
1050 if len(pairTypes) == 0 {
1051 return fmt.Errorf("pairTypes is null")
1052 }
1053 for i, pt := range pairTypes {
1054 switch pt {
1055 case bpNone, bpOpen, bpClose:
1056 default:
1057 return fmt.Errorf("illegal pairType value at %d: %v", i, pairTypes[i])
1058 }
1059 }
1060 return nil
1061 }
1062
1063 func validatePbValues(pairValues []rune, pairTypes []bracketType) error {
1064 if pairValues == nil {
1065 return fmt.Errorf("pairValues is null")
1066 }
1067 if len(pairTypes) != len(pairValues) {
1068 return fmt.Errorf("pairTypes is different length from pairValues")
1069 }
1070 return nil
1071 }
1072
View as plain text