1
2
3
4
5 package fuzzy
6
7 import (
8 "unicode"
9 )
10
11
// RuneRole is the role that RuneRoles assigns to a single byte of a candidate.
type RuneRole byte
13
const (
	// RNone is the default role. RuneRoles leaves it on bytes that are neither
	// part of a word (letter, digit, non-ASCII byte) nor a '.' or ':' separator.
	RNone RuneRole = iota
	// RSep is the role of a segment separator; RuneRoles assigns it to '.' and ':'.
	RSep
	// RTail is the role of a lower-case letter, digit or non-ASCII byte that
	// continues a word (its predecessor is a letter, digit or non-ASCII byte).
	RTail
	// RUCTail is the role of an upper-case letter that continues a run of
	// upper-case letters instead of starting a new word.
	RUCTail
	// RHead is the role of the first byte of a word.
	RHead
)
26
27
28
29
30 func RuneRoles(candidate []byte, reuse []RuneRole) []RuneRole {
31 var output []RuneRole
32 if cap(reuse) < len(candidate) {
33 output = make([]RuneRole, 0, len(candidate))
34 } else {
35 output = reuse[:0]
36 }
37
38 prev, prev2 := rtNone, rtNone
39 for i := 0; i < len(candidate); i++ {
40 r := rune(candidate[i])
41
42 role := RNone
43
44 curr := rtLower
45 if candidate[i] <= unicode.MaxASCII {
46 curr = runeType(rt[candidate[i]] - '0')
47 }
48
49 if curr == rtLower {
50 if prev == rtNone || prev == rtPunct {
51 role = RHead
52 } else {
53 role = RTail
54 }
55 } else if curr == rtUpper {
56 role = RHead
57
58 if prev == rtUpper {
59
60
61 if i+1 == len(candidate) {
62
63
64 role = RUCTail
65 }
66 }
67 } else if curr == rtPunct {
68 switch r {
69 case '.', ':':
70 role = RSep
71 }
72 }
73 if curr != rtLower {
74 if i > 1 && output[i-1] == RHead && prev2 == rtUpper && (output[i-2] == RHead || output[i-2] == RUCTail) {
75
76
77
78 output[i-1] = RUCTail
79 }
80 }
81
82 output = append(output, role)
83 prev2 = prev
84 prev = curr
85 }
86 return output
87 }
88
// runeType is the coarse character class of a byte; RuneRoles derives roles
// from the classes of the current and preceding bytes.
type runeType byte
90
const (
	// rtNone is the class of bytes that belong to no other class. It is also
	// the "previous class" RuneRoles assumes before the first byte.
	rtNone runeType = iota
	// rtPunct is the class of punctuation bytes; RuneRoles turns '.' and ':'
	// of this class into separators.
	rtPunct
	// rtLower is the class of lower-case letters and digits. RuneRoles also
	// uses it for every byte above unicode.MaxASCII.
	rtLower
	// rtUpper is the class of upper-case letters.
	rtUpper
)
97
// rt is a lookup table from ASCII byte value (the string index) to runeType.
// Each entry is stored as the decimal digit '0'+runeType, so callers decode
// it with runeType(rt[b] - '0'). In the table '.', '/' and ':' are rtPunct,
// '0'-'9' and 'a'-'z' are rtLower, 'A'-'Z' are rtUpper, and every other ASCII
// byte is rtNone.
const rt = "00000000000000000000000000000000000000000000001122222222221000000333333333333333333333333330000002222222222222222222222222200000"
99
100
101
102
103 func LastSegment(input string, roles []RuneRole) string {
104
105 end := len(input) - 1
106 for end >= 0 && roles[end] == RSep {
107 end--
108 }
109 if end < 0 {
110 return ""
111 }
112
113 start := end - 1
114 for start >= 0 && roles[start] != RSep {
115 start--
116 }
117
118 return input[start+1 : end+1]
119 }
120
121
// fromChunks concatenates chunks into buffer and returns the filled prefix of
// buffer. It never allocates.
//
// The whole capacity of buffer is available for output, regardless of its
// current length. Bytes that do not fit are silently dropped, so the result
// holds at most cap(buffer) bytes.
func fromChunks(chunks []string, buffer []byte) []byte {
	// Extend to full capacity up front. The limit is cap(buffer), so indexing
	// must not be restricted by len(buffer); otherwise a buffer such as
	// buf[:0] would panic on the first write.
	buffer = buffer[:cap(buffer)]

	n := 0
	for _, chunk := range chunks {
		n += copy(buffer[n:], chunk)
		if n == len(buffer) {
			// Buffer is full; remaining chunks cannot contribute anything.
			break
		}
	}
	return buffer[:n]
}
135
136
137
138
139
// toLower returns a copy of input in which the ASCII upper-case letters
// 'A'-'Z' are folded to lower case. All other bytes, including bytes above
// unicode.MaxASCII, are copied unchanged.
//
// The result is stored in reuse's backing array when its capacity is at least
// len(input), regardless of its current length; otherwise a new slice is
// allocated. The returned slice always has length len(input).
func toLower(input []byte, reuse []byte) []byte {
	var output []byte
	if cap(reuse) < len(input) {
		output = make([]byte, len(input))
	} else {
		// The reuse decision is based on capacity, so reslice to the needed
		// length; indexing reuse directly would panic for a short but roomy
		// buffer such as buf[:0].
		output = reuse[:len(input)]
	}

	for i, c := range input {
		if c <= unicode.MaxASCII && 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		output[i] = c
	}
	return output
}
157
158
159
// WordConsumer is the callback Words invokes for each word it finds. start is
// the index of the word's first role and end is one past its last, i.e. the
// word occupies the half-open range [start, end).
type WordConsumer func(start, end int)
161
162
163
164 func Words(roles []RuneRole, consume WordConsumer) {
165 var wordStart int
166 for i, r := range roles {
167 switch r {
168 case RUCTail, RTail:
169 case RHead, RNone, RSep:
170 if i != wordStart {
171 consume(wordStart, i)
172 }
173 wordStart = i
174 if r != RHead {
175
176 wordStart = i + 1
177 }
178 }
179 }
180 if wordStart != len(roles) {
181 consume(wordStart, len(roles))
182 }
183 }
184
View as plain text