Source file
src/hash/maphash/smhasher_test.go
1
2
3
4
5 package maphash
6
7 import (
8 "fmt"
9 "math"
10 "math/rand"
11 "runtime"
12 "strings"
13 "testing"
14 "unsafe"
15 )
16
17
18
19
20
21 var fixedSeed = MakeSeed()
22
23
24
25
26 func TestSmhasherSanity(t *testing.T) {
27 r := rand.New(rand.NewSource(1234))
28 const REP = 10
29 const KEYMAX = 128
30 const PAD = 16
31 const OFFMAX = 16
32 for k := 0; k < REP; k++ {
33 for n := 0; n < KEYMAX; n++ {
34 for i := 0; i < OFFMAX; i++ {
35 var b [KEYMAX + OFFMAX + 2*PAD]byte
36 var c [KEYMAX + OFFMAX + 2*PAD]byte
37 randBytes(r, b[:])
38 randBytes(r, c[:])
39 copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
40 if bytesHash(b[PAD:PAD+n]) != bytesHash(c[PAD+i:PAD+i+n]) {
41 t.Errorf("hash depends on bytes outside key")
42 }
43 }
44 }
45 }
46 }
47
48 func bytesHash(b []byte) uint64 {
49 var h Hash
50 h.SetSeed(fixedSeed)
51 h.Write(b)
52 return h.Sum64()
53 }
54 func stringHash(s string) uint64 {
55 var h Hash
56 h.SetSeed(fixedSeed)
57 h.WriteString(s)
58 return h.Sum64()
59 }
60
61 const hashSize = 64
62
63 func randBytes(r *rand.Rand, b []byte) {
64 r.Read(b)
65 }
66
67
68 type hashSet struct {
69 m map[uint64]struct{}
70 n int
71 }
72
73 func newHashSet() *hashSet {
74 return &hashSet{make(map[uint64]struct{}), 0}
75 }
76 func (s *hashSet) add(h uint64) {
77 s.m[h] = struct{}{}
78 s.n++
79 }
80 func (s *hashSet) addS(x string) {
81 s.add(stringHash(x))
82 }
83 func (s *hashSet) addB(x []byte) {
84 s.add(bytesHash(x))
85 }
86 func (s *hashSet) addS_seed(x string, seed Seed) {
87 var h Hash
88 h.SetSeed(seed)
89 h.WriteString(x)
90 s.add(h.Sum64())
91 }
92 func (s *hashSet) check(t *testing.T) {
93 const SLOP = 10.0
94 collisions := s.n - len(s.m)
95 pairs := int64(s.n) * int64(s.n-1) / 2
96 expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
97 stddev := math.Sqrt(expected)
98 if float64(collisions) > expected+SLOP*(3*stddev+1) {
99 t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev)
100 }
101 }
102
103
104 func TestSmhasherAppendedZeros(t *testing.T) {
105 s := "hello" + strings.Repeat("\x00", 256)
106 h := newHashSet()
107 for i := 0; i <= len(s); i++ {
108 h.addS(s[:i])
109 }
110 h.check(t)
111 }
112
113
114 func TestSmhasherSmallKeys(t *testing.T) {
115 h := newHashSet()
116 var b [3]byte
117 for i := 0; i < 256; i++ {
118 b[0] = byte(i)
119 h.addB(b[:1])
120 for j := 0; j < 256; j++ {
121 b[1] = byte(j)
122 h.addB(b[:2])
123 if !testing.Short() {
124 for k := 0; k < 256; k++ {
125 b[2] = byte(k)
126 h.addB(b[:3])
127 }
128 }
129 }
130 }
131 h.check(t)
132 }
133
134
135 func TestSmhasherZeros(t *testing.T) {
136 N := 256 * 1024
137 if testing.Short() {
138 N = 1024
139 }
140 h := newHashSet()
141 b := make([]byte, N)
142 for i := 0; i <= N; i++ {
143 h.addB(b[:i])
144 }
145 h.check(t)
146 }
147
148
149 func TestSmhasherTwoNonzero(t *testing.T) {
150 if runtime.GOARCH == "wasm" {
151 t.Skip("Too slow on wasm")
152 }
153 if testing.Short() {
154 t.Skip("Skipping in short mode")
155 }
156 h := newHashSet()
157 for n := 2; n <= 16; n++ {
158 twoNonZero(h, n)
159 }
160 h.check(t)
161 }
162 func twoNonZero(h *hashSet, n int) {
163 b := make([]byte, n)
164
165
166 h.addB(b)
167
168
169 for i := 0; i < n; i++ {
170 for x := 1; x < 256; x++ {
171 b[i] = byte(x)
172 h.addB(b)
173 b[i] = 0
174 }
175 }
176
177
178 for i := 0; i < n; i++ {
179 for x := 1; x < 256; x++ {
180 b[i] = byte(x)
181 for j := i + 1; j < n; j++ {
182 for y := 1; y < 256; y++ {
183 b[j] = byte(y)
184 h.addB(b)
185 b[j] = 0
186 }
187 }
188 b[i] = 0
189 }
190 }
191 }
192
193
194 func TestSmhasherCyclic(t *testing.T) {
195 if testing.Short() {
196 t.Skip("Skipping in short mode")
197 }
198 r := rand.New(rand.NewSource(1234))
199 const REPEAT = 8
200 const N = 1000000
201 for n := 4; n <= 12; n++ {
202 h := newHashSet()
203 b := make([]byte, REPEAT*n)
204 for i := 0; i < N; i++ {
205 b[0] = byte(i * 79 % 97)
206 b[1] = byte(i * 43 % 137)
207 b[2] = byte(i * 151 % 197)
208 b[3] = byte(i * 199 % 251)
209 randBytes(r, b[4:n])
210 for j := n; j < n*REPEAT; j++ {
211 b[j] = b[j-n]
212 }
213 h.addB(b)
214 }
215 h.check(t)
216 }
217 }
218
219
220 func TestSmhasherSparse(t *testing.T) {
221 if runtime.GOARCH == "wasm" {
222 t.Skip("Too slow on wasm")
223 }
224 if testing.Short() {
225 t.Skip("Skipping in short mode")
226 }
227 sparse(t, 32, 6)
228 sparse(t, 40, 6)
229 sparse(t, 48, 5)
230 sparse(t, 56, 5)
231 sparse(t, 64, 5)
232 sparse(t, 96, 4)
233 sparse(t, 256, 3)
234 sparse(t, 2048, 2)
235 }
236 func sparse(t *testing.T, n int, k int) {
237 b := make([]byte, n/8)
238 h := newHashSet()
239 setbits(h, b, 0, k)
240 h.check(t)
241 }
242
243
244 func setbits(h *hashSet, b []byte, i int, k int) {
245 h.addB(b)
246 if k == 0 {
247 return
248 }
249 for j := i; j < len(b)*8; j++ {
250 b[j/8] |= byte(1 << uint(j&7))
251 setbits(h, b, j+1, k-1)
252 b[j/8] &= byte(^(1 << uint(j&7)))
253 }
254 }
255
256
257
258 func TestSmhasherPermutation(t *testing.T) {
259 if runtime.GOARCH == "wasm" {
260 t.Skip("Too slow on wasm")
261 }
262 if testing.Short() {
263 t.Skip("Skipping in short mode")
264 }
265 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
266 permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
267 permutation(t, []uint32{0, 1}, 20)
268 permutation(t, []uint32{0, 1 << 31}, 20)
269 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
270 }
271 func permutation(t *testing.T, s []uint32, n int) {
272 b := make([]byte, n*4)
273 h := newHashSet()
274 genPerm(h, b, s, 0)
275 h.check(t)
276 }
277 func genPerm(h *hashSet, b []byte, s []uint32, n int) {
278 h.addB(b[:n])
279 if n == len(b) {
280 return
281 }
282 for _, v := range s {
283 b[n] = byte(v)
284 b[n+1] = byte(v >> 8)
285 b[n+2] = byte(v >> 16)
286 b[n+3] = byte(v >> 24)
287 genPerm(h, b, s, n+4)
288 }
289 }
290
291 type key interface {
292 clear()
293 random(r *rand.Rand)
294 bits() int
295 flipBit(i int)
296 hash() uint64
297 name() string
298 }
299
300 type bytesKey struct {
301 b []byte
302 }
303
304 func (k *bytesKey) clear() {
305 for i := range k.b {
306 k.b[i] = 0
307 }
308 }
309 func (k *bytesKey) random(r *rand.Rand) {
310 randBytes(r, k.b)
311 }
312 func (k *bytesKey) bits() int {
313 return len(k.b) * 8
314 }
315 func (k *bytesKey) flipBit(i int) {
316 k.b[i>>3] ^= byte(1 << uint(i&7))
317 }
318 func (k *bytesKey) hash() uint64 {
319 return bytesHash(k.b)
320 }
321 func (k *bytesKey) name() string {
322 return fmt.Sprintf("bytes%d", len(k.b))
323 }
324
325
326 func TestSmhasherAvalanche(t *testing.T) {
327 if runtime.GOARCH == "wasm" {
328 t.Skip("Too slow on wasm")
329 }
330 if testing.Short() {
331 t.Skip("Skipping in short mode")
332 }
333 avalancheTest1(t, &bytesKey{make([]byte, 2)})
334 avalancheTest1(t, &bytesKey{make([]byte, 4)})
335 avalancheTest1(t, &bytesKey{make([]byte, 8)})
336 avalancheTest1(t, &bytesKey{make([]byte, 16)})
337 avalancheTest1(t, &bytesKey{make([]byte, 32)})
338 avalancheTest1(t, &bytesKey{make([]byte, 200)})
339 }
340 func avalancheTest1(t *testing.T, k key) {
341 const REP = 100000
342 r := rand.New(rand.NewSource(1234))
343 n := k.bits()
344
345
346
347 grid := make([][hashSize]int, n)
348
349 for z := 0; z < REP; z++ {
350
351 k.random(r)
352 h := k.hash()
353
354
355 for i := 0; i < n; i++ {
356 k.flipBit(i)
357 d := h ^ k.hash()
358 k.flipBit(i)
359
360
361 g := &grid[i]
362 for j := 0; j < hashSize; j++ {
363 g[j] += int(d & 1)
364 d >>= 1
365 }
366 }
367 }
368
369
370
371
372
373
374 N := n * hashSize
375 var c float64
376
377 for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
378 }
379 c *= 4.0
380 mean := .5 * REP
381 stddev := .5 * math.Sqrt(REP)
382 low := int(mean - c*stddev)
383 high := int(mean + c*stddev)
384 for i := 0; i < n; i++ {
385 for j := 0; j < hashSize; j++ {
386 x := grid[i][j]
387 if x < low || x > high {
388 t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP)
389 }
390 }
391 }
392 }
393
394
395 func TestSmhasherWindowed(t *testing.T) {
396 windowed(t, &bytesKey{make([]byte, 128)})
397 }
398 func windowed(t *testing.T, k key) {
399 if runtime.GOARCH == "wasm" {
400 t.Skip("Too slow on wasm")
401 }
402 if testing.Short() {
403 t.Skip("Skipping in short mode")
404 }
405 const BITS = 16
406
407 for r := 0; r < k.bits(); r++ {
408 h := newHashSet()
409 for i := 0; i < 1<<BITS; i++ {
410 k.clear()
411 for j := 0; j < BITS; j++ {
412 if i>>uint(j)&1 != 0 {
413 k.flipBit((j + r) % k.bits())
414 }
415 }
416 h.add(k.hash())
417 }
418 h.check(t)
419 }
420 }
421
422
423 func TestSmhasherText(t *testing.T) {
424 if testing.Short() {
425 t.Skip("Skipping in short mode")
426 }
427 text(t, "Foo", "Bar")
428 text(t, "FooBar", "")
429 text(t, "", "FooBar")
430 }
431 func text(t *testing.T, prefix, suffix string) {
432 const N = 4
433 const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
434 const L = len(S)
435 b := make([]byte, len(prefix)+N+len(suffix))
436 copy(b, prefix)
437 copy(b[len(prefix)+N:], suffix)
438 h := newHashSet()
439 c := b[len(prefix):]
440 for i := 0; i < L; i++ {
441 c[0] = S[i]
442 for j := 0; j < L; j++ {
443 c[1] = S[j]
444 for k := 0; k < L; k++ {
445 c[2] = S[k]
446 for x := 0; x < L; x++ {
447 c[3] = S[x]
448 h.addB(b)
449 }
450 }
451 }
452 }
453 h.check(t)
454 }
455
456
457 func TestSmhasherSeed(t *testing.T) {
458 if unsafe.Sizeof(uintptr(0)) == 4 {
459 t.Skip("32-bit platforms don't have ideal seed-input distributions (see issue 33988)")
460 }
461 h := newHashSet()
462 const N = 100000
463 s := "hello"
464 for i := 0; i < N; i++ {
465 h.addS_seed(s, Seed{s: uint64(i + 1)})
466 h.addS_seed(s, Seed{s: uint64(i+1) << 32})
467 }
468 h.check(t)
469 }
470
View as plain text