Source file src/hash/maphash/maphash.go
1 // Copyright 2019 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package maphash provides hash functions on byte sequences. 6 // These hash functions are intended to be used to implement hash tables or 7 // other data structures that need to map arbitrary strings or byte 8 // sequences to a uniform distribution on unsigned 64-bit integers. 9 // Each different instance of a hash table or data structure should use its own Seed. 10 // 11 // The hash functions are not cryptographically secure. 12 // (See crypto/sha256 and crypto/sha512 for cryptographic use.) 13 // 14 package maphash 15 16 import ( 17 "internal/unsafeheader" 18 "unsafe" 19 ) 20 21 // A Seed is a random value that selects the specific hash function 22 // computed by a Hash. If two Hashes use the same Seeds, they 23 // will compute the same hash values for any given input. 24 // If two Hashes use different Seeds, they are very likely to compute 25 // distinct hash values for any given input. 26 // 27 // A Seed must be initialized by calling MakeSeed. 28 // The zero seed is uninitialized and not valid for use with Hash's SetSeed method. 29 // 30 // Each Seed value is local to a single process and cannot be serialized 31 // or otherwise recreated in a different process. 32 type Seed struct { 33 s uint64 34 } 35 36 // A Hash computes a seeded hash of a byte sequence. 37 // 38 // The zero Hash is a valid Hash ready to use. 39 // A zero Hash chooses a random seed for itself during 40 // the first call to a Reset, Write, Seed, or Sum64 method. 41 // For control over the seed, use SetSeed. 42 // 43 // The computed hash values depend only on the initial seed and 44 // the sequence of bytes provided to the Hash object, not on the way 45 // in which the bytes are provided. For example, the three sequences 46 // 47 // h.Write([]byte{'f','o','o'}) 48 // h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o') 49 // h.WriteString("foo") 50 // 51 // all have the same effect. 52 // 53 // Hashes are intended to be collision-resistant, even for situations 54 // where an adversary controls the byte sequences being hashed. 55 // 56 // A Hash is not safe for concurrent use by multiple goroutines, but a Seed is. 57 // If multiple goroutines must compute the same seeded hash, 58 // each can declare its own Hash and call SetSeed with a common Seed. 59 type Hash struct { 60 _ [0]func() // not comparable 61 seed Seed // initial seed used for this hash 62 state Seed // current hash of all flushed bytes 63 buf [bufSize]byte // unflushed byte buffer 64 n int // number of unflushed bytes 65 } 66 67 // bufSize is the size of the Hash write buffer. 68 // The buffer ensures that writes depend only on the sequence of bytes, 69 // not the sequence of WriteByte/Write/WriteString calls, 70 // by always calling rthash with a full buffer (except for the tail). 71 const bufSize = 128 72 73 // initSeed seeds the hash if necessary. 74 // initSeed is called lazily before any operation that actually uses h.seed/h.state. 75 // Note that this does not include Write/WriteByte/WriteString in the case 76 // where they only add to h.buf. (If they write too much, they call h.flush, 77 // which does call h.initSeed.) 78 func (h *Hash) initSeed() { 79 if h.seed.s == 0 { 80 seed := MakeSeed() 81 h.seed = seed 82 h.state = seed 83 } 84 } 85 86 // WriteByte adds b to the sequence of bytes hashed by h. 87 // It never fails; the error result is for implementing io.ByteWriter. 88 func (h *Hash) WriteByte(b byte) error { 89 if h.n == len(h.buf) { 90 h.flush() 91 } 92 h.buf[h.n] = b 93 h.n++ 94 return nil 95 } 96 97 // Write adds b to the sequence of bytes hashed by h. 98 // It always writes all of b and never fails; the count and error result are for implementing io.Writer. 99 func (h *Hash) Write(b []byte) (int, error) { 100 size := len(b) 101 // Deal with bytes left over in h.buf. 102 // h.n <= bufSize is always true. 103 // Checking it is ~free and it lets the compiler eliminate a bounds check. 104 if h.n > 0 && h.n <= bufSize { 105 k := copy(h.buf[h.n:], b) 106 h.n += k 107 if h.n < bufSize { 108 // Copied the entirety of b to h.buf. 109 return size, nil 110 } 111 b = b[k:] 112 h.flush() 113 // No need to set h.n = 0 here; it happens just before exit. 114 } 115 // Process as many full buffers as possible, without copying, and calling initSeed only once. 116 if len(b) > bufSize { 117 h.initSeed() 118 for len(b) > bufSize { 119 h.state.s = rthash(&b[0], bufSize, h.state.s) 120 b = b[bufSize:] 121 } 122 } 123 // Copy the tail. 124 copy(h.buf[:], b) 125 h.n = len(b) 126 return size, nil 127 } 128 129 // WriteString adds the bytes of s to the sequence of bytes hashed by h. 130 // It always writes all of s and never fails; the count and error result are for implementing io.StringWriter. 131 func (h *Hash) WriteString(s string) (int, error) { 132 // WriteString mirrors Write. See Write for comments. 133 size := len(s) 134 if h.n > 0 && h.n <= bufSize { 135 k := copy(h.buf[h.n:], s) 136 h.n += k 137 if h.n < bufSize { 138 return size, nil 139 } 140 s = s[k:] 141 h.flush() 142 } 143 if len(s) > bufSize { 144 h.initSeed() 145 for len(s) > bufSize { 146 ptr := (*byte)((*unsafeheader.String)(unsafe.Pointer(&s)).Data) 147 h.state.s = rthash(ptr, bufSize, h.state.s) 148 s = s[bufSize:] 149 } 150 } 151 copy(h.buf[:], s) 152 h.n = len(s) 153 return size, nil 154 } 155 156 // Seed returns h's seed value. 157 func (h *Hash) Seed() Seed { 158 h.initSeed() 159 return h.seed 160 } 161 162 // SetSeed sets h to use seed, which must have been returned by MakeSeed 163 // or by another Hash's Seed method. 164 // Two Hash objects with the same seed behave identically. 165 // Two Hash objects with different seeds will very likely behave differently. 166 // Any bytes added to h before this call will be discarded. 167 func (h *Hash) SetSeed(seed Seed) { 168 if seed.s == 0 { 169 panic("maphash: use of uninitialized Seed") 170 } 171 h.seed = seed 172 h.state = seed 173 h.n = 0 174 } 175 176 // Reset discards all bytes added to h. 177 // (The seed remains the same.) 178 func (h *Hash) Reset() { 179 h.initSeed() 180 h.state = h.seed 181 h.n = 0 182 } 183 184 // precondition: buffer is full. 185 func (h *Hash) flush() { 186 if h.n != len(h.buf) { 187 panic("maphash: flush of partially full buffer") 188 } 189 h.initSeed() 190 h.state.s = rthash(&h.buf[0], h.n, h.state.s) 191 h.n = 0 192 } 193 194 // Sum64 returns h's current 64-bit value, which depends on 195 // h's seed and the sequence of bytes added to h since the 196 // last call to Reset or SetSeed. 197 // 198 // All bits of the Sum64 result are close to uniformly and 199 // independently distributed, so it can be safely reduced 200 // by using bit masking, shifting, or modular arithmetic. 201 func (h *Hash) Sum64() uint64 { 202 h.initSeed() 203 return rthash(&h.buf[0], h.n, h.state.s) 204 } 205 206 // MakeSeed returns a new random seed. 207 func MakeSeed() Seed { 208 var s1, s2 uint64 209 for { 210 s1 = uint64(runtime_fastrand()) 211 s2 = uint64(runtime_fastrand()) 212 // We use seed 0 to indicate an uninitialized seed/hash, 213 // so keep trying until we get a non-zero seed. 214 if s1|s2 != 0 { 215 break 216 } 217 } 218 return Seed{s: s1<<32 + s2} 219 } 220 221 //go:linkname runtime_fastrand runtime.fastrand 222 func runtime_fastrand() uint32 223 224 func rthash(ptr *byte, len int, seed uint64) uint64 { 225 if len == 0 { 226 return seed 227 } 228 // The runtime hasher only works on uintptr. For 64-bit 229 // architectures, we use the hasher directly. Otherwise, 230 // we use two parallel hashers on the lower and upper 32 bits. 231 if unsafe.Sizeof(uintptr(0)) == 8 { 232 return uint64(runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len))) 233 } 234 lo := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len)) 235 hi := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed>>32), uintptr(len)) 236 return uint64(hi)<<32 | uint64(lo) 237 } 238 239 //go:linkname runtime_memhash runtime.memhash 240 //go:noescape 241 func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr 242 243 // Sum appends the hash's current 64-bit value to b. 244 // It exists for implementing hash.Hash. 245 // For direct calls, it is more efficient to use Sum64. 246 func (h *Hash) Sum(b []byte) []byte { 247 x := h.Sum64() 248 return append(b, 249 byte(x>>0), 250 byte(x>>8), 251 byte(x>>16), 252 byte(x>>24), 253 byte(x>>32), 254 byte(x>>40), 255 byte(x>>48), 256 byte(x>>56)) 257 } 258 259 // Size returns h's hash value size, 8 bytes. 260 func (h *Hash) Size() int { return 8 } 261 262 // BlockSize returns h's block size. 263 func (h *Hash) BlockSize() int { return len(h.buf) } 264