// Copyright 2019 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package sys // Copied from math/bits to avoid dependence. var len8tab = [256]uint8{ 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, } var ntz8tab = [256]uint8{ 0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, } // len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0. func Len64(x uint64) (n int) { if x >= 1<<32 { x >>= 32 n = 32 } if x >= 1<<16 { x >>= 16 n += 16 } if x >= 1<<8 { x >>= 8 n += 8 } return n + int(len8tab[x]) } // --- OnesCount --- const m0 = 0x5555555555555555 // 01010101 ... const m1 = 0x3333333333333333 // 00110011 ... const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ... // OnesCount64 returns the number of one bits ("population count") in x. func OnesCount64(x uint64) int { // Implementation: Parallel summing of adjacent bits. // See "Hacker's Delight", Chap. 5: Counting Bits. // The following pattern shows the general approach: // // x = x>>1&(m0&m) + x&(m0&m) // x = x>>2&(m1&m) + x&(m1&m) // x = x>>4&(m2&m) + x&(m2&m) // x = x>>8&(m3&m) + x&(m3&m) // x = x>>16&(m4&m) + x&(m4&m) // x = x>>32&(m5&m) + x&(m5&m) // return int(x) // // Masking (& operations) can be left away when there's no // danger that a field's sum will carry over into the next // field: Since the result cannot be > 64, 8 bits is enough // and we can ignore the masks for the shifts by 8 and up. // Per "Hacker's Delight", the first line can be simplified // more, but it saves at best one instruction, so we leave // it alone for clarity. const m = 1<<64 - 1 x = x>>1&(m0&m) + x&(m0&m) x = x>>2&(m1&m) + x&(m1&m) x = (x>>4 + x) & (m2 & m) x += x >> 8 x += x >> 16 x += x >> 32 return int(x) & (1<<7 - 1) } var deBruijn64tab = [64]byte{ 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4, 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5, 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11, 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6, } const deBruijn64 = 0x03f79d71b4ca8b09 // TrailingZeros64 returns the number of trailing zero bits in x; the result is 64 for x == 0. func TrailingZeros64(x uint64) int { if x == 0 { return 64 } // If popcount is fast, replace code below with return popcount(^x & (x - 1)). // // x & -x leaves only the right-most bit set in the word. Let k be the // index of that bit. Since only a single bit is set, the value is two // to the power of k. Multiplying by a power of two is equivalent to // left shifting, in this case by k bits. The de Bruijn (64 bit) constant // is such that all six bit, consecutive substrings are distinct. // Therefore, if we have a left shifted version of this constant we can // find by how many bits it was shifted by looking at which six bit // substring ended up at the top of the word. // (Knuth, volume 4, section 7.3.1) return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)]) } // LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0. func LeadingZeros64(x uint64) int { return 64 - Len64(x) } // LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0. func LeadingZeros8(x uint8) int { return 8 - Len8(x) } // TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0. func TrailingZeros8(x uint8) int { return int(ntz8tab[x]) } // Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0. func Len8(x uint8) int { return int(len8tab[x]) } // Prefetch prefetches data from memory addr to cache // // AMD64: Produce PREFETCHT0 instruction // // ARM64: Produce PRFM instruction with PLDL1KEEP option func Prefetch(addr uintptr) {} // PrefetchStreamed prefetches data from memory addr, with a hint that this data is being streamed. // That is, it is likely to be accessed very soon, but only once. If possible, this will avoid polluting the cache. // // AMD64: Produce PREFETCHNTA instruction // // ARM64: Produce PRFM instruction with PLDL1STRM option func PrefetchStreamed(addr uintptr) {}