Source file src/runtime/internal/sys/intrinsics_common.go

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package sys
     6  
     7  // Copied from math/bits to avoid dependence.
     8  
     9  var len8tab = [256]uint8{
    10  	0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
    11  	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
    12  	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    13  	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    14  	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    15  	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    16  	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    17  	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    18  	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
    19  	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
    20  	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
    21  	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
    22  	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
    23  	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
    24  	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
    25  	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
    26  }
    27  
    28  var ntz8tab = [256]uint8{
    29  	0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
    30  	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
    31  	0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
    32  	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
    33  	0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
    34  	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
    35  	0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
    36  	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
    37  	0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
    38  	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
    39  	0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
    40  	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
    41  	0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
    42  	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
    43  	0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
    44  	0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
    45  }
    46  
    47  // len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
    48  func Len64(x uint64) (n int) {
    49  	if x >= 1<<32 {
    50  		x >>= 32
    51  		n = 32
    52  	}
    53  	if x >= 1<<16 {
    54  		x >>= 16
    55  		n += 16
    56  	}
    57  	if x >= 1<<8 {
    58  		x >>= 8
    59  		n += 8
    60  	}
    61  	return n + int(len8tab[x])
    62  }
    63  
    64  // --- OnesCount ---
    65  
    66  const m0 = 0x5555555555555555 // 01010101 ...
    67  const m1 = 0x3333333333333333 // 00110011 ...
    68  const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
    69  
    70  // OnesCount64 returns the number of one bits ("population count") in x.
    71  func OnesCount64(x uint64) int {
    72  	// Implementation: Parallel summing of adjacent bits.
    73  	// See "Hacker's Delight", Chap. 5: Counting Bits.
    74  	// The following pattern shows the general approach:
    75  	//
    76  	//   x = x>>1&(m0&m) + x&(m0&m)
    77  	//   x = x>>2&(m1&m) + x&(m1&m)
    78  	//   x = x>>4&(m2&m) + x&(m2&m)
    79  	//   x = x>>8&(m3&m) + x&(m3&m)
    80  	//   x = x>>16&(m4&m) + x&(m4&m)
    81  	//   x = x>>32&(m5&m) + x&(m5&m)
    82  	//   return int(x)
    83  	//
    84  	// Masking (& operations) can be left away when there's no
    85  	// danger that a field's sum will carry over into the next
    86  	// field: Since the result cannot be > 64, 8 bits is enough
    87  	// and we can ignore the masks for the shifts by 8 and up.
    88  	// Per "Hacker's Delight", the first line can be simplified
    89  	// more, but it saves at best one instruction, so we leave
    90  	// it alone for clarity.
    91  	const m = 1<<64 - 1
    92  	x = x>>1&(m0&m) + x&(m0&m)
    93  	x = x>>2&(m1&m) + x&(m1&m)
    94  	x = (x>>4 + x) & (m2 & m)
    95  	x += x >> 8
    96  	x += x >> 16
    97  	x += x >> 32
    98  	return int(x) & (1<<7 - 1)
    99  }
   100  
   101  var deBruijn64tab = [64]byte{
   102  	0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
   103  	62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
   104  	63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
   105  	54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
   106  }
   107  
   108  const deBruijn64 = 0x03f79d71b4ca8b09
   109  
   110  // TrailingZeros64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
   111  func TrailingZeros64(x uint64) int {
   112  	if x == 0 {
   113  		return 64
   114  	}
   115  	// If popcount is fast, replace code below with return popcount(^x & (x - 1)).
   116  	//
   117  	// x & -x leaves only the right-most bit set in the word. Let k be the
   118  	// index of that bit. Since only a single bit is set, the value is two
   119  	// to the power of k. Multiplying by a power of two is equivalent to
   120  	// left shifting, in this case by k bits. The de Bruijn (64 bit) constant
   121  	// is such that all six bit, consecutive substrings are distinct.
   122  	// Therefore, if we have a left shifted version of this constant we can
   123  	// find by how many bits it was shifted by looking at which six bit
   124  	// substring ended up at the top of the word.
   125  	// (Knuth, volume 4, section 7.3.1)
   126  	return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)])
   127  }
   128  
   129  // LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0.
   130  func LeadingZeros64(x uint64) int { return 64 - Len64(x) }
   131  
   132  // LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0.
   133  func LeadingZeros8(x uint8) int { return 8 - Len8(x) }
   134  
   135  // TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
   136  func TrailingZeros8(x uint8) int {
   137  	return int(ntz8tab[x])
   138  }
   139  
   140  // Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
   141  func Len8(x uint8) int {
   142  	return int(len8tab[x])
   143  }
   144  
   145  // Prefetch prefetches data from memory addr to cache
   146  //
   147  // AMD64: Produce PREFETCHT0 instruction
   148  //
   149  // ARM64: Produce PRFM instruction with PLDL1KEEP option
   150  func Prefetch(addr uintptr) {}
   151  
   152  // PrefetchStreamed prefetches data from memory addr, with a hint that this data is being streamed.
   153  // That is, it is likely to be accessed very soon, but only once. If possible, this will avoid polluting the cache.
   154  //
   155  // AMD64: Produce PREFETCHNTA instruction
   156  //
   157  // ARM64: Produce PRFM instruction with PLDL1STRM option
   158  func PrefetchStreamed(addr uintptr) {}
   159  

View as plain text