Text file src/internal/bytealg/count_ppc64x.s

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build ppc64le || ppc64
     6  
     7  #include "go_asm.h"
     8  #include "textflag.h"
     9  
    10  TEXT ·Count<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-40
    11  #ifdef GOEXPERIMENT_regabiargs
    12  // Count(b, c): count the bytes of byte array b that equal c.
    13  // In: R3 = address of b's bytes, R4 = length, R6 = c (copied to R5 below).
    14          MOVBZ R6, R5              // R5 = c, zero-extended from its low byte
    15  #else
    16  	// Arguments live in the caller's frame; load them into the body's registers.
    17  	MOVD  $ret+32(FP), R14    // R14 = address the result is stored to
    18  	MOVBZ c+24(FP), R5        // R5 = byte to count
    19  	MOVD  b_len+8(FP), R4     // R4 = number of bytes in b
    20  	MOVD  b_base+0(FP), R3    // R3 = address of b's bytes
    21  #endif
    22  	BR    countbytebody<>(SB) // plain branch, no link: the shared body does the RET
    23  
    24  TEXT ·CountString<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-32
    25  #ifdef GOEXPERIMENT_regabiargs
    26  // CountString(s, c): count the bytes of string s that equal c.
    27  // In: R3 = address of s's bytes, R4 = length of s, R5 = c.
    28          MOVBZ R5, R5              // keep only the low byte of c, zero-extended
    29  #else
    30  	MOVD  $ret+24(FP), R14    // R14 = address the result is stored to
    31  	MOVBZ c+16(FP), R5        // R5 = byte to count
    32  	MOVD  s_len+8(FP), R4     // R4 = length of s in bytes
    33  	MOVD  s_base+0(FP), R3    // R3 = address of s's bytes
    34  #endif
    35  	BR    countbytebody<>(SB) // plain branch, no link: the shared body does the RET
    36  
    37  // countbytebody counts how many of the R4 bytes starting at R3 equal R5.
    38  // In:  R3 = address of the data, R4 = length in bytes, R5 = byte to count,
    39  //      R14 = address for the return value when not regabi.
    40  // Out: the count, in R3 (regabi) or stored through R14 (otherwise).
    41  // Endianness does not matter: matching bytes are only counted, so their
    42  // order inside a loaded doubleword or vector is irrelevant.
    43  TEXT countbytebody<>(SB), NOSPLIT|NOFRAME, $0-0
    44  	DCBT (R3)    // Prepare cache line.
    45  	MOVD R0, R18 // R18 = running match count; starts at R0 (zero by Go's ppc64 convention)
    46  	MOVD R4, R16 // R16 = bytes not yet examined
    47  
    48  	// Copy the byte to count into all 8 bytes of R6; CMPB in the tail loop needs it.
    49  	MOVD   R5, R6
    50  	RLDIMI $8, R6, $48, R6  // low 2 bytes now hold the byte
    51  	RLDIMI $16, R6, $32, R6 // low 4 bytes now hold the byte
    52  	RLDIMI $32, R6, $0, R6  // fill reg with the byte to count
    53  
    54  	VSPLTISW $3, V4     // V4 = 3 in every word; shift count used after the vector loop
    55  	MTVRD    R6, V1     // move compare byte
    56  	VSPLTB   $7, V1, V1 // replicate byte across V1
    57  
    58  	CMPU   R4, $32          // Check if it's a small string (<32 bytes)
    59  	BLT    tail             // Jump to the small string case
    60  	XXLXOR VS37, VS37, VS37 // clear V5 (aka VS37) to use as accumulator
    61  
    62  cmploop:
    63  	LXVW4X (R3), VS32 // load the next 16 bytes into V0 (aka VS32)
    64  
    65  	// when the bytes match, the corresponding byte contains all 1s
    66  	VCMPEQUB V1, V0, V2     // compare bytes
    67  	VPOPCNTD V2, V3         // each double word contains its count of set bits
    68  	VADDUDM  V3, V5, V5     // accumulate bit count in each double word
    69  	ADD      $16, R3, R3    // increment pointer
    70  	SUB      $16, R16, R16  // remaining bytes
    71  	CMP      R16, $16       // at least 16 remaining?
    72  	BGE      cmploop
    73  	VSRD     V5, V4, V5     // 8 set bits per match: shift by 3 to convert bits to bytes
    74  	VSLDOI   $8, V5, V5, V6 // rotate so the other double word can be read out as well
    75  	MFVSRD   V5, R9         // R9 = count from one double word
    76  	MFVSRD   V6, R10        // R10 = count from the other double word
    77  	ADD      R9, R10, R9    // R9 = matches found by the vector loop
    78  	ADD      R9, R18, R18   // fold them into the running count
    79  
    80  tail:
    81  	CMP R16, $8 // 8 bytes left?
    82  	BLT small
    83  
    84  	MOVD    (R3), R12     // load 8 bytes
    85  	CMPB    R12, R6, R17  // matching bytes become 0xFF in R17, others 0x00
    86  	POPCNTD R17, R15      // bit count
    87  	SRD     $3, R15, R15  // byte count
    88  	ADD     R15, R18, R18 // add to byte count
    89  
    90  next1:
    91  	ADD $8, R3, R3   // advance past the 8 bytes just examined
    92  	SUB $8, R16, R16 // remaining bytes
    93  	BR  tail
    94  
    95  small:
    96  	CMP   R16, $0   // any remaining?
    97  	BEQ   done
    98  	MOVBZ (R3), R12 // check each remaining byte
    99  	CMP   R12, R5   // both sides are zero-extended bytes
   100  	BNE   next2
   101  	ADD   $1, R18   // one more match
   102  
   103  next2:
   104  	SUB $1, R16 // one byte fewer remaining
   105  	ADD $1, R3  // inc address
   106  	BR  small
   107  
   108  done:
   109  #ifdef GOEXPERIMENT_regabiargs
   110          MOVD R18, R3    // return count
   111  #else
   112  	MOVD R18, (R14) // return count
   113  #endif
   114  
   115  	RET
   116  

View as plain text