Text file src/internal/bytealg/equal_amd64.s

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  // memequal(a, b unsafe.Pointer, size uintptr) bool
     9  TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT,$0-25
    10  	// On entry: AX = a, BX = b, CX = size.
    11  	// memeqbody wants a in SI, b in DI and the byte count in BX.
    12  	CMPQ	AX, BX
    13  	JEQ	same_ptr	// both arguments are the same address
    14  	MOVQ	BX, DI	// move b out of BX before BX is reused for the count
    15  	MOVQ	CX, BX
    16  	MOVQ	AX, SI
    17  	// Tail jump: memeqbody's RET returns straight to our caller.
    18  	JMP	memeqbody<>(SB)
    19  same_ptr:
    20  	MOVQ	$1, AX	// return 1
    21  	RET
    22  
    23  // memequal_varlen(a, b unsafe.Pointer) bool
    24  TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
    25  	// On entry: AX = a, BX = b; the size is read from 8(DX).
    26  	// memeqbody wants a in SI, b in DI and the byte count in BX.
    27  	CMPQ	AX, BX
    28  	JEQ	same_ptr	// both arguments are the same address
    29  	MOVQ	BX, DI	// move b out of BX before BX is reused for the count
    30  	MOVQ	8(DX), BX    // compiler stores size at offset 8 in the closure
    31  	MOVQ	AX, SI
    32  	// Tail jump: memeqbody's RET returns straight to our caller.
    33  	JMP	memeqbody<>(SB)
    34  same_ptr:
    35  	MOVQ	$1, AX	// return 1
    36  	RET
    37  
    38  // memeqbody compares count bytes at a with count bytes at b.
    39  // Input: a in SI, b in DI, count in BX.
    40  // Output: result in AX (1 if equal, 0 if not); the SETEQ exits
    41  //   write only the low byte of AX.
    42  // May overwrite SI, DI, BX, CX, DX and the flags; the 64-byte loops
    43  //   also overwrite X0-X7 or Y0-Y6.
    44  TEXT memeqbody<>(SB),NOSPLIT,$0-0
    45  	CMPQ	BX, $8
    46  	JB	small	// fewer than 8 bytes: one partial-word compare
    47  	CMPQ	BX, $64
    48  	JB	bigloop	// 8..63 bytes: go straight to the 8-byte loop
    49  	CMPB	internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
    50  	JE	hugeloop_avx2	// flag byte is 1: take the ymm loop
    51  
    52  	// 64 bytes at a time using xmm registers
    53  hugeloop:
    54  	CMPQ	BX, $64
    55  	JB	bigloop	// under 64 bytes left: finish 8 bytes at a time
    56  	MOVOU	(SI), X0
    57  	MOVOU	(DI), X1
    58  	MOVOU	16(SI), X2
    59  	MOVOU	16(DI), X3
    60  	MOVOU	32(SI), X4
    61  	MOVOU	32(DI), X5
    62  	MOVOU	48(SI), X6
    63  	MOVOU	48(DI), X7
    64  	PCMPEQB	X1, X0	// each byte of X0 becomes 0xff where a and b match, else 0
    65  	PCMPEQB	X3, X2
    66  	PCMPEQB	X5, X4
    67  	PCMPEQB	X7, X6
    68  	PAND	X2, X0	// AND the four 16-byte results together into X0
    69  	PAND	X6, X4
    70  	PAND	X4, X0
    71  	PMOVMSKB X0, DX	// DX = top bit of each of the 16 bytes of X0
    72  	ADDQ	$64, SI
    73  	ADDQ	$64, DI
    74  	SUBQ	$64, BX
    75  	CMPL	DX, $0xffff	// all 16 bits set only if all 64 bytes matched
    76  	JEQ	hugeloop
    77  	XORQ	AX, AX	// return 0
    78  	RET
    79  
    80  	// 64 bytes at a time using ymm registers
    81  hugeloop_avx2:
    82  	CMPQ	BX, $64
    83  	JB	bigloop_avx2	// under 64 bytes left: leave the AVX path
    84  	VMOVDQU	(SI), Y0
    85  	VMOVDQU	(DI), Y1
    86  	VMOVDQU	32(SI), Y2
    87  	VMOVDQU	32(DI), Y3
    88  	VPCMPEQB	Y1, Y0, Y4	// Y4: 0xff per matching byte of the first 32 bytes
    89  	VPCMPEQB	Y2, Y3, Y5	// Y5: same for the second 32 bytes
    90  	VPAND	Y4, Y5, Y6
    91  	VPMOVMSKB Y6, DX	// DX = top bit of each of the 32 bytes of Y6
    92  	ADDQ	$64, SI
    93  	ADDQ	$64, DI
    94  	SUBQ	$64, BX
    95  	CMPL	DX, $0xffffffff	// all 32 bits set only if all 64 bytes matched
    96  	JEQ	hugeloop_avx2
    97  	VZEROUPPER	// zero the upper ymm halves before returning
    98  	XORQ	AX, AX	// return 0
    99  	RET
   100  
   101  bigloop_avx2:
   102  	VZEROUPPER	// zero the upper ymm halves, then fall through to bigloop
   103  
   104  	// 8 bytes at a time using 64-bit register
   105  bigloop:
   106  	CMPQ	BX, $8
   107  	JBE	leftover	// 8 or fewer bytes left: one final load covers them
   108  	MOVQ	(SI), CX
   109  	MOVQ	(DI), DX
   110  	ADDQ	$8, SI
   111  	ADDQ	$8, DI
   112  	SUBQ	$8, BX
   113  	CMPQ	CX, DX
   114  	JEQ	bigloop
   115  	XORQ	AX, AX	// return 0
   116  	RET
   117  
   118  	// remaining 0-8 bytes; this path is only reached when the original count was >= 8
   119  leftover:
   120  	MOVQ	-8(SI)(BX*1), CX	// last 8 bytes of a; may re-read bytes already compared
   121  	MOVQ	-8(DI)(BX*1), DX	// last 8 bytes of b
   122  	CMPQ	CX, DX
   123  	SETEQ	AX
   124  	RET
   125  
   126  small:
   127  	CMPQ	BX, $0
   128  	JEQ	equal	// empty input: ZF is set, so SETEQ at equal gives 1
   129  
   130  	LEAQ	0(BX*8), CX	// CX = 8*count
   131  	NEGQ	CX	// CX = -8*count; 64-bit shifts use CX mod 64 = 64-8*count
   132  
   133  	CMPB	SI, $0xf8	// looks only at the low byte of the address
   134  	JA	si_high
   135  
   136  	// load at SI won't cross a page boundary.
   137  	MOVQ	(SI), SI	// full 8-byte load; bytes past count are discarded by the SHLQ below
   138  	JMP	si_finish
   139  si_high:
   140  	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
   141  	MOVQ	-8(SI)(BX*1), SI	// 8 bytes ending at a+count
   142  	SHRQ	CX, SI	// shift the count wanted bytes down to the low end
   143  si_finish:
   144  
   145  	// same for DI.
   146  	CMPB	DI, $0xf8
   147  	JA	di_high
   148  	MOVQ	(DI), DI
   149  	JMP	di_finish
   150  di_high:
   151  	MOVQ	-8(DI)(BX*1), DI
   152  	SHRQ	CX, DI
   153  di_finish:
   154  
   155  	SUBQ	SI, DI	// low 8*count bits of the difference are zero iff the wanted bytes match
   156  	SHLQ	CX, DI	// drop the unwanted high bits; ZF now reflects only the wanted bytes
   157  equal:
   158  	SETEQ	AX	// ZF comes from CMPQ BX, $0 or from the SHLQ
   159  	RET
   160  

View as plain text