1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "textflag.h"
7
8 // memequal(a, b unsafe.Pointer, size uintptr) bool: entry point that hands the comparison to memeqbody.
9 TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT,$0-25
10 // On entry: AX = a, BX = b, CX = size.
11 // memeqbody wants: SI = a, DI = b, BX = size.
12 // Equal pointers are answered right here without reading any memory.
13 CMPQ AX, BX
14 JEQ same
15 MOVQ AX, SI
16 MOVQ BX, DI // b must leave BX before size is moved into it
17 MOVQ CX, BX
18 JMP memeqbody<>(SB) // tail jump: memeqbody produces the result in AX and returns to our caller
19 same:
20 MOVQ $1, AX // a == b, return 1
21 RET
22
23 // memequal_varlen(a, b unsafe.Pointer) bool: same comparison, with the size read from 8(DX) instead of an argument.
24 TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
25 // On entry: AX = a, BX = b, 8(DX) = size.
26 // memeqbody wants: SI = a, DI = b, BX = size.
27 // Equal pointers are answered right here without reading the size or any data.
28 CMPQ AX, BX
29 JEQ same
30 MOVQ AX, SI
31 MOVQ BX, DI // b must leave BX before size is loaded into it
32 MOVQ 8(DX), BX // compiler stores size at offset 8 in the closure
33 JMP memeqbody<>(SB) // tail jump: memeqbody produces the result in AX and returns to our caller
34 same:
35 MOVQ $1, AX // a == b, return 1
36 RET
37
38 // memeqbody compares the count bytes at a with the count bytes at b.
39 // Input: a in SI, b in DI, count in BX.
40 // Output: result in AX. Mismatch exits zero all of AX with XORQ; the SETEQ exits write only the low byte (1 = equal, 0 = different).
41 // May modify SI, DI, BX, CX, DX, and X0-X7 or Y0-Y6 depending on the path taken.
42 // Dispatch: count < 8 -> small; 8 <= count < 64 -> bigloop; count >= 64 -> hugeloop (xmm) or hugeloop_avx2 (ymm).
43 // Every path ends in RET, so the entry points above reach this body with JMP rather than CALL.
44 TEXT memeqbody<>(SB),NOSPLIT,$0-0
45 CMPQ BX, $8
46 JB small // fewer than 8 bytes: handled with one partial-word load per side
47 CMPQ BX, $64
48 JB bigloop // 8..63 bytes: go straight to the 8-byte loop
49 CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1 // 64+ bytes: test the HasAVX2 flag byte
50 JE hugeloop_avx2
51
52 // 64 bytes at a time using xmm registers
53 hugeloop:
54 CMPQ BX, $64
55 JB bigloop // fewer than 64 bytes left (possibly 0): finish in the 8-byte loop
56 MOVOU (SI), X0 // unaligned loads: four 16-byte chunks from a (even regs) and b (odd regs)
57 MOVOU (DI), X1
58 MOVOU 16(SI), X2
59 MOVOU 16(DI), X3
60 MOVOU 32(SI), X4
61 MOVOU 32(DI), X5
62 MOVOU 48(SI), X6
63 MOVOU 48(DI), X7
64 PCMPEQB X1, X0 // each byte lane becomes 0xff where a and b match, 0x00 where they differ
65 PCMPEQB X3, X2
66 PCMPEQB X5, X4
67 PCMPEQB X7, X6
68 PAND X2, X0 // fold the four masks: a lane stays 0xff only if it matched in all four chunks
69 PAND X6, X4
70 PAND X4, X0
71 PMOVMSKB X0, DX // DX = 16-bit mask, one bit per byte lane
72 ADDQ $64, SI // advance pointers and shrink count before the mask is tested
73 ADDQ $64, DI
74 SUBQ $64, BX
75 CMPL DX, $0xffff // all 16 bits set <=> all 64 bytes matched
76 JEQ hugeloop
77 XORQ AX, AX // mismatch: return 0
78 RET
79
80 // 64 bytes at a time using ymm registers
81 hugeloop_avx2:
82 CMPQ BX, $64
83 JB bigloop_avx2 // fewer than 64 bytes left (possibly 0): leave the ymm loop
84 VMOVDQU (SI), Y0 // unaligned loads: two 32-byte chunks from each side
85 VMOVDQU (DI), Y1
86 VMOVDQU 32(SI), Y2
87 VMOVDQU 32(DI), Y3
88 VPCMPEQB Y1, Y0, Y4 // per-byte equality mask for bytes 0-31
89 VPCMPEQB Y2, Y3, Y5 // per-byte equality mask for bytes 32-63
90 VPAND Y4, Y5, Y6 // a lane stays 0xff only if it matched in both chunks
91 VPMOVMSKB Y6, DX // DX = 32-bit mask, one bit per byte lane
92 ADDQ $64, SI // advance pointers and shrink count before the mask is tested
93 ADDQ $64, DI
94 SUBQ $64, BX
95 CMPL DX, $0xffffffff // all 32 bits set <=> all 64 bytes matched
96 JEQ hugeloop_avx2
97 VZEROUPPER // clear the upper ymm halves before returning
98 XORQ AX, AX // mismatch: return 0
99 RET
100
101 bigloop_avx2:
102 VZEROUPPER // clear the upper ymm halves, then fall through into bigloop
103
104 // 8 bytes at a time using 64-bit register
105 bigloop:
106 CMPQ BX, $8
107 JBE leftover // 8 or fewer bytes left: finish with one load of the final 8 bytes
108 MOVQ (SI), CX
109 MOVQ (DI), DX
110 ADDQ $8, SI // advance pointers and shrink count before the words are compared
111 ADDQ $8, DI
112 SUBQ $8, BX
113 CMPQ CX, DX
114 JEQ bigloop
115 XORQ AX, AX // mismatch: return 0
116 RET
117
118 // remaining 0-8 bytes. Every path to here began with count >= 8, so the 8 bytes ending at SI+BX and DI+BX lie inside the inputs; they may overlap bytes already compared.
119 leftover:
120 MOVQ -8(SI)(BX*1), CX // final 8 bytes of a
121 MOVQ -8(DI)(BX*1), DX // final 8 bytes of b
122 CMPQ CX, DX
123 SETEQ AX // low byte of AX = 1 if they match, else 0
124 RET
125
126 small: // count is 0..7 here
127 CMPQ BX, $0
128 JEQ equal // count == 0: ZF is set by this CMPQ, so SETEQ at equal yields 1
129
130 LEAQ 0(BX*8), CX // CX = 8*count, the number of bits to compare
131 NEGQ CX // CX = -8*count; 64-bit shifts use the count mod 64, i.e. 64 - 8*count
132
133 CMPB SI, $0xf8 // byte compare: inspects only the low byte of the address
134 JA si_high // low byte 0xf9..0xff: an 8-byte load at SI would cross a 256-byte boundary
135
136 // load at SI won't cross a page boundary.
137 MOVQ (SI), SI // wanted bytes are the low count bytes; the bytes above them are discarded by SHLQ below
138 JMP si_finish
139 si_high:
140 // address ends in 11111xxx (xxx != 000). Load the 8 bytes that end where the data ends, then shift the wanted bytes down to the low end (upper bits become zero).
141 MOVQ -8(SI)(BX*1), SI
142 SHRQ CX, SI
143 si_finish:
144
145 // same for DI.
146 CMPB DI, $0xf8
147 JA di_high
148 MOVQ (DI), DI
149 JMP di_finish
150 di_high:
151 MOVQ -8(DI)(BX*1), DI
152 SHRQ CX, DI
153 di_finish:
154
155 SUBQ SI, DI // the low 8*count bits of the difference are zero iff the wanted bytes match
156 SHLQ CX, DI // shift out the unwanted high bits; ZF is set iff the wanted bytes match
157 equal:
158 SETEQ AX // low byte of AX = ZF, from the CMPQ at small or the SHLQ above
159 RET
160
View as plain text