1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "textflag.h"
6
// func xorBytesSSE2(dst, a, b *byte, n int)
//
// Stores a[i] ^ b[i] into dst[i] for every i in [0, n).
//
// Processing order:
//   1. If n is not a multiple of 16, the excess is consumed from the END of
//      the buffers: single bytes until the remaining length is a multiple
//      of 8, then one 8-byte word if the remainder is still not a multiple
//      of 16.
//   2. The remaining prefix (a multiple of 16 bytes) is processed front to
//      back, 16 bytes per iteration, with MOVOU (unaligned 16-byte moves)
//      and PXOR.
//
// A length of zero or less returns immediately without touching memory.
//
// Register use:
//   BX = dst, SI = a, CX = b
//   DX = bytes not yet processed (shrinks as the tail is consumed)
//   AX = byte offset inside loop16b; scratch operand in the tail code
//   DI = scratch operand in the tail code
//   X0, X1 = 16-byte operands
TEXT ·xorBytesSSE2(SB), NOSPLIT, $0
	MOVQ  dst+0(FP), BX
	MOVQ  a+8(FP), SI
	MOVQ  b+16(FP), CX
	MOVQ  n+24(FP), DX
	TESTQ DX, DX
	JLE   done               // n <= 0: nothing to do. loop16b runs its body before
	                         // checking the count, so it must never be entered with DX == 0.
	TESTQ $15, DX            // is n a multiple of 16?
	JNZ   not_aligned        // no: trim the tail first

aligned:
	// Invariant: DX is a positive multiple of 16.
	XORL  AX, AX             // AX = 0, offset of the current 16-byte chunk

loop16b:
	MOVOU (SI)(AX*1), X0     // X0 = a[AX : AX+16]
	MOVOU (CX)(AX*1), X1     // X1 = b[AX : AX+16]
	PXOR  X1, X0             // X0 ^= X1
	MOVOU X0, (BX)(AX*1)     // dst[AX : AX+16] = X0
	ADDQ  $16, AX
	CMPQ  DX, AX
	JNE   loop16b            // continue until the offset reaches DX
	RET

loop_1b:
	// XOR one byte at the end of the unprocessed range, moving backwards.
	SUBQ  $1, DX
	MOVB  (SI)(DX*1), DI
	MOVB  (CX)(DX*1), AX
	XORB  AX, DI
	MOVB  DI, (BX)(DX*1)
	TESTQ $7, DX
	JNZ   loop_1b            // repeat until DX is a multiple of 8
	TESTQ DX, DX
	JZ    done               // everything consumed by the byte loop
	TESTQ $15, DX
	JZ    aligned            // DX is a multiple of 16: go to the SSE2 loop
	// Otherwise DX % 16 == 8: fall through to the 8-byte step.

not_aligned:
	TESTQ $7, DX
	JNZ   loop_1b            // DX not a multiple of 8: strip single bytes first
	// DX % 16 == 8 here: XOR one 8-byte word at the end of the range.
	SUBQ  $8, DX
	MOVQ  (SI)(DX*1), DI
	MOVQ  (CX)(DX*1), AX
	XORQ  AX, DI
	MOVQ  DI, (BX)(DX*1)
	CMPQ  DX, $16            // DX is now a multiple of 16: either 0 or >= 16
	JGE   aligned

done:
	RET
55
View as plain text