1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build ppc64 || ppc64le
6
7 #include "textflag.h"
8
// func xorBytesVSX(dst, a, b *byte, n int)
//
// Stores a[i] ^ b[i] into dst[i] for every i in [0, n).
//
// Inputs of 32 bytes or more are processed 32 bytes per iteration using
// two 16-byte VSX loads per operand. Whatever remains (and any input
// shorter than 32 bytes) is finished with at most one 16-byte vector
// step, at most one 8-byte doubleword step, and then a byte loop.
//
// Register use:
//	R3        dst
//	R4        a
//	R5        b
//	R6        number of bytes still to process
//	R7        iteration count for the 32-byte loop
//	R8        byte offset of the next unprocessed element
//	R10       R8 + 16 inside the 32-byte loop
//	R14, R15  scalar temporaries
//	VS32-VS35 vector temporaries
TEXT ·xorBytesVSX(SB), NOSPLIT, $0
	MOVD	dst+0(FP), R3
	MOVD	a+8(FP), R4
	MOVD	b+16(FP), R5
	MOVD	n+24(FP), R6
	MOVD	$0, R8			// offset = 0

	// Pick the entry point from the total length.
	CMPU	R6, $8
	BLT	bytes			// n < 8: byte loop only
	CMPU	R6, $32
	BLT	tail16			// 8 <= n < 32: skip the 32-byte loop

	// n >= 32: CTR = n / 32 iterations of the 32-byte loop.
	SRD	$5, R6, R7
	MOVD	R7, CTR
	MOVD	$16, R10		// offset of the upper 16 bytes of the chunk

vec32:
	// All four loads are issued before either store.
	LXVD2X	(R4)(R8), VS32		// VS32 = a[off : off+16]
	LXVD2X	(R5)(R8), VS33		// VS33 = b[off : off+16]
	LXVD2X	(R4)(R10), VS34		// VS34 = a[off+16 : off+32]
	LXVD2X	(R5)(R10), VS35		// VS35 = b[off+16 : off+32]
	XXLXOR	VS32, VS33, VS32	// VS32 = lower 16 bytes of a ^ b
	XXLXOR	VS34, VS35, VS34	// VS34 = upper 16 bytes of a ^ b
	STXVD2X	VS32, (R3)(R8)
	STXVD2X	VS34, (R3)(R10)
	ADD	$32, R8
	ADD	$32, R10
	BC	16, 0, vec32		// bdnz vec32

	// R6 = n mod 32; CR0 records whether anything is left.
	ANDCC	$31, R6, R6
	BEQ	out
	CMP	R6, $8
	BLT	bytes

tail16:
	// Here 8 <= R6 < 32. Take one 16-byte step if it fits.
	CMP	R6, $16
	BLT	tail8
	LXVD2X	(R4)(R8), VS32
	LXVD2X	(R5)(R8), VS33
	XXLXOR	VS32, VS33, VS32
	STXVD2X	VS32, (R3)(R8)
	ADD	$16, R8
	SUB	$16, R6
	CMP	R6, $8
	BLT	bytes

tail8:
	// Here 8 <= R6 < 16. Take one 8-byte step.
	MOVD	(R4)(R8), R14		// R14 = a[off : off+8]
	MOVD	(R5)(R8), R15		// R15 = b[off : off+8]
	XOR	R14, R15, R14
	MOVD	R14, (R3)(R8)
	ADD	$8, R8
	SUB	$8, R6
	// Fall through: the check below returns when nothing is left.

bytes:
	// Fewer than 8 bytes remain; finish them one at a time.
	CMP	R6, $0
	BEQ	out
	MOVD	R6, CTR			// CTR = remaining byte count

byteloop:
	MOVBZ	(R4)(R8), R14		// R14 = a[off]
	MOVBZ	(R5)(R8), R15		// R15 = b[off]
	XOR	R14, R15, R14
	MOVB	R14, (R3)(R8)
	ADD	$1, R8
	BC	16, 0, byteloop		// bdnz byteloop

out:
	RET
88
View as plain text