Text file
src/runtime/memclr_ppc64x.s
1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build ppc64 || ppc64le
6
7 #include "textflag.h"
8
9 // See memclrNoHeapPointers Go doc for important implementation constraints.
10
11 // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
//
// Zeroes n bytes starting at ptr.
//
// Registers as used below:
//   R3   - current store address (starts as ptr, advanced after every store)
//   R4   - bytes still to clear (starts as n; decremented together with R3
//          everywhere except the final byte loop, which only uses CTR)
//   R0   - source operand of every scalar store; the code relies on it
//          holding zero
//   VS32 - zeroed with XXLXOR and used for the 16 byte vector stores
//   CTR  - counter for every counted loop (BC 16, 0, ...)
//   R5-R11, R14, R15, CR0, CR1 - scratch
//
// Strategy:
//   n < 8:        byte loop only (zerotail).
//   8 <= n < 512: 32 byte vector-store loop (when n >= 32), then double
//                 words, then single bytes.
//   n >= 512:     bring ptr to 128 byte alignment (1/2/4/8 byte stores, then
//                 16 byte vector stores), clear 512 bytes per iteration with
//                 four DCBZ, then 128 bytes per DCBZ, then reuse the
//                 32 byte / 8 byte / 1 byte paths for what is left.
12 TEXT runtime·memclrNoHeapPointers<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-16
// Without register-based arguments, load ptr and n from the argument frame
// into the registers the rest of the routine uses.
// NOTE(review): with regabiargs the arguments are presumably already in
// R3/R4 on entry - confirm against the ABIInternal specification.
13 #ifndef GOEXPERIMENT_regabiargs
14 MOVD ptr+0(FP), R3
15 MOVD n+8(FP), R4
16 #endif
17
18 // Determine if there are doublewords to clear
19 check:
20 ANDCC $7, R4, R5 // R5 = n & 7: leftover bytes after the double words
21 SRD $3, R4, R6 // R6 = n >> 3: double words to clear
22 CMP R6, $0, CR1 // CR1[EQ] set if no double words
23
24 BC 12, 6, nozerolarge // branch if CR1[EQ]: n < 8, only single bytes
25 CMP R4, $512
26 BLT under512 // special case for < 512
27 ANDCC $127, R3, R8 // check for 128 alignment of address
28 BEQ zero512setup // ptr already 128 byte aligned
29
30 ANDCC $7, R3, R15 // low 3 bits of ptr
31 BEQ zero512xsetup // at least 8 byte aligned
32
33 // zero bytes up to 8 byte alignment (at most 1 + 2 + 4 = 7 bytes; n >= 512 here)
34
35 ANDCC $1, R3, R15 // is ptr odd?
36 BEQ byte2
37 MOVB R0, 0(R3) // zero 1 byte
38 ADD $1, R3 // bump ptr by 1
39 ADD $-1, R4 // one byte fewer to clear
40
41 byte2:
42 ANDCC $2, R3, R15 // ptr is now even; is bit 1 set?
43 BEQ byte4
44 MOVH R0, 0(R3) // zero 2 bytes
45 ADD $2, R3 // bump ptr by 2
46 ADD $-2, R4 // two bytes fewer to clear
47
48 byte4:
49 ANDCC $4, R3, R15 // ptr is now 4 byte aligned; is bit 2 set?
50 BEQ zero512xsetup // already 8 byte aligned
51 MOVW R0, 0(R3) // zero 4 bytes
52 ADD $4, R3 // bump ptr by 4
53 ADD $-4, R4 // four bytes fewer to clear
54 BR zero512xsetup // ptr should now be 8 byte aligned
55
56 under512: // 8 <= n < 512, R6 = number of double words (nonzero)
57 MOVD R6, CTR // CTR = number of double words, used if we fall into zero8
58 SRDCC $2, R6, R7 // R7 = R6 >> 2: number of 32 byte chunks
59 BNE zero32setup // at least one chunk; zero32setup reloads CTR from R7
60
61 // Clear double words
62
63 zero8: // entered with CTR = double words to store; every path here has checked it is nonzero
64 MOVD R0, 0(R3) // double word
65 ADD $8, R3
66 ADD $-8, R4
67 BC 16, 0, zero8 // dec ctr, br zero8 if ctr not 0
68 BR nozerolarge // handle leftovers
69
70 // Prepare to clear 32 bytes at a time.
71
72 zero32setup: // entered with R7 = number of 32 byte chunks (nonzero)
73 DCBTST (R3) // data cache block touch for store: hint for the stores below
74 XXLXOR VS32, VS32, VS32 // clear VS32 (V0)
75 MOVD R7, CTR // number of 32 byte chunks
76 MOVD $16, R8 // index register for the second 16 byte store
77
78 zero32:
79 STXVD2X VS32, (R3+R0) // store 16 bytes
80 STXVD2X VS32, (R3+R8) // store the next 16 bytes
81 ADD $32, R3
82 ADD $-32, R4
83 BC 16, 0, zero32 // dec ctr, br zero32 if ctr not 0
84 RLDCLCC $61, R4, $3, R6 // R6 = remaining doublewords (R4 >> 3); CC form sets CR0 for the BEQ
85 BEQ nozerolarge // no whole double word left
86 MOVD R6, CTR // set up the CTR for doublewords
87 BR zero8
88
89 nozerolarge: // fewer than 8 bytes remain to be cleared
90 ANDCC $7, R4, R5 // any remaining bytes
91 BC 4, 1, LR // return if CR0[GT] is clear (ble lr): no tail bytes
92
93 zerotail:
94 MOVD R5, CTR // set up to clear tail bytes
95
96 zerotailloop: // R4 is not updated here; CTR alone bounds the loop
97 MOVB R0, 0(R3) // clear single bytes
98 ADD $1, R3
99 BC 16, 0, zerotailloop // dec ctr, br zerotailloop if ctr not 0
100 RET
101
102 zero512xsetup: // n >= 512 path, ptr is 8 byte aligned but not yet 128 byte aligned
103 ANDCC $8, R3, R11 // bit 3 of ptr set, i.e. 8 but not 16 byte aligned?
104 BEQ zero512setup16 // already 16 byte aligned
105 MOVD R0, 0(R3) // clear 8 bytes
106 ADD $8, R3 // update ptr to next 8
107 ADD $-8, R4 // dec count by 8
108
109 zero512setup16: // ptr is 16 byte aligned here
110 ANDCC $127, R3, R14 // R14 = ptr & 127: offset within the 128 byte block
111 BEQ zero512setup // already 128 byte aligned
112 MOVD $128, R15
113 SUB R14, R15, R14 // R14 = 128 - R14: bytes up to the next 128 byte boundary
114 SRD $4, R14, R15 // number of 16 byte chunks
115
116 zero512presetup:
117 MOVD R15, CTR // loop counter of 16 bytes
118 XXLXOR VS32, VS32, VS32 // clear VS32 (V0)
119
120 zero512preloop: // clear up to 128 alignment
121 STXVD2X VS32, (R3+R0) // clear 16 bytes
122 ADD $16, R3 // update ptr
123 ADD $-16, R4 // dec count
124 BC 16, 0, zero512preloop // dec ctr, loop until ptr is 128 byte aligned
125
126 zero512setup: // setup for dcbz loop; ptr is 128 byte aligned here
127 CMP R4, $512 // alignment stores may have dropped the count below 512
128 BLT remain
129 SRD $9, R4, R8 // loop count for 512 chunks
130 MOVD R8, CTR // set up counter
131 MOVD $128, R9 // index regs for 128 bytes
132 MOVD $256, R10
133 MOVD $384, R11
134
135 zero512: // NOTE(review): the 128 byte stride assumes each DCBZ clears 128 bytes - confirm for all supported targets
136 DCBZ (R3+R0) // clear first chunk
137 DCBZ (R3+R9) // clear second chunk
138 DCBZ (R3+R10) // clear third chunk
139 DCBZ (R3+R11) // clear fourth chunk
140 ADD $512, R3
141 ADD $-512, R4
142 BC 16, 0, zero512 // dec ctr, br zero512 if ctr not 0
143
144 remain: // fewer than 512 bytes left; clear one 128 byte block per pass
145 CMP R4, $128 // check if 128 byte chunks left
146 BLT smaller
147 DCBZ (R3+R0) // clear 128
148 ADD $128, R3
149 ADD $-128, R4
150 BR remain
151
152 smaller: // fewer than 128 bytes left
153 ANDCC $127, R4, R7 // find leftovers
154 BEQ done // nothing left to clear
155 CMP R7, $64 // at least 64 left: do 32 at a time
156 BLT zero8setup // less than 64, do 8 at a time
157 SRD $5, R7, R7 // R7 = number of 32 byte chunks, as zero32setup expects
158 BR zero32setup
159
160 zero8setup:
161 SRDCC $3, R7, R7 // R7 = number of double words left
162 BEQ nozerolarge // fewer than 8 bytes: only the byte tail remains
163 MOVD R7, CTR // CTR = double words for zero8
164 BR zero8
165
166 done:
167 RET
168
View as plain text