Text file
src/runtime/memclr_arm64.s
1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "textflag.h"
6
7 // See memclrNoHeapPointers Go doc for important implementation constraints.
8 // Zeroes the n bytes starting at ptr: n < 16 uses overlapping scalar stores, larger n uses 16-byte STP stores and, when permitted, DC ZVA.
9 // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
10 // Also called from assembly in sys_windows_arm64.s without g (but using Go stack convention).
11 TEXT runtime·memclrNoHeapPointers<ABIInternal>(SB),NOSPLIT,$0-16 // $0-16: no local frame, 16 bytes of arguments
12 #ifndef GOEXPERIMENT_regabiargs
13 MOVD ptr+0(FP), R0 // stack-argument build: R0 = ptr
14 MOVD n+8(FP), R1 // stack-argument build: R1 = n
15 #endif
16 // From here on R0 = ptr and R1 = n (presumably already in these registers when regabiargs is enabled).
17 CMP $16, R1 // compare n with 16; both branches below consume these flags
18 // If n is equal to 16 bytes, use zero_exact_16 to zero
19 BEQ zero_exact_16
20
21 // If n is greater than 16 bytes, use zero_by_16 to zero
22 BHI zero_by_16 // unsigned n > 16
23
24 // n is less than 16 bytes
25 ADD R1, R0, R7 // R7 = ptr + n, one past the last byte to clear
26 TBZ $3, R1, less_than_8 // bit 3 of n clear means n < 8
27 MOVD ZR, (R0) // 8 <= n <= 15: zero the first 8 bytes
28 MOVD ZR, -8(R7) // and the last 8 bytes; the two stores overlap
29 RET
30
31 less_than_8:
32 TBZ $2, R1, less_than_4 // bit 2 of n clear means n < 4
33 MOVW ZR, (R0) // 4 <= n <= 7: zero the first 4 bytes
34 MOVW ZR, -4(R7) // and the last 4 bytes; the two stores overlap
35 RET
36
37 less_than_4:
38 CBZ R1, ending // n == 0: nothing to clear
39 MOVB ZR, (R0) // n is 1, 2 or 3: zero the first byte
40 TBZ $1, R1, ending // bit 1 of n clear means n == 1, already done
41 MOVH ZR, -2(R7) // n is 2 or 3: zero the last two bytes
42
43 ending:
44 RET
45
46 zero_exact_16:
47 // n is exactly 16 bytes
48 STP (ZR, ZR), (R0) // one store of two zero registers covers all 16 bytes
49 RET
50
51 zero_by_16:
52 // n greater than 16 bytes, check if the start address is aligned
53 NEG R0, R4 // R4 = -ptr
54 ANDS $15, R4, R4 // R4 = (-ptr) & 15 = bytes up to the next 16-byte boundary; Z set when already aligned
55 // Try zeroing using zva if the start address is aligned with 16
56 BEQ try_zva
57
58 // Non-aligned store
59 STP (ZR, ZR), (R0) // zero 16 bytes at the unaligned start (n > 16 here); this covers the R4 bytes skipped below
60 // Make the destination aligned
61 SUB R4, R1, R1 // n -= R4
62 ADD R4, R0, R0 // ptr += R4, now 16-byte aligned
63 B try_zva
64
65 tail_maybe_long: // entered from loop_zva with R1 = bytes left, less than one ZVA block
66 CMP $64, R1
67 BHS no_zva // 64 or more bytes left: use the 64-byte store loop
68
69 tail63: // clears what is left using only the low 6 bits of R1
70 ANDS $48, R1, R3 // R3 = bytes in whole 16-byte chunks: 0, 16, 32 or 48
71 BEQ last16 // no whole chunk left
72 CMPW $32, R3
73 BEQ last48 // R3 == 32: two stores, entering at last48
74 BLT last32 // R3 == 16: one store, entering at last32
75 STP.P (ZR, ZR), 16(R0) // R3 == 48: falls through all three stores; each stores 16 zero bytes at R0, then R0 += 16
76 last48:
77 STP.P (ZR, ZR), 16(R0)
78 last32:
79 STP.P (ZR, ZR), 16(R0)
80 // The last store length is at most 16, so it is safe to use
81 // stp to write last 16 bytes
82 last16:
83 ANDS $15, R1, R1 // R1 = bytes still to clear, 0..15
84 CBZ R1, last_end // nothing left
85 ADD R1, R0, R0 // R0 = one past the last byte to clear
86 STP (ZR, ZR), -16(R0) // zero the final 16 bytes, overlapping bytes that were already cleared
87 last_end:
88 RET
89
90 no_zva: // 64-byte store loop; every branch to this label arrives with R1 >= 64
91 SUB $16, R0, R0 // bias the pointer by -16 so the loop can use offsets 16..64 with writeback on the last store
92 SUB $64, R1, R1 // pre-subtract one iteration so SUBS/BGE below stops after the last full 64 bytes
93
94 loop_64:
95 STP (ZR, ZR), 16(R0)
96 STP (ZR, ZR), 32(R0)
97 STP (ZR, ZR), 48(R0)
98 STP.W (ZR, ZR), 64(R0) // pre-indexed: R0 += 64, then store; leaves R0 biased for the next iteration
99 SUBS $64, R1, R1
100 BGE loop_64 // loop while at least 64 more bytes remained
101 ANDS $63, R1, ZR // test the low 6 bits of R1: non-zero means a tail of 1..63 bytes remains
102 ADD $16, R0, R0 // undo the -16 bias; ADD leaves the flags from ANDS intact for the BNE below
103 BNE tail63
104 RET
105
106 try_zva:
107 // Try using the ZVA feature to zero entire cache lines
108 // It is not meaningful to use ZVA if the block size is less than 64,
109 // so make sure that n is greater than or equal to 64
110 CMP $63, R1
111 BLE tail63 // n <= 63 (signed compare): finish with 16-byte stores
112
113 CMP $128, R1
114 // Ensure n is at least 128 bytes, so that there is enough to zero after
115 // alignment.
116 BLT no_zva // 64 <= n < 128: plain 64-byte loop
117 // Check if ZVA is allowed from user code, and if so get the block size
118 MOVW block_size<>(SB), R5 // R5 = cached state: 0 = not probed yet, bit 31 set = ZVA unavailable, otherwise block size in bytes
119 TBNZ $31, R5, no_zva // an earlier call recorded that ZVA is unavailable
120 CBNZ R5, zero_by_line // block size already cached
121 // DCZID_EL0 bit assignments
122 // [63:5] Reserved
123 // [4] DZP, if bit set DC ZVA instruction is prohibited, else permitted
124 // [3:0] log2 of the block size in words, eg. if it returns 0x4 then block size is 16 words
125 MRS DCZID_EL0, R3
126 TBZ $4, R3, init // DZP clear: DC ZVA is permitted
127 // ZVA not available
128 MOVW $~0, R5 // all ones, so bit 31 is set
129 MOVW R5, block_size<>(SB) // remember the result for later calls
130 B no_zva
131
132 init:
133 MOVW $4, R9 // 4 bytes per word
134 ANDW $15, R3, R5 // R5 = DCZID_EL0[3:0], log2 of the block size in words
135 LSLW R5, R9, R5 // R5 = 4 << R5 = block size in bytes
136 MOVW R5, block_size<>(SB) // cache the block size for later calls
137 // NOTE(review): the check below runs only on the call that probes DCZID_EL0; later calls with a cached size enter zero_by_line directly - confirm sizes below 64 are acceptable there.
138 ANDS $63, R5, R9 // non-zero low 6 bits means the power-of-two block size is below 64
139 // Block size is less than 64.
140 BNE no_zva
141
142 zero_by_line: // R5 = ZVA block size in bytes, R1 = n, R0 = 16-byte aligned pointer
143 CMP R5, R1
144 // Not enough memory to reach alignment
145 BLO no_zva // n is smaller than one block
146 SUB $1, R5, R6 // R6 = block size - 1, used as an alignment mask
147 NEG R0, R4
148 ANDS R6, R4, R4 // R4 = (-ptr) & R6 = bytes up to the next block boundary; Z set when already aligned
149 // Already aligned
150 BEQ aligned
151
152 // check there is enough to zero after alignment
153 SUB R4, R1, R3 // R3 = bytes that would remain once the pointer is block aligned
154
155 // Check that the remaining length to ZVA after alignment
156 // is at least 64 and at least one block.
157 CMP $64, R3
158 CCMP GE, R3, R5, $10 // if GE (R3 >= 64) compare R3 with R5, else set NZCV=0b1010 so that BLT is taken
159 BLT no_zva // R3 < 64 or R3 < block size; R0 and R1 are still unmodified here
160
161 // We now have at least 64 bytes to zero, update n
162 MOVD R3, R1
163
164 loop_zva_prolog: // zero 64 bytes per iteration until the block boundary has been passed
165 STP (ZR, ZR), (R0)
166 STP (ZR, ZR), 16(R0)
167 STP (ZR, ZR), 32(R0)
168 SUBS $64, R4, R4 // sets the flags for BGE; the STP and ADD in between do not change them
169 STP (ZR, ZR), 48(R0)
170 ADD $64, R0, R0
171 BGE loop_zva_prolog
172 // R4 is now negative: the prolog overshot the boundary by -R4 bytes, which stay inside the buffer because R3 >= 64.
173 ADD R4, R0, R0 // step back so R0 sits exactly on the block boundary
174
175 aligned:
176 SUB R5, R1, R1 // pre-subtract one block so SUBS/BHS below stops after the last full block
177
178 loop_zva:
179 WORD $0xd50b7420 // DC ZVA, R0
180 ADD R5, R0, R0 // advance by one block
181 SUBS R5, R1, R1
182 BHS loop_zva // no borrow: at least one more full block remains
183 ANDS R6, R1, R1 // R1 = bytes left over, less than one block
184 BNE tail_maybe_long
185 RET
186
187 GLOBL block_size<>(SB), NOPTR, $8 // cached DC ZVA state, accessed with 32-bit MOVW: 0 = not probed yet, bit 31 set = ZVA prohibited, otherwise block size in bytes
188
View as plain text