// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le

#include "textflag.h"

// The following paper describes how C/C++-style memory models are
// enforced on POWER, and gives the context for why the
// strange-looking code sequences below work.
//
// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
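
// The load routines below use the "load; compare the loaded value
// to itself; branch; ISYNC" idiom from that paper: the CMP/BC pair
// forms a conditional branch that depends on the loaded value and
// never actually branches, and the ISYNC behind it keeps later
// instructions from starting until the load has completed. As a
// rough Go-level sketch (not the actual implementation; the barrier
// names are hypothetical), ·Load behaves like:
//
//	func Load(ptr *uint32) uint32 {
//		fullBarrier()    // SYNC: order against earlier accesses
//		v := *ptr
//		acquireBarrier() // CMPW; BC; ISYNC: order against later accesses
//		return v
//	}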

// uint32 ·Load(uint32 volatile* ptr)
TEXT ·Load(SB),NOSPLIT|NOFRAME,$-8-12
	MOVD	ptr+0(FP), R3
	SYNC
	MOVWZ	0(R3), R3
	CMPW	R3, R3, CR7
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVW	R3, ret+8(FP)
	RET

// uint8 ·Load8(uint8 volatile* ptr)
TEXT ·Load8(SB),NOSPLIT|NOFRAME,$-8-9
	MOVD	ptr+0(FP), R3
	SYNC
	MOVBZ	0(R3), R3
	CMP	R3, R3, CR7
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVB	R3, ret+8(FP)
	RET

// uint64 ·Load64(uint64 volatile* ptr)
TEXT ·Load64(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD	ptr+0(FP), R3
	SYNC
	MOVD	0(R3), R3
	CMP	R3, R3, CR7
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVD	R3, ret+8(FP)
	RET

// void *·Loadp(void *volatile *ptr)
TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD	ptr+0(FP), R3
	SYNC
	MOVD	0(R3), R3
	CMP	R3, R3, CR7
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVD	R3, ret+8(FP)
	RET

// uint32 ·LoadAcq(uint32 volatile* ptr)
TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$-8-12
	MOVD	ptr+0(FP), R3
	MOVWZ	0(R3), R3
	CMPW	R3, R3, CR7
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVW	R3, ret+8(FP)
	RET

// uint64 ·LoadAcq64(uint64 volatile* ptr)
TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$-8-16
	MOVD	ptr+0(FP), R3
	MOVD	0(R3), R3
	CMP	R3, R3, CR7
	BC	4, 30, 1(PC) // bne- cr7,0x4
	ISYNC
	MOVD	R3, ret+8(FP)
	RET

// bool ·Cas(uint32 *ptr, uint32 old, uint32 new)
// Atomically:
//	if(*ptr == old){
//		*ptr = new;
//		return 1;
//	} else
//		return 0;
TEXT ·Cas(SB), NOSPLIT, $0-17
	MOVD	ptr+0(FP), R3
	MOVWZ	old+8(FP), R4
	MOVWZ	new+12(FP), R5
	LWSYNC
cas_again:
	LWAR	(R3), R6
	CMPW	R6, R4
	BNE	cas_fail
	STWCCC	R5, (R3)
	BNE	cas_again
	MOVD	$1, R3
	LWSYNC
	MOVB	R3, ret+16(FP)
	RET
cas_fail:
	MOVB	R0, ret+16(FP)
	RET
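
// A rough usage sketch (Go-level; the retry loop is illustrative,
// not part of this file): Cas reports whether the swap happened, so
// callers typically retry until they win the race:
//
//	for {
//		old := Load(addr)
//		if Cas(addr, old, old+1) {
//			break // *addr advanced from old to old+1
//		}
//	}
//
// The LWSYNC ahead of the loop orders earlier memory operations
// before the CAS (release); the LWSYNC on the success path orders
// the CAS before later operations (acquire).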

// bool ·Cas64(uint64 *ptr, uint64 old, uint64 new)
// Atomically:
//	if(*ptr == old){
//		*ptr = new;
//		return 1;
//	} else {
//		return 0;
//	}
TEXT ·Cas64(SB), NOSPLIT, $0-25
	MOVD	ptr+0(FP), R3
	MOVD	old+8(FP), R4
	MOVD	new+16(FP), R5
	LWSYNC
cas64_again:
	LDAR	(R3), R6
	CMP	R6, R4
	BNE	cas64_fail
	STDCCC	R5, (R3)
	BNE	cas64_again
	MOVD	$1, R3
	LWSYNC
	MOVB	R3, ret+24(FP)
	RET
cas64_fail:
	MOVB	R0, ret+24(FP)
	RET

TEXT ·CasRel(SB), NOSPLIT, $0-17
	MOVD	ptr+0(FP), R3
	MOVWZ	old+8(FP), R4
	MOVWZ	new+12(FP), R5
	LWSYNC
cas_again:
	LWAR	(R3), $0, R6	// 0 = Mutex release hint
	CMPW	R6, R4
	BNE	cas_fail
	STWCCC	R5, (R3)
	BNE	cas_again
	MOVD	$1, R3
	MOVB	R3, ret+16(FP)
	RET
cas_fail:
	MOVB	R0, ret+16(FP)
	RET
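
// CasRel is Cas minus the trailing LWSYNC on the success path: the
// LWSYNC ahead of the loop still makes it a release operation, but
// a successful CasRel gives the caller no acquire guarantee. (The
// $0 operand on LWAR encodes the mutex-release hint noted in the
// comment above.) A rough Go-level sketch with hypothetical helper
// names:
//
//	func CasRel(ptr *uint32, old, new uint32) bool {
//		releaseBarrier()              // LWSYNC
//		return casLoop(ptr, old, new) // no barrier afterwards
//	}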

TEXT ·Casint32(SB), NOSPLIT, $0-17
	BR	·Cas(SB)

TEXT ·Casint64(SB), NOSPLIT, $0-25
	BR	·Cas64(SB)

TEXT ·Casuintptr(SB), NOSPLIT, $0-25
	BR	·Cas64(SB)

TEXT ·Loaduintptr(SB), NOSPLIT|NOFRAME, $0-16
	BR	·Load64(SB)

TEXT ·LoadAcquintptr(SB), NOSPLIT|NOFRAME, $0-16
	BR	·LoadAcq64(SB)

TEXT ·Loaduint(SB), NOSPLIT|NOFRAME, $0-16
	BR	·Load64(SB)

TEXT ·Storeint32(SB), NOSPLIT, $0-12
	BR	·Store(SB)

TEXT ·Storeint64(SB), NOSPLIT, $0-16
	BR	·Store64(SB)

TEXT ·Storeuintptr(SB), NOSPLIT, $0-16
	BR	·Store64(SB)

TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
	BR	·StoreRel64(SB)

TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
	BR	·Xadd64(SB)

TEXT ·Loadint32(SB), NOSPLIT, $0-12
	BR	·Load(SB)

TEXT ·Loadint64(SB), NOSPLIT, $0-16
	BR	·Load64(SB)

TEXT ·Xaddint32(SB), NOSPLIT, $0-20
	BR	·Xadd(SB)

TEXT ·Xaddint64(SB), NOSPLIT, $0-24
	BR	·Xadd64(SB)

// bool ·Casp1(void **ptr, void *old, void *new)
// Atomically:
//	if(*ptr == old){
//		*ptr = new;
//		return 1;
//	} else
//		return 0;
TEXT ·Casp1(SB), NOSPLIT, $0-25
	BR	·Cas64(SB)

// uint32 ·Xadd(uint32 volatile *ptr, int32 delta)
// Atomically:
//	*ptr += delta;
//	return *ptr;
TEXT ·Xadd(SB), NOSPLIT, $0-20
	MOVD	ptr+0(FP), R4
	MOVW	delta+8(FP), R5
	LWSYNC
	LWAR	(R4), R3
	ADD	R5, R3
	STWCCC	R3, (R4)
	BNE	-3(PC)
	MOVW	R3, ret+16(FP)
	RET
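
// The LWAR/STWCCC pair above is a load-reserved/store-conditional
// loop: STWCCC fails, and BNE -3(PC) restarts at the LWAR, if
// another CPU touched the word between the two instructions. A
// rough Go-level sketch of ·Xadd (hypothetical helper names, not
// the actual implementation):
//
//	func Xadd(ptr *uint32, delta int32) uint32 {
//		for {
//			new := loadReserved(ptr) + uint32(delta) // LWAR; ADD
//			if storeConditional(ptr, new) {          // STWCCC
//				return new
//			}
//		}
//	}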

// uint64 ·Xadd64(uint64 volatile *ptr, int64 delta)
// Atomically:
//	*ptr += delta;
//	return *ptr;
TEXT ·Xadd64(SB), NOSPLIT, $0-24
	MOVD	ptr+0(FP), R4
	MOVD	delta+8(FP), R5
	LWSYNC
	LDAR	(R4), R3
	ADD	R5, R3
	STDCCC	R3, (R4)
	BNE	-3(PC)
	MOVD	R3, ret+16(FP)
	RET

// uint32 ·Xchg(uint32 volatile *ptr, uint32 new)
// Atomically:
//	old := *ptr;
//	*ptr = new;
//	return old;
TEXT ·Xchg(SB), NOSPLIT, $0-20
	MOVD	ptr+0(FP), R4
	MOVW	new+8(FP), R5
	LWSYNC
	LWAR	(R4), R3
	STWCCC	R5, (R4)
	BNE	-2(PC)
	ISYNC
	MOVW	R3, ret+16(FP)
	RET

// uint64 ·Xchg64(uint64 volatile *ptr, uint64 new)
// Atomically:
//	old := *ptr;
//	*ptr = new;
//	return old;
TEXT ·Xchg64(SB), NOSPLIT, $0-24
	MOVD	ptr+0(FP), R4
	MOVD	new+8(FP), R5
	LWSYNC
	LDAR	(R4), R3
	STDCCC	R5, (R4)
	BNE	-2(PC)
	ISYNC
	MOVD	R3, ret+16(FP)
	RET
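
// Both exchanges use the same reservation loop as ·Xadd but end
// with ISYNC rather than a trailing LWSYNC: the leading LWSYNC
// provides release ordering, and the ISYNC on the success path
// provides acquire ordering. A rough Go-level sketch (hypothetical
// helper names):
//
//	func Xchg(ptr *uint32, new uint32) uint32 {
//		for {
//			old := loadReserved(ptr)        // LWAR
//			if storeConditional(ptr, new) { // STWCCC
//				acquireBarrier() // ISYNC
//				return old
//			}
//		}
//	}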

TEXT ·Xchgint32(SB), NOSPLIT, $0-20
	BR	·Xchg(SB)

TEXT ·Xchgint64(SB), NOSPLIT, $0-24
	BR	·Xchg64(SB)

TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
	BR	·Xchg64(SB)

TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
	BR	·Store64(SB)

TEXT ·Store(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	SYNC
	MOVW	R4, 0(R3)
	RET

TEXT ·Store8(SB), NOSPLIT, $0-9
	MOVD	ptr+0(FP), R3
	MOVB	val+8(FP), R4
	SYNC
	MOVB	R4, 0(R3)
	RET

TEXT ·Store64(SB), NOSPLIT, $0-16
	MOVD	ptr+0(FP), R3
	MOVD	val+8(FP), R4
	SYNC
	MOVD	R4, 0(R3)
	RET

TEXT ·StoreRel(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
	MOVW	R4, 0(R3)
	RET

TEXT ·StoreRel64(SB), NOSPLIT, $0-16
	MOVD	ptr+0(FP), R3
	MOVD	val+8(FP), R4
	LWSYNC
	MOVD	R4, 0(R3)
	RET
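
// The only difference between the Store and StoreRel families is
// the barrier ahead of the store: SYNC (heavyweight sync) makes the
// plain stores sequentially consistent, while LWSYNC only orders
// the store after earlier memory operations (release). A rough
// usage sketch (data and flag are hypothetical variables):
//
//	data = 42          // ordinary store
//	StoreRel(&flag, 1) // LWSYNC ensures data is visible before flag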

// void ·Or8(byte volatile*, byte);
TEXT ·Or8(SB), NOSPLIT, $0-9
	MOVD	ptr+0(FP), R3
	MOVBZ	val+8(FP), R4
	LWSYNC
again:
	LBAR	(R3), R6
	OR	R4, R6
	STBCCC	R6, (R3)
	BNE	again
	RET

// void ·And8(byte volatile*, byte);
TEXT ·And8(SB), NOSPLIT, $0-9
	MOVD	ptr+0(FP), R3
	MOVBZ	val+8(FP), R4
	LWSYNC
again:
	LBAR	(R3), R6
	AND	R4, R6
	STBCCC	R6, (R3)
	BNE	again
	RET

// func Or(ptr *uint32, val uint32)
TEXT ·Or(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
again:
	LWAR	(R3), R6
	OR	R4, R6
	STWCCC	R6, (R3)
	BNE	again
	RET

// func And(ptr *uint32, val uint32)
TEXT ·And(SB), NOSPLIT, $0-12
	MOVD	ptr+0(FP), R3
	MOVW	val+8(FP), R4
	LWSYNC
again:
	LWAR	(R3), R6
	AND	R4, R6
	STWCCC	R6, (R3)
	BNE	again
	RET
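
// Each bitwise operation above is a reservation loop with a leading
// LWSYNC (release ordering) and no return value. A rough usage
// sketch (flags and the bit position are hypothetical):
//
//	Or(&flags, 1<<3)           // atomically set bit 3
//	And(&flags, ^uint32(1<<3)) // atomically clear bit 3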