src/runtime/race_arm64.s
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
#include "tls_arm64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race dependency on cmd/go.

// A brief recap of the arm64 calling convention.
// Arguments are passed in R0...R7; the rest are passed on the stack.
// Callee-saved registers are: R19...R28.
// Temporary registers are: R9...R15.
// SP must be 16-byte aligned.

// When calling racecalladdr, R9 is the call target address.

// The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.
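
// For illustration, under this convention a call like __tsan_read(thr, addr, pc)
// maps its arguments onto registers as (a sketch, not extra code in this file):
//
//	R0 = thr (race ctx), R1 = addr, R2 = pc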

// Darwin may return an unaligned thread pointer. Align it. (See tls_arm64.s)
// No-op on other OSes.
#ifdef TLS_darwin
#define TP_ALIGN	AND	$~7, R0
#else
#define TP_ALIGN
#endif

// Load g from TLS. (See tls_arm64.s)
#define load_g \
	MRS_TPIDR_R0 \
	TP_ALIGN \
	MOVD	runtime·tls_g(SB), R11 \
	MOVD	(R0)(R11), g
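
// In effect, load_g computes the following (pseudo-Go sketch, not part of the build):
//
//	tp := TPIDR_EL0                        // thread pointer (aligned on Darwin)
//	g = *(*g)(unsafe.Pointer(tp + tls_g))  // tls_g holds the TLS slot offset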

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
#ifdef GOEXPERIMENT_regabiargs
	MOVD	R0, R1	// addr
#else
	MOVD	addr+0(FP), R1
#endif
	MOVD	LR, R2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_read(SB), R9
	JMP	racecalladdr<>(SB)
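
// For context: with -race the compiler emits a raceread call before each
// instrumented load. Roughly, in pseudo-Go (illustrative sketch only):
//
//	raceread(uintptr(unsafe.Pointer(p)))
//	v := *p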

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)
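
// RaceRead/RaceWrite and the Range variants below are the public annotation
// hooks exported via the runtime package (available only under -race). A
// sketch of manual use, e.g. for memory shared with non-instrumented code:
//
//	runtime.RaceRead(unsafe.Pointer(&buf[0])) // declare the read to the detector
//	v := buf[0]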

// func runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_read_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
#ifdef GOEXPERIMENT_regabiargs
	MOVD	R0, R1	// addr
#else
	MOVD	addr+0(FP), R1
#endif
	MOVD	LR, R2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_write(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// func runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_write_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
#ifdef GOEXPERIMENT_regabiargs
	MOVD	R1, R2	// size
	MOVD	R0, R1	// addr
#else
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
#endif
	MOVD	LR, R3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)
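
// The range variants annotate bulk accesses. E.g., an n-byte copy is
// conceptually reported as (pseudo-Go sketch only):
//
//	racereadrange(uintptr(unsafe.Pointer(&src[0])), uintptr(n))
//	racewriterange(uintptr(unsafe.Pointer(&dst[0])), uintptr(n))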

// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
#ifdef GOEXPERIMENT_regabiargs
	MOVD	R1, R2	// size
	MOVD	R0, R1	// addr
#else
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
#endif
	MOVD	LR, R3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// If addr (R1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R1
	BLT	data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R1
	BLT	call
data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R1
	BLT	ret
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R1
	BGE	ret	// BGE, not BGT: racedataend itself is excluded per the half-open interval above
call:
	JMP	racecall<>(SB)
ret:
	RET
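
// The branches above implement, in pseudo-Go (sketch only):
//
//	if (addr >= racearenastart && addr < racearenaend) ||
//		(addr >= racedatastart && addr < racedataend) {
//		racecall(...)	// tail call; R9 holds the __tsan_* target
//	}
//	// otherwise: do nothing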

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter<ABIInternal>(SB), NOSPLIT, $0-8
#ifdef GOEXPERIMENT_regabiargs
	MOVD	R0, R9	// callpc
#else
	MOVD	callpc+0(FP), R9
#endif
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter.
// R9 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// goroutine racectx
	MOVD	R9, R1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVD	$__tsan_func_enter(SB), R9
	BL	racecall<>(SB)
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit<ABIInternal>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// race context
	// void __tsan_func_exit(ThreadState *thr);
	MOVD	$__tsan_func_exit(SB), R9
	JMP	racecall<>(SB)
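
// The compiler brackets each instrumented function with these hooks,
// roughly (illustrative sketch; the exit call is emitted on every return
// path rather than deferred):
//
//	func f() {
//		racefuncenter(getcallerpc())
//		...
//		racefuncexit()
//	}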

// Atomic operations for the sync/atomic package.
// R3 = addr of the arguments passed to this function; it can
// be fetched at 40(RSP) in racecallatomic after two BLs.
// R0, R1, R2 are set in racecallatomic.
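//
// Why 40(RSP): a sketch, assuming the Go arm64 assembler's usual frame
// handling. The sync/atomic wrapper sees its arguments at 8(RSP) on entry;
// its prologue and racecallatomic's prologue each push a 16-byte frame
// (saved LR plus alignment), so inside racecallatomic the original argument
// block sits at 8+16+16 = 40(RSP).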

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_load(SB), R9
	BL	racecallatomic<>(SB)
	RET
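
// Reference: the Go signature implemented above is
//
//	func LoadInt32(addr *int32) (val int32)
//
// and the tsan side receives (ThreadState *thr, uptr cpc, uptr pc, u8 *args),
// with the Go argument/result block passed via args (R3 in racecallatomic).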

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVW	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVW	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVW	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVD	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVD	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVD	R0, ret+16(FP)
	RET
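
// Why the post-processing above: tsan's fetch_add writes the *old* value
// into the result slot, while sync/atomic's Add must return the *new* value.
// In pseudo-Go (sketch only):
//
//	old := tsanFetchAdd(addr, delta) // tsan stores old into ret+16(FP)
//	return old + delta               // the ADD above patches it up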

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// R9 = addr of target function
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers:
	// R0 = *ThreadState
	// R1 = caller pc
	// R2 = pc
	// R3 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOVD	40(RSP), R3	// 1st arg is addr. After two BLs, it is at 40(RSP).
	MOVB	(R3), R13	// segv here if addr is bad; a byte load avoids reading past the object
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ok
racecallatomic_data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ignore
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R3
	BGE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVD	R9, R21	// remember the original function
	MOVD	$__tsan_go_ignore_sync_begin(SB), R9
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	MOVD	R21, R9	// restore the original function
	// Call the atomic function.
	// racecall will call LLVM race code which might clobber R28 (g).
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arguments
	BL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVD	$__tsan_go_ignore_sync_end(SB), R9
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	RET
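
// Overall control flow of racecallatomic, in pseudo-Go (sketch only):
//
//	if inRaceRange(addr) {
//		tsanOp(racectx, callerpc, pc, args)	// tail call via racecall
//	} else {
//		ignoreSyncBegin(racectx)
//		tsanOp(racectx, callerpc, pc, args)
//		ignoreSyncEnd(racectx)
//	}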

// func runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVD	fn+0(FP), R9
	MOVD	arg0+8(FP), R0
	MOVD	arg1+16(FP), R1
	MOVD	arg2+24(FP), R2
	MOVD	arg3+32(FP), R3
	JMP	racecall<>(SB)
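
// The matching Go-side declaration lives in race.go and looks roughly like:
//
//	//go:noescape
//	func racecall(fn *byte, arg0, arg1, arg2, arg3 uintptr)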

// Switches SP to the g0 stack and calls (R9). Arguments already set.
// Clobbers R19, R20.
TEXT	racecall<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVD	g_m(g), R10
	// Switch to g0 stack.
	MOVD	RSP, R19	// callee-saved, preserved across the CALL
	MOVD	R30, R20	// callee-saved, preserved across the CALL
	MOVD	m_g0(R10), R11
	CMP	R11, g
	BEQ	call	// already on g0
	MOVD	(g_sched+gobuf_sp)(R11), R12
	MOVD	R12, RSP
call:
	BL	R9
	MOVD	R19, RSP
	JMP	(R20)
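
// In pseudo-Go, the switch above amounts to (sketch only; unlike systemstack,
// only SP is switched and g is left alone):
//
//	sp, lr := RSP, LR
//	if g != g.m.g0 {
//		RSP = g.m.g0.sched.sp
//	}
//	fn()	// the C entry point in R9
//	RSP = sp
//	// return via lr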

// C->Go callback thunk that allows C code to call runtime·racecallback.
// A direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// R0 contains the command code. R1 contains the command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CBNZ	R0, rest
	MOVD	g, R13
#ifdef TLS_darwin
	MOVD	R27, R12	// save R27 a.k.a. REGTMP (callee-saved in C); load_g clobbers it
#endif
	load_g
#ifdef TLS_darwin
	MOVD	R12, R27
#endif
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	p_raceprocctx(R0), R0
	MOVD	R0, (R1)
	MOVD	R13, g
	JMP	(LR)
rest:
	// Save callee-saved registers (Go code won't preserve them).
	// 8(RSP) and 16(RSP) are for the args passed through racecallback.
	SUB	$112, RSP
	MOVD	LR, 0(RSP)
	STP	(R19, R20), 24(RSP)
	STP	(R21, R22), 40(RSP)
	STP	(R23, R24), 56(RSP)
	STP	(R25, R26), 72(RSP)
	STP	(R27, g), 88(RSP)
	// Set g = g0.
	// load_g will clobber R0; save R0.
	MOVD	R0, R13
	load_g
	// Restore R0.
	MOVD	R13, R0
	MOVD	g_m(g), R13
	MOVD	m_g0(R13), R14
	CMP	R14, g
	BEQ	noswitch	// branch if already on g0
	MOVD	R14, g

	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)

	// All registers are smashed after Go code, reload.
	MOVD	g_m(g), R13
	MOVD	m_curg(R13), g	// g = m->curg
ret:
	// Restore callee-saved registers.
	MOVD	0(RSP), LR
	LDP	24(RSP), (R19, R20)
	LDP	40(RSP), (R21, R22)
	LDP	56(RSP), (R23, R24)
	LDP	72(RSP), (R25, R26)
	LDP	88(RSP), (R27, g)
	ADD	$112, RSP
	JMP	(LR)

noswitch:
	// already on g0
	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)
	JMP	ret
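
// On the Go side, the thunk lands in racecallback, which dispatches on the
// command code, roughly (sketch of race.go; names per that file):
//
//	func racecallback(cmd uintptr, ctx unsafe.Pointer) {
//		switch cmd {
//		case raceGetProcCmd:
//			// handled by the fast path above instead
//		case raceSymbolizeCodeCmd:
//			raceSymbolizeCode((*symbolizeCodeContext)(ctx))
//		case raceSymbolizeDataCmd:
//			raceSymbolizeData((*symbolizeDataContext)(ctx))
//		}
//	}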

#ifndef TLSG_IS_VARIABLE
// tls_g, g value for each thread in TLS
GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8
#endif