src/runtime/race_amd64.s
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"
#include "cgo/abi_amd64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race
// dependency on cmd/go.

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8 and R9; the rest go on the stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8 and R9; the rest go on the stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the
// 4 register arguments:
// https://msdn.microsoft.com/en-us/library/ms235286.aspx
// We do not do this, because it seems to be intended for vararg/unprototyped
// functions. The gcc-compiled race runtime does not try to use that space.
// The macros below hide the two conventions; see the example after them.

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif
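
// For illustration: with these macros, a four-argument call such as
//	__tsan_read_pc(thr, addr, callpc, pc)
// places thr/addr/callpc/pc in DI/SI/DX/CX on non-Windows targets and
// in CX/DX/R8/R9 on Windows, so the thunks below are written once in
// terms of RARG0-RARG3.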

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	AX, RARG1
	MOVQ	(SP), RARG2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)
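
// For example, for a Go load such as
//	v = *p
// the compiler with -race enabled emits, roughly,
//	MOVQ	p, AX
//	CALL	runtime·raceread(SB)
// before the load itself, so the address arrives in AX (the first
// ABIInternal argument register) and the caller pc is the return
// address sitting at (SP).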

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3	// pc is function start, tsan wants return address
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)
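
// The ADDQ $1 above compensates for tsan expecting return addresses:
// tsan steps back by one before symbolizing a pc, so passing "function
// start + 1" keeps the resolved location inside the intended function.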

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	AX, RARG1
	MOVQ	(SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3	// pc is function start, tsan wants return address
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective
// (it reads the caller pc from (SP), like racewriterange below).
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVQ	AX, RARG1
	MOVQ	BX, RARG2
	MOVQ	(SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3	// pc is function start, tsan wants return address
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVQ	AX, RARG1
	MOVQ	BX, RARG2
	MOVQ	(SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3	// pc is function start, tsan wants return address
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX	// without this no-op, the assembler (6a) miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET
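
// In C-like pseudocode, racecalladdr implements:
//	if ((addr >= racearenastart && addr < racearenaend) ||
//	    (addr >= racedatastart && addr < racedataend))
//		f(racectx, ...);	// f is the __tsan_* function in AX
// Accesses to addresses outside these ranges are simply ignored.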

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	callpc+0(FP), R11
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter
// R11 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	MOVQ	DX, BX	// save function entry context (for closures)
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	R11, RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves BX
	CALL	racecall<>(SB)
	MOVQ	BX, DX	// restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)
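
// Taken together, enter/exit bracket every instrumented Go function:
// the compiler emits, roughly,
//	CALL	runtime·racefuncenter(SB)	// on function entry
//	...					// instrumented body
//	CALL	runtime·racefuncexit(SB)	// before every return
// which lets tsan maintain the shadow call stack it uses in reports.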

// Atomic operations for sync/atomic package.
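// In race-enabled builds these definitions take the place of the native
// sync/atomic implementations: each forwards to a __tsan_go_atomic*
// entry point, which both performs the atomic operation and records it
// for the detector (see racecallatomic below).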

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVL	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET
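
// Worked example: if *addr is 3, AddInt32(addr, 2) must return the new
// value 5, but __tsan_go_atomic32_fetch_add writes the old value 3 to
// the result slot; adding the addend back converts old (fetch_add)
// into new (add_fetch).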

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// AX already contains the target function.
TEXT	racecallatomic<>(SB), NOSPLIT, $0-0
	// Trigger SIGSEGV early.
	MOVQ	16(SP), R12
	MOVL	(R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ	AX, BX	// remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	CALL	racecall<>(SB)
	MOVQ	BX, AX	// restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)
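
// The __tsan_go_atomic* entry points are assumed to follow tsan's Go
// interface, roughly:
//	void __tsan_go_atomic32_load(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
// where a points at the Go argument block (hence LEAQ 16(SP), RARG3
// above), so tsan can read the operands and write the results in place.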

// void runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)
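
// The corresponding Go-side declaration (in race.go) is of the form
//	func racecall(fn *byte, arg0, arg1, arg2, arg3 uintptr)
// with fn pointing at one of the __tsan_* symbols above.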

// Switches SP to g0 stack and calls (AX). Arguments already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	SP, R12	// callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call	// already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP	// alignment for gcc ABI
	CALL	AX
	MOVQ	R12, SP
	// Back to Go world, set special registers.
	// The g register (R14) is preserved in C.
	XORPS	X15, X15
	RET
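
// In pseudocode:
//	old := SP
//	if g != g0 { SP = g0.sched.sp }	// run the C code on the g0 stack
//	SP &^= 15	// 16-byte alignment for the C ABI
//	AX()	// the __tsan_* function
//	SP = old
//	X15 = 0	// restore Go's fixed zero register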

// C->Go callback thunk that allows calling runtime·racecallback from C code.
// The direct Go->C race call has only switched SP; finish the g->g0 switch by
// setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// RARG0 contains the command code. RARG1 contains the command-specific context.
// See racecallback for command codes.
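// In pseudocode, the thunk does:
//	if cmd == raceGetProcCmd (0) {	// fast path; may run on g0
//		*ctx = getg().m.p.raceprocctx
//		return
//	}
//	save host ABI registers; g = g0
//	runtime·racecallback(cmd, ctx)
//	g = m->curg; restore host ABI registers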
TEXT	runtime·racecallbackthunk(SB), NOSPLIT, $0-0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CMPQ	RARG0, $0
	JNE	rest
	get_tls(RARG0)
	MOVQ	g(RARG0), RARG0
	MOVQ	g_m(RARG0), RARG0
	MOVQ	m_p(RARG0), RARG0
	MOVQ	p_raceprocctx(RARG0), RARG0
	MOVQ	RARG0, (RARG1)
	RET

rest:
	// Transition from C ABI to Go ABI.
	PUSH_REGS_HOST_TO_ABI0()
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	MOVQ	m_g0(R13), R15
	CMPQ	R14, R15	// compare the current g with g0
	JEQ	noswitch	// branch if already on g0
	MOVQ	R15, g(R12)	// g = m->g0
	MOVQ	R15, R14	// set g register
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	// All registers are smashed after Go code, reload.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12)	// g = m->curg
ret:
	POP_REGS_HOST_TO_ABI0()
	RET

noswitch:
	// already on g0
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	JMP	ret