// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"
#include "cgo/abi_amd64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race dependency on cmd/go.

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9; the rest go on the stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9; the rest go on the stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// https://msdn.microsoft.com/en-us/library/ms235286.aspx
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// The gcc-compiled race runtime does not try to use that space.

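// As an illustration of how this maps onto the thunks below (a sketch, not
// part of the interface): given the tsan prototype
//	void __tsan_read(ThreadState *thr, void *addr, void *pc);
// thr travels in RARG0 (DI on SysV, CX on Windows), addr in RARG1 (SI/DX),
// and pc in RARG2 (DX/R8); the RARG* macros below encode exactly this mapping.
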
#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	AX, RARG1
	MOVQ	(SP), RARG2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)

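// For instance, for an instrumented read of a global x, the compiler emits,
// schematically (register-based ABIInternal, addr in AX):
//	MOVQ	$x(SB), AX
//	CALL	runtime·raceread(SB)
// so the thunk finds addr in AX and the caller pc at (SP).
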
// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	AX, RARG1
	MOVQ	(SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVQ	AX, RARG1
	MOVQ	BX, RARG2
	MOVQ	(SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVQ	AX, RARG1
	MOVQ	BX, RARG2
	MOVQ	(SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX		// w/o this 6a miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET

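// In Go-like pseudocode, the filter above is roughly:
//	if (addr >= racearenastart && addr < racearenaend) ||
//		(addr >= racedatastart && addr < racedataend) {
//		racecall(fn, racectx, ...)
//	}
// Addresses outside both shadowed ranges are silently ignored.
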
// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	AX, R11
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter
// R11 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	MOVQ	DX, BX		// save function entry context (for closures)
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	R11, RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves BX
	CALL	racecall<>(SB)
	MOVQ	BX, DX	// restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)

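// Putting the pieces together, an instrumented function is shaped roughly
// like this (a sketch, not actual compiler output):
//	func f(p *int) {
//		racefuncenter(getcallerpc())
//		racewrite(uintptr(unsafe.Pointer(p)))
//		*p = 1
//		racefuncexit()
//	}
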
// Atomic operations for sync/atomic package.

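// All stubs below follow one pattern: load the address of the matching
// __tsan_go_atomic* function into AX and call racecallatomic<>, which
// forwards the caller pc plus a pointer to the Go argument block. For
// context, the tsan side is declared roughly as (assumed from compiler-rt's
// Go interface; shown for illustration only):
//	void __tsan_go_atomic32_load(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
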
// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVL	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET

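// Worked example of the fetch_add -> add_fetch conversion above: for
// AddInt32(&x, 3) with x == 5, __tsan_go_atomic32_fetch_add stores the old
// value 5 into ret; adding the delta 3 again yields 8, the new value that
// Go's AddInt32 must return.
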
TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// AX already contains target function.
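// On entry (after the CALL from one of the stubs above) the stack is:
//	(SP)   - return address into the sync/atomic stub ("pc")
//	8(SP)  - the stub's own return address ("caller pc")
//	16(SP) - first Go argument, always the address being operated on
// The loads below pick these apart.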
TEXT	racecallatomic<>(SB), NOSPLIT, $0-0
	// Trigger SIGSEGV early.
	MOVQ	16(SP), R12
	MOVL	(R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ	AX, BX	// remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	CALL	racecall<>(SB)
	MOVQ	BX, AX	// restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)

// void runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)

// Switches SP to g0 stack and calls (AX). Arguments already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	SP, R12		// callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call	// already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP	// alignment for gcc ABI
	CALL	AX
	MOVQ	R12, SP
	// Back to Go world, set special registers.
	// The g register (R14) is preserved in C.
	XORPS	X15, X15
	RET

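// In pseudocode, racecall<> is essentially (a sketch):
//	sp := SP
//	if g != g.m.g0 {
//		SP = g.m.g0.sched.sp	// switch to the g0 stack
//	}
//	SP &^= 15	// align for the C ABI
//	f()		// fn in AX, args already in RARG0-RARG3
//	SP = sp		// back on the Go stack; X15 re-zeroed for the Go ABI
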
// C->Go callback thunk that allows calling runtime·racecallback from C code.
// A direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// RARG0 contains the command code. RARG1 contains command-specific context.
// See racecallback for command codes.
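// The fast path below is, in effect (a sketch):
//	if cmd == raceGetProcCmd {
//		*(*uintptr)(arg) = getg().m.p.ptr().raceprocctx
//		return
//	}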
TEXT	runtime·racecallbackthunk(SB), NOSPLIT, $0-0
	// Handle command raceGetProcCmd (0) here.
	// First, code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so will
	// benefit from this fast path.
	CMPQ	RARG0, $0
	JNE	rest
	get_tls(RARG0)
	MOVQ	g(RARG0), RARG0
	MOVQ	g_m(RARG0), RARG0
	MOVQ	m_p(RARG0), RARG0
	MOVQ	p_raceprocctx(RARG0), RARG0
	MOVQ	RARG0, (RARG1)
	RET

rest:
	// Transition from C ABI to Go ABI.
	PUSH_REGS_HOST_TO_ABI0()
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	MOVQ	m_g0(R13), R15
	CMPQ	R14, R15	// g == g0?
	JEQ	noswitch	// branch if already on g0
	MOVQ	R15, g(R12)	// g = m->g0
	MOVQ	R15, R14	// set g register
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	// All registers are smashed after Go code, reload.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12)	// g = m->curg
ret:
	POP_REGS_HOST_TO_ABI0()
	RET

noswitch:
	// already on g0
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	JMP	ret
