Text file src/runtime/asm_amd64.s

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  #include "cgo/abi_amd64.h"
    10  
    11  // _rt0_amd64 is common startup code for most amd64 systems when using
    12  // internal linking. This is the entry point for the program from the
    13  // kernel for an ordinary -buildmode=exe program. The stack holds the
    14  // number of arguments and the C-style argv.
    15  TEXT _rt0_amd64(SB),NOSPLIT,$-8
    16  	MOVQ	0(SP), DI	// argc
    17  	LEAQ	8(SP), SI	// argv
    18  	JMP	runtime·rt0_go(SB)
    19  
    20  // main is common startup code for most amd64 systems when using
    21  // external linking. The C startup code will call the symbol "main"
    22  // passing argc and argv in the usual C ABI registers DI and SI.
    23  TEXT main(SB),NOSPLIT,$-8
    24  	JMP	runtime·rt0_go(SB)
    25  
    26  // _rt0_amd64_lib is common startup code for most amd64 systems when
    27  // using -buildmode=c-archive or -buildmode=c-shared. The linker will
    28  // arrange to invoke this function as a global constructor (for
    29  // c-archive) or when the shared library is loaded (for c-shared).
    30  // We expect argc and argv to be passed in the usual C ABI registers
    31  // DI and SI.
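// For orientation, this entry point is typically reached from a build like
// (an illustrative command, not part of this file):
//
//	go build -buildmode=c-shared -o libgo.so .
//
// with a Go package that exports functions to C via cgo, roughly:
//
//	package main
//
//	import "C"
//
//	//export Add
//	func Add(a, b C.int) C.int { return a + b }
//
//	func main() {} // required but unused in c-shared/c-archive builds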
    32  TEXT _rt0_amd64_lib(SB),NOSPLIT,$0
    33  	// Transition from C ABI to Go ABI.
    34  	PUSH_REGS_HOST_TO_ABI0()
    35  
    36  	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)
    37  	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)
    38  
    39  	// Synchronous initialization.
    40  	CALL	runtime·libpreinit(SB)
    41  
    42  	// Create a new thread to finish Go runtime initialization.
    43  	MOVQ	_cgo_sys_thread_create(SB), AX
    44  	TESTQ	AX, AX
    45  	JZ	nocgo
    46  
    47  	// We're calling back to C.
    48  	// Align stack per ELF ABI requirements.
    49  	MOVQ	SP, BX  // Callee-save in C ABI
    50  	ANDQ	$~15, SP
    51  	MOVQ	$_rt0_amd64_lib_go(SB), DI
    52  	MOVQ	$0, SI
    53  	CALL	AX
    54  	MOVQ	BX, SP
    55  	JMP	restore
    56  
    57  nocgo:
    58  	ADJSP	$16
    59  	MOVQ	$0x800000, 0(SP)		// stacksize
    60  	MOVQ	$_rt0_amd64_lib_go(SB), AX
    61  	MOVQ	AX, 8(SP)			// fn
    62  	CALL	runtime·newosproc0(SB)
    63  	ADJSP	$-16
    64  
    65  restore:
    66  	POP_REGS_HOST_TO_ABI0()
    67  	RET
    68  
    69  // _rt0_amd64_lib_go initializes the Go runtime.
    70  // This is started in a separate thread by _rt0_amd64_lib.
    71  TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
    72  	MOVQ	_rt0_amd64_lib_argc<>(SB), DI
    73  	MOVQ	_rt0_amd64_lib_argv<>(SB), SI
    74  	JMP	runtime·rt0_go(SB)
    75  
    76  DATA _rt0_amd64_lib_argc<>(SB)/8, $0
    77  GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
    78  DATA _rt0_amd64_lib_argv<>(SB)/8, $0
    79  GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
    80  
    81  #ifdef GOAMD64_v2
    82  DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v2 microarchitecture support.\n"
    83  #endif
    84  
    85  #ifdef GOAMD64_v3
    86  DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v3 microarchitecture support.\n"
    87  #endif
    88  
    89  #ifdef GOAMD64_v4
    90  DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v4 microarchitecture support.\n"
    91  #endif
    92  
    93  GLOBL bad_cpu_msg<>(SB), RODATA, $84
    94  
    95  // Define a list of AMD64 microarchitecture level features
    96  // https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
    97  
    98                       // SSE3     SSSE3    CMPXCHG16B SSE4.1   SSE4.2    POPCNT
    99  #define V2_FEATURES_CX (1 << 0 | 1 << 9 | 1 << 13  | 1 << 19 | 1 << 20 | 1 << 23)
   100                           // LAHF/SAHF
   101  #define V2_EXT_FEATURES_CX (1 << 0)
   102                                        // FMA       MOVBE     OSXSAVE   AVX       F16C
   103  #define V3_FEATURES_CX (V2_FEATURES_CX | 1 << 12 | 1 << 22 | 1 << 27 | 1 << 28 | 1 << 29)
   104                                                // ABM (FOR LZNCT)
   105  #define V3_EXT_FEATURES_CX (V2_EXT_FEATURES_CX | 1 << 5)
   106                           // BMI1     AVX2     BMI2
   107  #define V3_EXT_FEATURES_BX (1 << 3 | 1 << 5 | 1 << 8)
   108                         // XMM      YMM
   109  #define V3_OS_SUPPORT_AX (1 << 1 | 1 << 2)
   110  
   111  #define V4_FEATURES_CX V3_FEATURES_CX
   112  
   113  #define V4_EXT_FEATURES_CX V3_EXT_FEATURES_CX
   114                                                // AVX512F   AVX512DQ  AVX512CD  AVX512BW  AVX512VL
   115  #define V4_EXT_FEATURES_BX (V3_EXT_FEATURES_BX | 1 << 16 | 1 << 17 | 1 << 28 | 1 << 30 | 1 << 31)
   116                                            // OPMASK   ZMM
   117  #define V4_OS_SUPPORT_AX (V3_OS_SUPPORT_AX | 1 << 5 | (1 << 6 | 1 << 7))
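// Worked example (not part of the build): V2_FEATURES_CX evaluates to
// 0x00982201. The GOAMD64 checks in rt0_go below require every bit of the
// relevant mask to be present, roughly (Go-level sketch; cpuid is a
// hypothetical helper returning EAX..EDX for a given leaf):
//
//	_, _, ecx, _ := cpuid(1, 0)
//	if ecx&V2_FEATURES_CX != V2_FEATURES_CX {
//		// write bad_cpu_msg to fd 2 and exit(1)
//	}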
   118  
   119  #ifdef GOAMD64_v2
   120  #define NEED_MAX_CPUID 0x80000001
   121  #define NEED_FEATURES_CX V2_FEATURES_CX
   122  #define NEED_EXT_FEATURES_CX V2_EXT_FEATURES_CX
   123  #endif
   124  
   125  #ifdef GOAMD64_v3
   126  #define NEED_MAX_CPUID 0x80000001
   127  #define NEED_FEATURES_CX V3_FEATURES_CX
   128  #define NEED_EXT_FEATURES_CX V3_EXT_FEATURES_CX
   129  #define NEED_EXT_FEATURES_BX V3_EXT_FEATURES_BX
   130  #define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
   131  #endif
   132  
   133  #ifdef GOAMD64_v4
   134  #define NEED_MAX_CPUID 0x80000001
   135  #define NEED_FEATURES_CX V4_FEATURES_CX
   136  #define NEED_EXT_FEATURES_CX V4_EXT_FEATURES_CX
   137  #define NEED_EXT_FEATURES_BX V4_EXT_FEATURES_BX
   138  
   139  // Downgrading v4 OS checks on Darwin for now, see CL 285572.
   140  #ifdef GOOS_darwin
   141  #define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
   142  #else
   143  #define NEED_OS_SUPPORT_AX V4_OS_SUPPORT_AX
   144  #endif
   145  
   146  #endif
   147  
   148  TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0
   149  	// copy arguments forward on an even stack
   150  	MOVQ	DI, AX		// argc
   151  	MOVQ	SI, BX		// argv
   152  	SUBQ	$(5*8), SP		// 3args 2auto
   153  	ANDQ	$~15, SP
   154  	MOVQ	AX, 24(SP)
   155  	MOVQ	BX, 32(SP)
   156  
   157  	// create istack out of the given (operating system) stack.
   158  	// _cgo_init may update stackguard.
   159  	MOVQ	$runtime·g0(SB), DI
   160  	LEAQ	(-64*1024+104)(SP), BX
   161  	MOVQ	BX, g_stackguard0(DI)
   162  	MOVQ	BX, g_stackguard1(DI)
   163  	MOVQ	BX, (g_stack+stack_lo)(DI)
   164  	MOVQ	SP, (g_stack+stack_hi)(DI)
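	// Illustrative summary of the g0 stack bounds established above
	// (comment only, mirrors the stores into runtime·g0):
	//
	//	g0.stack.hi    = SP at this point
	//	g0.stack.lo    = g0.stack.hi - 64*1024 + 104
	//	g0.stackguard0 = g0.stack.lo
	//	g0.stackguard1 = g0.stack.lo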
   165  
   166  	// find out information about the processor we're on
   167  	MOVL	$0, AX
   168  	CPUID
   169  	CMPL	AX, $0
   170  	JE	nocpuinfo
   171  
   172  	CMPL	BX, $0x756E6547  // "Genu"
   173  	JNE	notintel
   174  	CMPL	DX, $0x49656E69  // "ineI"
   175  	JNE	notintel
   176  	CMPL	CX, $0x6C65746E  // "ntel"
   177  	JNE	notintel
   178  	MOVB	$1, runtime·isIntel(SB)
   179  
   180  notintel:
   181  	// Load EAX=1 cpuid flags
   182  	MOVL	$1, AX
   183  	CPUID
   184  	MOVL	AX, runtime·processorVersionInfo(SB)
   185  
   186  nocpuinfo:
   187  	// if there is an _cgo_init, call it.
   188  	MOVQ	_cgo_init(SB), AX
   189  	TESTQ	AX, AX
   190  	JZ	needtls
   191  	// arg 1: g0, already in DI
   192  	MOVQ	$setg_gcc<>(SB), SI // arg 2: setg_gcc
   193  #ifdef GOOS_android
   194  	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
   195  	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
   196  	// Compensate for tls_g (+16).
   197  	MOVQ	-16(TLS), CX
   198  #else
   199  	MOVQ	$0, DX	// arg 3, 4: not used when using platform's TLS
   200  	MOVQ	$0, CX
   201  #endif
   202  #ifdef GOOS_windows
   203  	// Adjust for the Win64 calling convention.
   204  	MOVQ	CX, R9 // arg 4
   205  	MOVQ	DX, R8 // arg 3
   206  	MOVQ	SI, DX // arg 2
   207  	MOVQ	DI, CX // arg 1
   208  #endif
   209  	CALL	AX
   210  
   211  	// update stackguard after _cgo_init
   212  	MOVQ	$runtime·g0(SB), CX
   213  	MOVQ	(g_stack+stack_lo)(CX), AX
   214  	ADDQ	$const__StackGuard, AX
   215  	MOVQ	AX, g_stackguard0(CX)
   216  	MOVQ	AX, g_stackguard1(CX)
   217  
   218  #ifndef GOOS_windows
   219  	JMP ok
   220  #endif
   221  needtls:
   222  #ifdef GOOS_plan9
   223  	// skip TLS setup on Plan 9
   224  	JMP ok
   225  #endif
   226  #ifdef GOOS_solaris
   227  	// skip TLS setup on Solaris
   228  	JMP ok
   229  #endif
   230  #ifdef GOOS_illumos
   231  	// skip TLS setup on illumos
   232  	JMP ok
   233  #endif
   234  #ifdef GOOS_darwin
   235  	// skip TLS setup on Darwin
   236  	JMP ok
   237  #endif
   238  #ifdef GOOS_openbsd
   239  	// skip TLS setup on OpenBSD
   240  	JMP ok
   241  #endif
   242  
   243  	LEAQ	runtime·m0+m_tls(SB), DI
   244  	CALL	runtime·settls(SB)
   245  
   246  	// store through it, to make sure it works
   247  	get_tls(BX)
   248  	MOVQ	$0x123, g(BX)
   249  	MOVQ	runtime·m0+m_tls(SB), AX
   250  	CMPQ	AX, $0x123
   251  	JEQ 2(PC)
   252  	CALL	runtime·abort(SB)
   253  ok:
   254  	// set the per-goroutine and per-mach "registers"
   255  	get_tls(BX)
   256  	LEAQ	runtime·g0(SB), CX
   257  	MOVQ	CX, g(BX)
   258  	LEAQ	runtime·m0(SB), AX
   259  
   260  	// save m->g0 = g0
   261  	MOVQ	CX, m_g0(AX)
   262  	// save m0 to g0->m
   263  	MOVQ	AX, g_m(CX)
   264  
   265  	CLD				// convention is D is always left cleared
   266  
   267  	// Check GOAMD64 requirements.
   268  	// We need to do this after setting up TLS, so that
   269  	// we can report an error if there is a failure. See issue 49586.
   270  #ifdef NEED_FEATURES_CX
   271  	MOVL	$0, AX
   272  	CPUID
   273  	CMPL	AX, $0
   274  	JE	bad_cpu
   275  	MOVL	$1, AX
   276  	CPUID
   277  	ANDL	$NEED_FEATURES_CX, CX
   278  	CMPL	CX, $NEED_FEATURES_CX
   279  	JNE	bad_cpu
   280  #endif
   281  
   282  #ifdef NEED_MAX_CPUID
   283  	MOVL	$0x80000000, AX
   284  	CPUID
   285  	CMPL	AX, $NEED_MAX_CPUID
   286  	JL	bad_cpu
   287  #endif
   288  
   289  #ifdef NEED_EXT_FEATURES_BX
   290  	MOVL	$7, AX
   291  	MOVL	$0, CX
   292  	CPUID
   293  	ANDL	$NEED_EXT_FEATURES_BX, BX
   294  	CMPL	BX, $NEED_EXT_FEATURES_BX
   295  	JNE	bad_cpu
   296  #endif
   297  
   298  #ifdef NEED_EXT_FEATURES_CX
   299  	MOVL	$0x80000001, AX
   300  	CPUID
   301  	ANDL	$NEED_EXT_FEATURES_CX, CX
   302  	CMPL	CX, $NEED_EXT_FEATURES_CX
   303  	JNE	bad_cpu
   304  #endif
   305  
   306  #ifdef NEED_OS_SUPPORT_AX
   307  	XORL    CX, CX
   308  	XGETBV
   309  	ANDL	$NEED_OS_SUPPORT_AX, AX
   310  	CMPL	AX, $NEED_OS_SUPPORT_AX
   311  	JNE	bad_cpu
   312  #endif
   313  
   314  	CALL	runtime·check(SB)
   315  
   316  	MOVL	24(SP), AX		// copy argc
   317  	MOVL	AX, 0(SP)
   318  	MOVQ	32(SP), AX		// copy argv
   319  	MOVQ	AX, 8(SP)
   320  	CALL	runtime·args(SB)
   321  	CALL	runtime·osinit(SB)
   322  	CALL	runtime·schedinit(SB)
   323  
   324  	// create a new goroutine to start program
   325  	MOVQ	$runtime·mainPC(SB), AX		// entry
   326  	PUSHQ	AX
   327  	CALL	runtime·newproc(SB)
   328  	POPQ	AX
   329  
   330  	// start this M
   331  	CALL	runtime·mstart(SB)
   332  
   333  	CALL	runtime·abort(SB)	// mstart should never return
   334  	RET
   335  
   336  bad_cpu: // show that the program requires a certain microarchitecture level.
   337  	MOVQ	$2, 0(SP)
   338  	MOVQ	$bad_cpu_msg<>(SB), AX
   339  	MOVQ	AX, 8(SP)
   340  	MOVQ	$84, 16(SP)
   341  	CALL	runtime·write(SB)
   342  	MOVQ	$1, 0(SP)
   343  	CALL	runtime·exit(SB)
   344  	CALL	runtime·abort(SB)
   345  	RET
   346  
   347  	// Prevent dead-code elimination of debugCallV2, which is
   348  	// intended to be called by debuggers.
   349  	MOVQ	$runtime·debugCallV2<ABIInternal>(SB), AX
   350  	RET
   351  
   352  // mainPC is a function value for runtime.main, to be passed to newproc.
   353  // The reference to runtime.main is made via ABIInternal, since the
   354  // actual function (not the ABI0 wrapper) is needed by newproc.
   355  DATA	runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
   356  GLOBL	runtime·mainPC(SB),RODATA,$8
   357  
   358  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
   359  	BYTE	$0xcc
   360  	RET
   361  
   362  TEXT runtime·asminit(SB),NOSPLIT,$0-0
   363  	// No per-thread init.
   364  	RET
   365  
   366  TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
   367  	CALL	runtime·mstart0(SB)
   368  	RET // not reached
   369  
   370  /*
   371   *  go-routine
   372   */
   373  
   374  // func gogo(buf *gobuf)
   375  // restore state from Gobuf; longjmp
   376  TEXT runtime·gogo(SB), NOSPLIT, $0-8
   377  	MOVQ	buf+0(FP), BX		// gobuf
   378  	MOVQ	gobuf_g(BX), DX
   379  	MOVQ	0(DX), CX		// make sure g != nil
   380  	JMP	gogo<>(SB)
   381  
   382  TEXT gogo<>(SB), NOSPLIT, $0
   383  	get_tls(CX)
   384  	MOVQ	DX, g(CX)
   385  	MOVQ	DX, R14		// set the g register
   386  	MOVQ	gobuf_sp(BX), SP	// restore SP
   387  	MOVQ	gobuf_ret(BX), AX
   388  	MOVQ	gobuf_ctxt(BX), DX
   389  	MOVQ	gobuf_bp(BX), BP
   390  	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
   391  	MOVQ	$0, gobuf_ret(BX)
   392  	MOVQ	$0, gobuf_ctxt(BX)
   393  	MOVQ	$0, gobuf_bp(BX)
   394  	MOVQ	gobuf_pc(BX), BX
   395  	JMP	BX
   396  
   397  // func mcall(fn func(*g))
   398  // Switch to m->g0's stack, call fn(g).
   399  // Fn must never return. It should gogo(&g->sched)
   400  // to keep running g.
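// Illustrative use from runtime Go code (a sketch, not part of this file):
//
//	func parkHelper(gp *g) {
//		// Runs on the g0 stack; gp is the goroutine that called mcall.
//		// Must not return: it typically ends by calling schedule()
//		// or gogo(&gp.sched).
//	}
//
//	mcall(parkHelper) // switches to g0, then calls parkHelper(g)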
   401  TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT, $0-8
   402  	MOVQ	AX, DX	// DX = fn
   403  
   404  	// save state in g->sched
   405  	MOVQ	0(SP), BX	// caller's PC
   406  	MOVQ	BX, (g_sched+gobuf_pc)(R14)
   407  	LEAQ	fn+0(FP), BX	// caller's SP
   408  	MOVQ	BX, (g_sched+gobuf_sp)(R14)
   409  	MOVQ	BP, (g_sched+gobuf_bp)(R14)
   410  
   411  	// switch to m->g0 & its stack, call fn
   412  	MOVQ	g_m(R14), BX
   413  	MOVQ	m_g0(BX), SI	// SI = g.m.g0
   414  	CMPQ	SI, R14	// if g == m->g0 call badmcall
   415  	JNE	goodm
   416  	JMP	runtime·badmcall(SB)
   417  goodm:
   418  	MOVQ	R14, AX		// AX (and arg 0) = g
   419  	MOVQ	SI, R14		// g = g.m.g0
   420  	get_tls(CX)		// Set G in TLS
   421  	MOVQ	R14, g(CX)
   422  	MOVQ	(g_sched+gobuf_sp)(R14), SP	// sp = g0.sched.sp
   423  	PUSHQ	AX	// open up space for fn's arg spill slot
   424  	MOVQ	0(DX), R12
   425  	CALL	R12		// fn(g)
   426  	POPQ	AX
   427  	JMP	runtime·badmcall2(SB)
   428  	RET
   429  
   430  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   431  // of the G stack. We need to distinguish the routine that
   432  // lives at the bottom of the G stack from the one that lives
   433  // at the top of the system stack because the one at the top of
   434  // the system stack terminates the stack walk (see topofstack()).
   435  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   436  	RET
   437  
   438  // func systemstack(fn func())
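// Illustrative use from runtime Go code (a sketch, not part of this file):
//
//	systemstack(func() {
//		// Runs on the g0/system stack, which is large and is not moved
//		// by the stack growth machinery; control returns to the calling
//		// goroutine's stack afterwards.
//	})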
   439  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   440  	MOVQ	fn+0(FP), DI	// DI = fn
   441  	get_tls(CX)
   442  	MOVQ	g(CX), AX	// AX = g
   443  	MOVQ	g_m(AX), BX	// BX = m
   444  
   445  	CMPQ	AX, m_gsignal(BX)
   446  	JEQ	noswitch
   447  
   448  	MOVQ	m_g0(BX), DX	// DX = g0
   449  	CMPQ	AX, DX
   450  	JEQ	noswitch
   451  
   452  	CMPQ	AX, m_curg(BX)
   453  	JNE	bad
   454  
   455  	// switch stacks
   456  	// save our state in g->sched. Pretend to
   457  	// be systemstack_switch if the G stack is scanned.
   458  	CALL	gosave_systemstack_switch<>(SB)
   459  
   460  	// switch to g0
   461  	MOVQ	DX, g(CX)
   462  	MOVQ	DX, R14 // set the g register
   463  	MOVQ	(g_sched+gobuf_sp)(DX), BX
   464  	MOVQ	BX, SP
   465  
   466  	// call target function
   467  	MOVQ	DI, DX
   468  	MOVQ	0(DI), DI
   469  	CALL	DI
   470  
   471  	// switch back to g
   472  	get_tls(CX)
   473  	MOVQ	g(CX), AX
   474  	MOVQ	g_m(AX), BX
   475  	MOVQ	m_curg(BX), AX
   476  	MOVQ	AX, g(CX)
   477  	MOVQ	(g_sched+gobuf_sp)(AX), SP
   478  	MOVQ	$0, (g_sched+gobuf_sp)(AX)
   479  	RET
   480  
   481  noswitch:
   482  	// already on m stack; tail call the function
   483  	// Using a tail call here cleans up tracebacks since we won't stop
   484  	// at an intermediate systemstack.
   485  	MOVQ	DI, DX
   486  	MOVQ	0(DI), DI
   487  	JMP	DI
   488  
   489  bad:
   490  	// Bad: g is not gsignal, not g0, not curg. What is it?
   491  	MOVQ	$runtime·badsystemstack(SB), AX
   492  	CALL	AX
   493  	INT	$3
   494  
   495  
   496  /*
   497   * support for morestack
   498   */
   499  
   500  // Called during function prolog when more stack is needed.
   501  //
   502  // The traceback routines see morestack on a g0 as being
   503  // the top of a stack (for example, morestack calling newstack
   504  // calling the scheduler calling newm calling gc), so we must
   505  // record an argument size. For that purpose, it has no arguments.
   506  TEXT runtime·morestack(SB),NOSPLIT,$0-0
   507  	// Cannot grow scheduler stack (m->g0).
   508  	get_tls(CX)
   509  	MOVQ	g(CX), BX
   510  	MOVQ	g_m(BX), BX
   511  	MOVQ	m_g0(BX), SI
   512  	CMPQ	g(CX), SI
   513  	JNE	3(PC)
   514  	CALL	runtime·badmorestackg0(SB)
   515  	CALL	runtime·abort(SB)
   516  
   517  	// Cannot grow signal stack (m->gsignal).
   518  	MOVQ	m_gsignal(BX), SI
   519  	CMPQ	g(CX), SI
   520  	JNE	3(PC)
   521  	CALL	runtime·badmorestackgsignal(SB)
   522  	CALL	runtime·abort(SB)
   523  
   524  	// Called from f.
   525  	// Set m->morebuf to f's caller.
   526  	NOP	SP	// tell vet SP changed - stop checking offsets
   527  	MOVQ	8(SP), AX	// f's caller's PC
   528  	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
   529  	LEAQ	16(SP), AX	// f's caller's SP
   530  	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
   531  	get_tls(CX)
   532  	MOVQ	g(CX), SI
   533  	MOVQ	SI, (m_morebuf+gobuf_g)(BX)
   534  
   535  	// Set g->sched to context in f.
   536  	MOVQ	0(SP), AX // f's PC
   537  	MOVQ	AX, (g_sched+gobuf_pc)(SI)
   538  	LEAQ	8(SP), AX // f's SP
   539  	MOVQ	AX, (g_sched+gobuf_sp)(SI)
   540  	MOVQ	BP, (g_sched+gobuf_bp)(SI)
   541  	MOVQ	DX, (g_sched+gobuf_ctxt)(SI)
   542  
   543  	// Call newstack on m->g0's stack.
   544  	MOVQ	m_g0(BX), BX
   545  	MOVQ	BX, g(CX)
   546  	MOVQ	(g_sched+gobuf_sp)(BX), SP
   547  	CALL	runtime·newstack(SB)
   548  	CALL	runtime·abort(SB)	// crash if newstack returns
   549  	RET
   550  
   551  // morestack but not preserving ctxt.
   552  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
   553  	MOVL	$0, DX
   554  	JMP	runtime·morestack(SB)
   555  
   556  // spillArgs stores return values from registers to a *internal/abi.RegArgs in R12.
   557  TEXT ·spillArgs(SB),NOSPLIT,$0-0
   558  	MOVQ AX, 0(R12)
   559  	MOVQ BX, 8(R12)
   560  	MOVQ CX, 16(R12)
   561  	MOVQ DI, 24(R12)
   562  	MOVQ SI, 32(R12)
   563  	MOVQ R8, 40(R12)
   564  	MOVQ R9, 48(R12)
   565  	MOVQ R10, 56(R12)
   566  	MOVQ R11, 64(R12)
   567  	MOVQ X0, 72(R12)
   568  	MOVQ X1, 80(R12)
   569  	MOVQ X2, 88(R12)
   570  	MOVQ X3, 96(R12)
   571  	MOVQ X4, 104(R12)
   572  	MOVQ X5, 112(R12)
   573  	MOVQ X6, 120(R12)
   574  	MOVQ X7, 128(R12)
   575  	MOVQ X8, 136(R12)
   576  	MOVQ X9, 144(R12)
   577  	MOVQ X10, 152(R12)
   578  	MOVQ X11, 160(R12)
   579  	MOVQ X12, 168(R12)
   580  	MOVQ X13, 176(R12)
   581  	MOVQ X14, 184(R12)
   582  	RET
   583  
   584  // unspillArgs loads args into registers from a *internal/abi.RegArgs in R12.
   585  TEXT ·unspillArgs(SB),NOSPLIT,$0-0
   586  	MOVQ 0(R12), AX
   587  	MOVQ 8(R12), BX
   588  	MOVQ 16(R12), CX
   589  	MOVQ 24(R12), DI
   590  	MOVQ 32(R12), SI
   591  	MOVQ 40(R12), R8
   592  	MOVQ 48(R12), R9
   593  	MOVQ 56(R12), R10
   594  	MOVQ 64(R12), R11
   595  	MOVQ 72(R12), X0
   596  	MOVQ 80(R12), X1
   597  	MOVQ 88(R12), X2
   598  	MOVQ 96(R12), X3
   599  	MOVQ 104(R12), X4
   600  	MOVQ 112(R12), X5
   601  	MOVQ 120(R12), X6
   602  	MOVQ 128(R12), X7
   603  	MOVQ 136(R12), X8
   604  	MOVQ 144(R12), X9
   605  	MOVQ 152(R12), X10
   606  	MOVQ 160(R12), X11
   607  	MOVQ 168(R12), X12
   608  	MOVQ 176(R12), X13
   609  	MOVQ 184(R12), X14
   610  	RET
   611  
   612  // reflectcall: call a function with the given argument list
   613  // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
   614  // we don't have variable-sized frames, so we use a small number
   615  // of constant-sized-frame functions to encode a few bits of size in the pc.
   616  // Caution: ugly multiline assembly macros in your future!
   617  
   618  #define DISPATCH(NAME,MAXSIZE)		\
   619  	CMPQ	CX, $MAXSIZE;		\
   620  	JA	3(PC);			\
   621  	MOVQ	$NAME(SB), AX;		\
   622  	JMP	AX
   623  // Note: can't just "JMP NAME(SB)" - bad inlining results.
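// A rough Go-level sketch of the dispatch performed below (illustrative only):
//
//	// pickCallSize returns the smallest constant-size frame that fits
//	// frameSize; reflectcall then tail-jumps to runtime·call<size>.
//	func pickCallSize(frameSize uint32) uint32 {
//		for size := uint32(16); size <= 1<<30; size *= 2 {
//			if frameSize <= size {
//				return size
//			}
//		}
//		panic("badreflectcall") // frame larger than 1<<30 bytes
//	}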
   624  
   625  TEXT ·reflectcall(SB), NOSPLIT, $0-48
   626  	MOVLQZX frameSize+32(FP), CX
   627  	DISPATCH(runtime·call16, 16)
   628  	DISPATCH(runtime·call32, 32)
   629  	DISPATCH(runtime·call64, 64)
   630  	DISPATCH(runtime·call128, 128)
   631  	DISPATCH(runtime·call256, 256)
   632  	DISPATCH(runtime·call512, 512)
   633  	DISPATCH(runtime·call1024, 1024)
   634  	DISPATCH(runtime·call2048, 2048)
   635  	DISPATCH(runtime·call4096, 4096)
   636  	DISPATCH(runtime·call8192, 8192)
   637  	DISPATCH(runtime·call16384, 16384)
   638  	DISPATCH(runtime·call32768, 32768)
   639  	DISPATCH(runtime·call65536, 65536)
   640  	DISPATCH(runtime·call131072, 131072)
   641  	DISPATCH(runtime·call262144, 262144)
   642  	DISPATCH(runtime·call524288, 524288)
   643  	DISPATCH(runtime·call1048576, 1048576)
   644  	DISPATCH(runtime·call2097152, 2097152)
   645  	DISPATCH(runtime·call4194304, 4194304)
   646  	DISPATCH(runtime·call8388608, 8388608)
   647  	DISPATCH(runtime·call16777216, 16777216)
   648  	DISPATCH(runtime·call33554432, 33554432)
   649  	DISPATCH(runtime·call67108864, 67108864)
   650  	DISPATCH(runtime·call134217728, 134217728)
   651  	DISPATCH(runtime·call268435456, 268435456)
   652  	DISPATCH(runtime·call536870912, 536870912)
   653  	DISPATCH(runtime·call1073741824, 1073741824)
   654  	MOVQ	$runtime·badreflectcall(SB), AX
   655  	JMP	AX
   656  
   657  #define CALLFN(NAME,MAXSIZE)			\
   658  TEXT NAME(SB), WRAPPER, $MAXSIZE-48;		\
   659  	NO_LOCAL_POINTERS;			\
   660  	/* copy arguments to stack */		\
   661  	MOVQ	stackArgs+16(FP), SI;		\
   662  	MOVLQZX stackArgsSize+24(FP), CX;		\
   663  	MOVQ	SP, DI;				\
   664  	REP;MOVSB;				\
   665  	/* set up argument registers */		\
   666  	MOVQ    regArgs+40(FP), R12;		\
   667  	CALL    ·unspillArgs(SB);		\
   668  	/* call function */			\
   669  	MOVQ	f+8(FP), DX;			\
   670  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   671  	MOVQ	(DX), R12;			\
   672  	CALL	R12;				\
   673  	/* copy register return values back */		\
   674  	MOVQ    regArgs+40(FP), R12;		\
   675  	CALL    ·spillArgs(SB);		\
   676  	MOVLQZX	stackArgsSize+24(FP), CX;		\
   677  	MOVLQZX	stackRetOffset+28(FP), BX;		\
   678  	MOVQ	stackArgs+16(FP), DI;		\
   679  	MOVQ	stackArgsType+0(FP), DX;		\
   680  	MOVQ	SP, SI;				\
   681  	ADDQ	BX, DI;				\
   682  	ADDQ	BX, SI;				\
   683  	SUBQ	BX, CX;				\
   684  	CALL	callRet<>(SB);			\
   685  	RET
   686  
   687  // callRet copies return values back at the end of call*. This is a
   688  // separate function so it can allocate stack space for the arguments
   689  // to reflectcallmove. It does not follow the Go ABI; it expects its
   690  // arguments in registers.
   691  TEXT callRet<>(SB), NOSPLIT, $40-0
   692  	NO_LOCAL_POINTERS
   693  	MOVQ	DX, 0(SP)
   694  	MOVQ	DI, 8(SP)
   695  	MOVQ	SI, 16(SP)
   696  	MOVQ	CX, 24(SP)
   697  	MOVQ	R12, 32(SP)
   698  	CALL	runtime·reflectcallmove(SB)
   699  	RET
   700  
   701  CALLFN(·call16, 16)
   702  CALLFN(·call32, 32)
   703  CALLFN(·call64, 64)
   704  CALLFN(·call128, 128)
   705  CALLFN(·call256, 256)
   706  CALLFN(·call512, 512)
   707  CALLFN(·call1024, 1024)
   708  CALLFN(·call2048, 2048)
   709  CALLFN(·call4096, 4096)
   710  CALLFN(·call8192, 8192)
   711  CALLFN(·call16384, 16384)
   712  CALLFN(·call32768, 32768)
   713  CALLFN(·call65536, 65536)
   714  CALLFN(·call131072, 131072)
   715  CALLFN(·call262144, 262144)
   716  CALLFN(·call524288, 524288)
   717  CALLFN(·call1048576, 1048576)
   718  CALLFN(·call2097152, 2097152)
   719  CALLFN(·call4194304, 4194304)
   720  CALLFN(·call8388608, 8388608)
   721  CALLFN(·call16777216, 16777216)
   722  CALLFN(·call33554432, 33554432)
   723  CALLFN(·call67108864, 67108864)
   724  CALLFN(·call134217728, 134217728)
   725  CALLFN(·call268435456, 268435456)
   726  CALLFN(·call536870912, 536870912)
   727  CALLFN(·call1073741824, 1073741824)
   728  
   729  TEXT runtime·procyield(SB),NOSPLIT,$0-0
   730  	MOVL	cycles+0(FP), AX
   731  again:
   732  	PAUSE
   733  	SUBL	$1, AX
   734  	JNZ	again
   735  	RET
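// Illustrative caller (a sketch, not part of this file): the runtime's lock
// implementations spin with a bounded number of PAUSE iterations, roughly:
//
//	for i := 0; i < active_spin; i++ {
//		procyield(active_spin_cnt) // ~30 PAUSE instructions per round
//	}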
   736  
   737  
   738  TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
   739  	// Stores are already ordered on x86, so this is just a
   740  	// compile barrier.
   741  	RET
   742  
   743  // Save state of caller into g->sched,
   744  // but using fake PC from systemstack_switch.
   745  // Must only be called from functions with no locals ($0)
   746  // or else unwinding from systemstack_switch is incorrect.
   747  // Smashes R9.
   748  TEXT gosave_systemstack_switch<>(SB),NOSPLIT,$0
   749  	MOVQ	$runtime·systemstack_switch(SB), R9
   750  	MOVQ	R9, (g_sched+gobuf_pc)(R14)
   751  	LEAQ	8(SP), R9
   752  	MOVQ	R9, (g_sched+gobuf_sp)(R14)
   753  	MOVQ	$0, (g_sched+gobuf_ret)(R14)
   754  	MOVQ	BP, (g_sched+gobuf_bp)(R14)
   755  	// Assert ctxt is zero. See func save.
   756  	MOVQ	(g_sched+gobuf_ctxt)(R14), R9
   757  	TESTQ	R9, R9
   758  	JZ	2(PC)
   759  	CALL	runtime·abort(SB)
   760  	RET
   761  
   762  // func asmcgocall_no_g(fn, arg unsafe.Pointer)
   763  // Call fn(arg) aligned appropriately for the gcc ABI.
   764  // Called on a system stack, and there may be no g yet (during needm).
   765  TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
   766  	MOVQ	fn+0(FP), AX
   767  	MOVQ	arg+8(FP), BX
   768  	MOVQ	SP, DX
   769  	SUBQ	$32, SP
   770  	ANDQ	$~15, SP	// alignment
   771  	MOVQ	DX, 8(SP)
   772  	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   773  	MOVQ	BX, CX		// CX = first argument in Win64
   774  	CALL	AX
   775  	MOVQ	8(SP), DX
   776  	MOVQ	DX, SP
   777  	RET
   778  
   779  // func asmcgocall(fn, arg unsafe.Pointer) int32
   780  // Call fn(arg) on the scheduler stack,
   781  // aligned appropriately for the gcc ABI.
   782  // See cgocall.go for more details.
   783  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   784  	MOVQ	fn+0(FP), AX
   785  	MOVQ	arg+8(FP), BX
   786  
   787  	MOVQ	SP, DX
   788  
   789  	// Figure out if we need to switch to m->g0 stack.
   790  	// We get called to create new OS threads too, and those
   791  	// come in on the m->g0 stack already. Or we might already
   792  	// be on the m->gsignal stack.
   793  	get_tls(CX)
   794  	MOVQ	g(CX), DI
   795  	CMPQ	DI, $0
   796  	JEQ	nosave
   797  	MOVQ	g_m(DI), R8
   798  	MOVQ	m_gsignal(R8), SI
   799  	CMPQ	DI, SI
   800  	JEQ	nosave
   801  	MOVQ	m_g0(R8), SI
   802  	CMPQ	DI, SI
   803  	JEQ	nosave
   804  
   805  	// Switch to system stack.
   806  	CALL	gosave_systemstack_switch<>(SB)
   807  	MOVQ	SI, g(CX)
   808  	MOVQ	(g_sched+gobuf_sp)(SI), SP
   809  
   810  	// Now on a scheduling stack (a pthread-created stack).
   811  	// Make sure we have enough room for 4 stack-backed fast-call
   812  	// registers as per windows amd64 calling convention.
   813  	SUBQ	$64, SP
   814  	ANDQ	$~15, SP	// alignment for gcc ABI
   815  	MOVQ	DI, 48(SP)	// save g
   816  	MOVQ	(g_stack+stack_hi)(DI), DI
   817  	SUBQ	DX, DI
   818  	MOVQ	DI, 40(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   819  	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   820  	MOVQ	BX, CX		// CX = first argument in Win64
   821  	CALL	AX
   822  
   823  	// Restore registers, g, stack pointer.
   824  	get_tls(CX)
   825  	MOVQ	48(SP), DI
   826  	MOVQ	(g_stack+stack_hi)(DI), SI
   827  	SUBQ	40(SP), SI
   828  	MOVQ	DI, g(CX)
   829  	MOVQ	SI, SP
   830  
   831  	MOVL	AX, ret+16(FP)
   832  	RET
   833  
   834  nosave:
   835  	// Running on a system stack, perhaps even without a g.
   836  	// Having no g can happen during thread creation or thread teardown
   837  	// (see needm/dropm on Solaris, for example).
   838  	// This code is like the above sequence but without saving/restoring g
   839  	// and without worrying about the stack moving out from under us
   840  	// (because we're on a system stack, not a goroutine stack).
   841  	// The above code could be used directly if already on a system stack,
   842  	// but then the only path through this code would be a rare case on Solaris.
   843  	// Using this code for all "already on system stack" calls exercises it more,
   844  	// which should help keep it correct.
   845  	SUBQ	$64, SP
   846  	ANDQ	$~15, SP
   847  	MOVQ	$0, 48(SP)		// where above code stores g, in case someone looks during debugging
   848  	MOVQ	DX, 40(SP)	// save original stack pointer
   849  	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   850  	MOVQ	BX, CX		// CX = first argument in Win64
   851  	CALL	AX
   852  	MOVQ	40(SP), SI	// restore original stack pointer
   853  	MOVQ	SI, SP
   854  	MOVL	AX, ret+16(FP)
   855  	RET
   856  
   857  #ifdef GOOS_windows
   858  // Dummy TLS that's used on Windows so that we don't crash trying
   859  // to restore the G register in needm. needm and its callees are
   860  // very careful never to actually use the G; the TLS just can't be
   861  // unset since we're in Go code.
   862  GLOBL zeroTLS<>(SB),RODATA,$const_tlsSize
   863  #endif
   864  
   865  // func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
   866  // See cgocall.go for more details.
   867  TEXT ·cgocallback(SB),NOSPLIT,$24-24
   868  	NO_LOCAL_POINTERS
   869  
   870  	// If g is nil, Go did not create the current thread.
   871  	// Call needm to obtain one m for temporary use.
   872  	// In this case, we're running on the thread stack, so there's
   873  	// lots of space, but the linker doesn't know. Hide the call from
   874  	// the linker analysis by using an indirect call through AX.
   875  	get_tls(CX)
   876  #ifdef GOOS_windows
   877  	MOVL	$0, BX
   878  	CMPQ	CX, $0
   879  	JEQ	2(PC)
   880  #endif
   881  	MOVQ	g(CX), BX
   882  	CMPQ	BX, $0
   883  	JEQ	needm
   884  	MOVQ	g_m(BX), BX
   885  	MOVQ	BX, savedm-8(SP)	// saved copy of oldm
   886  	JMP	havem
   887  needm:
   888  #ifdef GOOS_windows
   889  	// Set up a dummy TLS value. needm is careful not to use it,
   890  	// but it needs to be there to prevent autogenerated code from
   891  	// crashing when it loads from it.
   892  	// We don't need to clear it or anything later because needm
   893  	// will set up TLS properly.
   894  	MOVQ	$zeroTLS<>(SB), DI
   895  	CALL	runtime·settls(SB)
   896  #endif
   897  	// On some platforms (Windows) we cannot call needm through
   898  	// an ABI wrapper because there's no TLS set up, and the ABI
   899  	// wrapper will try to restore the G register (R14) from TLS.
   900  	// Clear X15 because Go expects it and we're not calling
   901  	// through a wrapper, but otherwise avoid setting the G
   902  	// register in the wrapper and call needm directly. It
   903  	// takes no arguments and doesn't return any values so
   904  	// there's no need to handle that. Clear R14 so that there's
   905  	// a bad value in there, in case needm tries to use it.
   906  	XORPS	X15, X15
   907  	XORQ    R14, R14
   908  	MOVQ	$runtime·needm<ABIInternal>(SB), AX
   909  	CALL	AX
   910  	MOVQ	$0, savedm-8(SP) // dropm on return
   911  	get_tls(CX)
   912  	MOVQ	g(CX), BX
   913  	MOVQ	g_m(BX), BX
   914  
   915  	// Set m->sched.sp = SP, so that if a panic happens
   916  	// during the function we are about to execute, it will
   917  	// have a valid SP to run on the g0 stack.
   918  	// The next few lines (after the havem label)
   919  	// will save this SP onto the stack and then write
   920  	// the same SP back to m->sched.sp. That seems redundant,
   921  	// but if an unrecovered panic happens, unwindm will
   922  	// restore the g->sched.sp from the stack location
   923  	// and then systemstack will try to use it. If we don't set it here,
   924  	// that restored SP will be uninitialized (typically 0) and
   925  	// will not be usable.
   926  	MOVQ	m_g0(BX), SI
   927  	MOVQ	SP, (g_sched+gobuf_sp)(SI)
   928  
   929  havem:
   930  	// Now there's a valid m, and we're running on its m->g0.
   931  	// Save current m->g0->sched.sp on stack and then set it to SP.
   932  	// Save current sp in m->g0->sched.sp in preparation for
   933  	// switch back to m->curg stack.
   934  	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
   935  	MOVQ	m_g0(BX), SI
   936  	MOVQ	(g_sched+gobuf_sp)(SI), AX
   937  	MOVQ	AX, 0(SP)
   938  	MOVQ	SP, (g_sched+gobuf_sp)(SI)
   939  
   940  	// Switch to m->curg stack and call runtime.cgocallbackg.
   941  	// Because we are taking over the execution of m->curg
   942  	// but *not* resuming what had been running, we need to
   943  	// save that information (m->curg->sched) so we can restore it.
   944  	// We can restore m->curg->sched.sp easily, because calling
   945  	// runtime.cgocallbackg leaves SP unchanged upon return.
   946  	// To save m->curg->sched.pc, we push it onto the curg stack and
   947  	// open a frame the same size as cgocallback's g0 frame.
   948  	// Once we switch to the curg stack, the pushed PC will appear
   949  	// to be the return PC of cgocallback, so that the traceback
   950  	// will seamlessly trace back into the earlier calls.
   951  	MOVQ	m_curg(BX), SI
   952  	MOVQ	SI, g(CX)
   953  	MOVQ	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
   954  	MOVQ	(g_sched+gobuf_pc)(SI), BX
   955  	MOVQ	BX, -8(DI)  // "push" return PC on the g stack
   956  	// Gather our arguments into registers.
   957  	MOVQ	fn+0(FP), BX
   958  	MOVQ	frame+8(FP), CX
   959  	MOVQ	ctxt+16(FP), DX
   960  	// Compute the size of the frame, including return PC and, if
   961  	// GOEXPERIMENT=framepointer, the saved base pointer
   962  	LEAQ	fn+0(FP), AX
   963  	SUBQ	SP, AX   // AX is our actual frame size
   964  	SUBQ	AX, DI   // Allocate the same frame size on the g stack
   965  	MOVQ	DI, SP
   966  
   967  	MOVQ	BX, 0(SP)
   968  	MOVQ	CX, 8(SP)
   969  	MOVQ	DX, 16(SP)
   970  	MOVQ	$runtime·cgocallbackg(SB), AX
   971  	CALL	AX	// indirect call to bypass nosplit check. We're on a different stack now.
   972  
   973  	// Compute the size of the frame again. FP and SP have
   974  	// completely different values here than they did above,
   975  	// but only their difference matters.
   976  	LEAQ	fn+0(FP), AX
   977  	SUBQ	SP, AX
   978  
   979  	// Restore g->sched (== m->curg->sched) from saved values.
   980  	get_tls(CX)
   981  	MOVQ	g(CX), SI
   982  	MOVQ	SP, DI
   983  	ADDQ	AX, DI
   984  	MOVQ	-8(DI), BX
   985  	MOVQ	BX, (g_sched+gobuf_pc)(SI)
   986  	MOVQ	DI, (g_sched+gobuf_sp)(SI)
   987  
   988  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   989  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   990  	// so we do not have to restore it.)
   991  	MOVQ	g(CX), BX
   992  	MOVQ	g_m(BX), BX
   993  	MOVQ	m_g0(BX), SI
   994  	MOVQ	SI, g(CX)
   995  	MOVQ	(g_sched+gobuf_sp)(SI), SP
   996  	MOVQ	0(SP), AX
   997  	MOVQ	AX, (g_sched+gobuf_sp)(SI)
   998  
   999  	// If the m on entry was nil, we called needm above to borrow an m
  1000  	// for the duration of the call. Since the call is over, return it with dropm.
  1001  	MOVQ	savedm-8(SP), BX
  1002  	CMPQ	BX, $0
  1003  	JNE	done
  1004  	MOVQ	$runtime·dropm(SB), AX
  1005  	CALL	AX
  1006  #ifdef GOOS_windows
  1007  	// We need to clear the TLS pointer in case the next
  1008  	// thread that comes into Go tries to reuse that space
  1009  	// but uses the same M.
  1010  	XORQ	DI, DI
  1011  	CALL	runtime·settls(SB)
  1012  #endif
  1013  done:
  1014  
  1015  	// Done!
  1016  	RET
  1017  
  1018  // func setg(gg *g)
  1019  // set g. for use by needm.
  1020  TEXT runtime·setg(SB), NOSPLIT, $0-8
  1021  	MOVQ	gg+0(FP), BX
  1022  	get_tls(CX)
  1023  	MOVQ	BX, g(CX)
  1024  	RET
  1025  
  1026  // void setg_gcc(G*); set g called from gcc.
  1027  TEXT setg_gcc<>(SB),NOSPLIT,$0
  1028  	get_tls(AX)
  1029  	MOVQ	DI, g(AX)
  1030  	MOVQ	DI, R14 // set the g register
  1031  	RET
  1032  
  1033  TEXT runtime·abort(SB),NOSPLIT,$0-0
  1034  	INT	$3
  1035  loop:
  1036  	JMP	loop
  1037  
  1038  // check that SP is in range [g->stack.lo, g->stack.hi)
  1039  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
  1040  	get_tls(CX)
  1041  	MOVQ	g(CX), AX
  1042  	CMPQ	(g_stack+stack_hi)(AX), SP
  1043  	JHI	2(PC)
  1044  	CALL	runtime·abort(SB)
  1045  	CMPQ	SP, (g_stack+stack_lo)(AX)
  1046  	JHI	2(PC)
  1047  	CALL	runtime·abort(SB)
  1048  	RET
  1049  
  1050  // func cputicks() int64
  1051  TEXT runtime·cputicks(SB),NOSPLIT,$0-0
  1052  	CMPB	internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
  1053  	JNE	fences
  1054  	// The instruction-stream-serializing RDTSCP is supported.
  1055  	// RDTSCP is supported by Intel Nehalem (2008) and
  1056  	// AMD K8 Rev. F (2006) and newer.
  1057  	RDTSCP
  1058  done:
  1059  	SHLQ	$32, DX
  1060  	ADDQ	DX, AX
  1061  	MOVQ	AX, ret+0(FP)
  1062  	RET
  1063  fences:
  1064  	// MFENCE is instruction stream serializing and flushes the
  1065  	// store buffers on AMD. The serialization semantics of LFENCE on AMD
  1066  	// are dependent on MSR C001_1029 and CPU generation.
  1067  	// LFENCE on Intel does wait for all previous instructions to have executed.
  1068  	// Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
  1069  	// previous instructions executed and all previous loads and stores globally visible.
  1070  	// Using MFENCE;LFENCE here aligns the serializing properties without
  1071  	// runtime detection of CPU manufacturer.
  1072  	MFENCE
  1073  	LFENCE
  1074  	RDTSC
  1075  	JMP done
  1076  
  1077  // func memhash(p unsafe.Pointer, h, s uintptr) uintptr
  1078  // hash function using AES hardware instructions
  1079  TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT,$0-32
  1080  	// AX = ptr to data
  1081  	// BX = seed
  1082  	// CX = size
  1083  	CMPB	runtime·useAeshash(SB), $0
  1084  	JEQ	noaes
  1085  	JMP	aeshashbody<>(SB)
  1086  noaes:
  1087  	JMP	runtime·memhashFallback<ABIInternal>(SB)
  1088  
  1089  // func strhash(p unsafe.Pointer, h uintptr) uintptr
  1090  TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT,$0-24
  1091  	// AX = ptr to string struct
  1092  	// BX = seed
  1093  	CMPB	runtime·useAeshash(SB), $0
  1094  	JEQ	noaes
  1095  	MOVQ	8(AX), CX	// length of string
  1096  	MOVQ	(AX), AX	// string data
  1097  	JMP	aeshashbody<>(SB)
  1098  noaes:
  1099  	JMP	runtime·strhashFallback<ABIInternal>(SB)
  1100  
  1101  // AX: data
  1102  // BX: hash seed
  1103  // CX: length
  1104  // At return: AX = return value
  1105  TEXT aeshashbody<>(SB),NOSPLIT,$0-0
  1106  	// Fill an SSE register with our seeds.
  1107  	MOVQ	BX, X0				// 64 bits of per-table hash seed
  1108  	PINSRW	$4, CX, X0			// 16 bits of length
  1109  	PSHUFHW $0, X0, X0			// repeat length 4 times total
  1110  	MOVO	X0, X1				// save unscrambled seed
  1111  	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
  1112  	AESENC	X0, X0				// scramble seed
  1113  
  1114  	CMPQ	CX, $16
  1115  	JB	aes0to15
  1116  	JE	aes16
  1117  	CMPQ	CX, $32
  1118  	JBE	aes17to32
  1119  	CMPQ	CX, $64
  1120  	JBE	aes33to64
  1121  	CMPQ	CX, $128
  1122  	JBE	aes65to128
  1123  	JMP	aes129plus
  1124  
  1125  aes0to15:
  1126  	TESTQ	CX, CX
  1127  	JE	aes0
  1128  
  1129  	ADDQ	$16, AX
  1130  	TESTW	$0xff0, AX
  1131  	JE	endofpage
  1132  
  1133  	// 16 bytes loaded at this address won't cross
  1134  	// a page boundary, so we can load it directly.
  1135  	MOVOU	-16(AX), X1
  1136  	ADDQ	CX, CX
  1137  	MOVQ	$masks<>(SB), AX
  1138  	PAND	(AX)(CX*8), X1
  1139  final1:
  1140  	PXOR	X0, X1	// xor data with seed
  1141  	AESENC	X1, X1	// scramble combo 3 times
  1142  	AESENC	X1, X1
  1143  	AESENC	X1, X1
  1144  	MOVQ	X1, AX	// return X1
  1145  	RET
  1146  
  1147  endofpage:
  1148  	// address ends in 1111xxxx. Might be up against
  1149  	// a page boundary, so load ending at last byte.
  1150  	// Then shift bytes down using pshufb.
  1151  	MOVOU	-32(AX)(CX*1), X1
  1152  	ADDQ	CX, CX
  1153  	MOVQ	$shifts<>(SB), AX
  1154  	PSHUFB	(AX)(CX*8), X1
  1155  	JMP	final1
  1156  
  1157  aes0:
  1158  	// Return scrambled input seed
  1159  	AESENC	X0, X0
  1160  	MOVQ	X0, AX	// return X0
  1161  	RET
  1162  
  1163  aes16:
  1164  	MOVOU	(AX), X1
  1165  	JMP	final1
  1166  
  1167  aes17to32:
  1168  	// make second starting seed
  1169  	PXOR	runtime·aeskeysched+16(SB), X1
  1170  	AESENC	X1, X1
  1171  
  1172  	// load data to be hashed
  1173  	MOVOU	(AX), X2
  1174  	MOVOU	-16(AX)(CX*1), X3
  1175  
  1176  	// xor with seed
  1177  	PXOR	X0, X2
  1178  	PXOR	X1, X3
  1179  
  1180  	// scramble 3 times
  1181  	AESENC	X2, X2
  1182  	AESENC	X3, X3
  1183  	AESENC	X2, X2
  1184  	AESENC	X3, X3
  1185  	AESENC	X2, X2
  1186  	AESENC	X3, X3
  1187  
  1188  	// combine results
  1189  	PXOR	X3, X2
  1190  	MOVQ	X2, AX	// return X2
  1191  	RET
  1192  
  1193  aes33to64:
  1194  	// make 3 more starting seeds
  1195  	MOVO	X1, X2
  1196  	MOVO	X1, X3
  1197  	PXOR	runtime·aeskeysched+16(SB), X1
  1198  	PXOR	runtime·aeskeysched+32(SB), X2
  1199  	PXOR	runtime·aeskeysched+48(SB), X3
  1200  	AESENC	X1, X1
  1201  	AESENC	X2, X2
  1202  	AESENC	X3, X3
  1203  
  1204  	MOVOU	(AX), X4
  1205  	MOVOU	16(AX), X5
  1206  	MOVOU	-32(AX)(CX*1), X6
  1207  	MOVOU	-16(AX)(CX*1), X7
  1208  
  1209  	PXOR	X0, X4
  1210  	PXOR	X1, X5
  1211  	PXOR	X2, X6
  1212  	PXOR	X3, X7
  1213  
  1214  	AESENC	X4, X4
  1215  	AESENC	X5, X5
  1216  	AESENC	X6, X6
  1217  	AESENC	X7, X7
  1218  
  1219  	AESENC	X4, X4
  1220  	AESENC	X5, X5
  1221  	AESENC	X6, X6
  1222  	AESENC	X7, X7
  1223  
  1224  	AESENC	X4, X4
  1225  	AESENC	X5, X5
  1226  	AESENC	X6, X6
  1227  	AESENC	X7, X7
  1228  
  1229  	PXOR	X6, X4
  1230  	PXOR	X7, X5
  1231  	PXOR	X5, X4
  1232  	MOVQ	X4, AX	// return X4
  1233  	RET
  1234  
  1235  aes65to128:
  1236  	// make 7 more starting seeds
  1237  	MOVO	X1, X2
  1238  	MOVO	X1, X3
  1239  	MOVO	X1, X4
  1240  	MOVO	X1, X5
  1241  	MOVO	X1, X6
  1242  	MOVO	X1, X7
  1243  	PXOR	runtime·aeskeysched+16(SB), X1
  1244  	PXOR	runtime·aeskeysched+32(SB), X2
  1245  	PXOR	runtime·aeskeysched+48(SB), X3
  1246  	PXOR	runtime·aeskeysched+64(SB), X4
  1247  	PXOR	runtime·aeskeysched+80(SB), X5
  1248  	PXOR	runtime·aeskeysched+96(SB), X6
  1249  	PXOR	runtime·aeskeysched+112(SB), X7
  1250  	AESENC	X1, X1
  1251  	AESENC	X2, X2
  1252  	AESENC	X3, X3
  1253  	AESENC	X4, X4
  1254  	AESENC	X5, X5
  1255  	AESENC	X6, X6
  1256  	AESENC	X7, X7
  1257  
  1258  	// load data
  1259  	MOVOU	(AX), X8
  1260  	MOVOU	16(AX), X9
  1261  	MOVOU	32(AX), X10
  1262  	MOVOU	48(AX), X11
  1263  	MOVOU	-64(AX)(CX*1), X12
  1264  	MOVOU	-48(AX)(CX*1), X13
  1265  	MOVOU	-32(AX)(CX*1), X14
  1266  	MOVOU	-16(AX)(CX*1), X15
  1267  
  1268  	// xor with seed
  1269  	PXOR	X0, X8
  1270  	PXOR	X1, X9
  1271  	PXOR	X2, X10
  1272  	PXOR	X3, X11
  1273  	PXOR	X4, X12
  1274  	PXOR	X5, X13
  1275  	PXOR	X6, X14
  1276  	PXOR	X7, X15
  1277  
  1278  	// scramble 3 times
  1279  	AESENC	X8, X8
  1280  	AESENC	X9, X9
  1281  	AESENC	X10, X10
  1282  	AESENC	X11, X11
  1283  	AESENC	X12, X12
  1284  	AESENC	X13, X13
  1285  	AESENC	X14, X14
  1286  	AESENC	X15, X15
  1287  
  1288  	AESENC	X8, X8
  1289  	AESENC	X9, X9
  1290  	AESENC	X10, X10
  1291  	AESENC	X11, X11
  1292  	AESENC	X12, X12
  1293  	AESENC	X13, X13
  1294  	AESENC	X14, X14
  1295  	AESENC	X15, X15
  1296  
  1297  	AESENC	X8, X8
  1298  	AESENC	X9, X9
  1299  	AESENC	X10, X10
  1300  	AESENC	X11, X11
  1301  	AESENC	X12, X12
  1302  	AESENC	X13, X13
  1303  	AESENC	X14, X14
  1304  	AESENC	X15, X15
  1305  
  1306  	// combine results
  1307  	PXOR	X12, X8
  1308  	PXOR	X13, X9
  1309  	PXOR	X14, X10
  1310  	PXOR	X15, X11
  1311  	PXOR	X10, X8
  1312  	PXOR	X11, X9
  1313  	PXOR	X9, X8
  1314  	// X15 must be zero on return
  1315  	PXOR	X15, X15
  1316  	MOVQ	X8, AX	// return X8
  1317  	RET
  1318  
  1319  aes129plus:
  1320  	// make 7 more starting seeds
  1321  	MOVO	X1, X2
  1322  	MOVO	X1, X3
  1323  	MOVO	X1, X4
  1324  	MOVO	X1, X5
  1325  	MOVO	X1, X6
  1326  	MOVO	X1, X7
  1327  	PXOR	runtime·aeskeysched+16(SB), X1
  1328  	PXOR	runtime·aeskeysched+32(SB), X2
  1329  	PXOR	runtime·aeskeysched+48(SB), X3
  1330  	PXOR	runtime·aeskeysched+64(SB), X4
  1331  	PXOR	runtime·aeskeysched+80(SB), X5
  1332  	PXOR	runtime·aeskeysched+96(SB), X6
  1333  	PXOR	runtime·aeskeysched+112(SB), X7
  1334  	AESENC	X1, X1
  1335  	AESENC	X2, X2
  1336  	AESENC	X3, X3
  1337  	AESENC	X4, X4
  1338  	AESENC	X5, X5
  1339  	AESENC	X6, X6
  1340  	AESENC	X7, X7
  1341  
  1342  	// start with last (possibly overlapping) block
  1343  	MOVOU	-128(AX)(CX*1), X8
  1344  	MOVOU	-112(AX)(CX*1), X9
  1345  	MOVOU	-96(AX)(CX*1), X10
  1346  	MOVOU	-80(AX)(CX*1), X11
  1347  	MOVOU	-64(AX)(CX*1), X12
  1348  	MOVOU	-48(AX)(CX*1), X13
  1349  	MOVOU	-32(AX)(CX*1), X14
  1350  	MOVOU	-16(AX)(CX*1), X15
  1351  
  1352  	// xor in seed
  1353  	PXOR	X0, X8
  1354  	PXOR	X1, X9
  1355  	PXOR	X2, X10
  1356  	PXOR	X3, X11
  1357  	PXOR	X4, X12
  1358  	PXOR	X5, X13
  1359  	PXOR	X6, X14
  1360  	PXOR	X7, X15
  1361  
  1362  	// compute number of remaining 128-byte blocks
  1363  	DECQ	CX
  1364  	SHRQ	$7, CX
  1365  
  1366  aesloop:
  1367  	// scramble state
  1368  	AESENC	X8, X8
  1369  	AESENC	X9, X9
  1370  	AESENC	X10, X10
  1371  	AESENC	X11, X11
  1372  	AESENC	X12, X12
  1373  	AESENC	X13, X13
  1374  	AESENC	X14, X14
  1375  	AESENC	X15, X15
  1376  
  1377  	// scramble state, xor in a block
  1378  	MOVOU	(AX), X0
  1379  	MOVOU	16(AX), X1
  1380  	MOVOU	32(AX), X2
  1381  	MOVOU	48(AX), X3
  1382  	AESENC	X0, X8
  1383  	AESENC	X1, X9
  1384  	AESENC	X2, X10
  1385  	AESENC	X3, X11
  1386  	MOVOU	64(AX), X4
  1387  	MOVOU	80(AX), X5
  1388  	MOVOU	96(AX), X6
  1389  	MOVOU	112(AX), X7
  1390  	AESENC	X4, X12
  1391  	AESENC	X5, X13
  1392  	AESENC	X6, X14
  1393  	AESENC	X7, X15
  1394  
  1395  	ADDQ	$128, AX
  1396  	DECQ	CX
  1397  	JNE	aesloop
  1398  
  1399  	// 3 more scrambles to finish
  1400  	AESENC	X8, X8
  1401  	AESENC	X9, X9
  1402  	AESENC	X10, X10
  1403  	AESENC	X11, X11
  1404  	AESENC	X12, X12
  1405  	AESENC	X13, X13
  1406  	AESENC	X14, X14
  1407  	AESENC	X15, X15
  1408  	AESENC	X8, X8
  1409  	AESENC	X9, X9
  1410  	AESENC	X10, X10
  1411  	AESENC	X11, X11
  1412  	AESENC	X12, X12
  1413  	AESENC	X13, X13
  1414  	AESENC	X14, X14
  1415  	AESENC	X15, X15
  1416  	AESENC	X8, X8
  1417  	AESENC	X9, X9
  1418  	AESENC	X10, X10
  1419  	AESENC	X11, X11
  1420  	AESENC	X12, X12
  1421  	AESENC	X13, X13
  1422  	AESENC	X14, X14
  1423  	AESENC	X15, X15
  1424  
  1425  	PXOR	X12, X8
  1426  	PXOR	X13, X9
  1427  	PXOR	X14, X10
  1428  	PXOR	X15, X11
  1429  	PXOR	X10, X8
  1430  	PXOR	X11, X9
  1431  	PXOR	X9, X8
  1432  	// X15 must be zero on return
  1433  	PXOR	X15, X15
  1434  	MOVQ	X8, AX	// return X8
  1435  	RET
  1436  
  1437  // func memhash32(p unsafe.Pointer, h uintptr) uintptr
  1438  // ABIInternal for performance.
  1439  TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT,$0-24
  1440  	// AX = ptr to data
  1441  	// BX = seed
  1442  	CMPB	runtime·useAeshash(SB), $0
  1443  	JEQ	noaes
  1444  	MOVQ	BX, X0	// X0 = seed
  1445  	PINSRD	$2, (AX), X0	// data
  1446  	AESENC	runtime·aeskeysched+0(SB), X0
  1447  	AESENC	runtime·aeskeysched+16(SB), X0
  1448  	AESENC	runtime·aeskeysched+32(SB), X0
  1449  	MOVQ	X0, AX	// return X0
  1450  	RET
  1451  noaes:
  1452  	JMP	runtime·memhash32Fallback<ABIInternal>(SB)
  1453  
  1454  // func memhash64(p unsafe.Pointer, h uintptr) uintptr
  1455  // ABIInternal for performance.
  1456  TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT,$0-24
  1457  	// AX = ptr to data
  1458  	// BX = seed
  1459  	CMPB	runtime·useAeshash(SB), $0
  1460  	JEQ	noaes
  1461  	MOVQ	BX, X0	// X0 = seed
  1462  	PINSRQ	$1, (AX), X0	// data
  1463  	AESENC	runtime·aeskeysched+0(SB), X0
  1464  	AESENC	runtime·aeskeysched+16(SB), X0
  1465  	AESENC	runtime·aeskeysched+32(SB), X0
  1466  	MOVQ	X0, AX	// return X0
  1467  	RET
  1468  noaes:
  1469  	JMP	runtime·memhash64Fallback<ABIInternal>(SB)
  1470  
  1471  // simple mask to get rid of data in the high part of the register.
  1472  DATA masks<>+0x00(SB)/8, $0x0000000000000000
  1473  DATA masks<>+0x08(SB)/8, $0x0000000000000000
  1474  DATA masks<>+0x10(SB)/8, $0x00000000000000ff
  1475  DATA masks<>+0x18(SB)/8, $0x0000000000000000
  1476  DATA masks<>+0x20(SB)/8, $0x000000000000ffff
  1477  DATA masks<>+0x28(SB)/8, $0x0000000000000000
  1478  DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
  1479  DATA masks<>+0x38(SB)/8, $0x0000000000000000
  1480  DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
  1481  DATA masks<>+0x48(SB)/8, $0x0000000000000000
  1482  DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
  1483  DATA masks<>+0x58(SB)/8, $0x0000000000000000
  1484  DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
  1485  DATA masks<>+0x68(SB)/8, $0x0000000000000000
  1486  DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
  1487  DATA masks<>+0x78(SB)/8, $0x0000000000000000
  1488  DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
  1489  DATA masks<>+0x88(SB)/8, $0x0000000000000000
  1490  DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
  1491  DATA masks<>+0x98(SB)/8, $0x00000000000000ff
  1492  DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
  1493  DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
  1494  DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
  1495  DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
  1496  DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
  1497  DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
  1498  DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
  1499  DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
  1500  DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
  1501  DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
  1502  DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
  1503  DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
  1504  GLOBL masks<>(SB),RODATA,$256
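// Worked example: for a 3-byte key, aes0to15 above loads 16 bytes and ANDs
// them with the entry at masks<>+0x30 (0x0000000000ffffff, 0x0), keeping the
// 3 data bytes and zeroing the remaining 13.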
  1505  
  1506  // func checkASM() bool
  1507  TEXT ·checkASM(SB),NOSPLIT,$0-1
  1508  	// check that masks<>(SB) and shifts<>(SB) are 16-byte aligned
  1509  	MOVQ	$masks<>(SB), AX
  1510  	MOVQ	$shifts<>(SB), BX
  1511  	ORQ	BX, AX
  1512  	TESTQ	$15, AX
  1513  	SETEQ	ret+0(FP)
  1514  	RET
  1515  
  1516  // these are arguments to pshufb. They move data down from
  1517  // the high bytes of the register to the low bytes of the register.
  1518  // index is how many bytes to move.
  1519  DATA shifts<>+0x00(SB)/8, $0x0000000000000000
  1520  DATA shifts<>+0x08(SB)/8, $0x0000000000000000
  1521  DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
  1522  DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
  1523  DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
  1524  DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
  1525  DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
  1526  DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
  1527  DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
  1528  DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
  1529  DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
  1530  DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
  1531  DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
  1532  DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
  1533  DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
  1534  DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
  1535  DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
  1536  DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
  1537  DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
  1538  DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
  1539  DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
  1540  DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
  1541  DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
  1542  DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
  1543  DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
  1544  DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
  1545  DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
  1546  DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
  1547  DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
  1548  DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
  1549  DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
  1550  DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
  1551  GLOBL shifts<>(SB),RODATA,$256
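// Worked example: for a 3-byte key that ends near a page boundary, endofpage
// above loads the 16 bytes ending at the last data byte and PSHUFBs with the
// entry at shifts<>+0x30 (bytes 0d 0e 0f ff ff ...), moving the top 3 bytes
// down to positions 0..2 and zeroing the rest.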
  1552  
  1553  TEXT runtime·return0(SB), NOSPLIT, $0
  1554  	MOVL	$0, AX
  1555  	RET
  1556  
  1557  
  1558  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1559  // Must obey the gcc calling convention.
  1560  TEXT _cgo_topofstack(SB),NOSPLIT,$0
  1561  	get_tls(CX)
  1562  	MOVQ	g(CX), AX
  1563  	MOVQ	g_m(AX), AX
  1564  	MOVQ	m_curg(AX), AX
  1565  	MOVQ	(g_stack+stack_hi)(AX), AX
  1566  	RET
  1567  
  1568  // The top-most function running on a goroutine
  1569  // returns to goexit+PCQuantum.
  1570  TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME,$0-0
  1571  	BYTE	$0x90	// NOP
  1572  	CALL	runtime·goexit1(SB)	// does not return
  1573  	// traceback from goexit1 must hit code range of goexit
  1574  	BYTE	$0x90	// NOP
  1575  
  1576  // This is called from .init_array and follows the platform, not Go, ABI.
  1577  TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
  1578  	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
  1579  	MOVQ	runtime·lastmoduledatap(SB), AX
  1580  	MOVQ	DI, moduledata_next(AX)
  1581  	MOVQ	DI, runtime·lastmoduledatap(SB)
  1582  	POPQ	R15
  1583  	RET
  1584  
  1585  // Initialize special registers then jump to sigpanic.
  1586  // This function is injected from the signal handler for panicking
  1587  // signals. It is quite painful to set X15 in the signal context,
  1588  // so we do it here.
  1589  TEXT ·sigpanic0(SB),NOSPLIT,$0-0
  1590  	get_tls(R14)
  1591  	MOVQ	g(R14), R14
  1592  #ifndef GOOS_plan9
  1593  	XORPS	X15, X15
  1594  #endif
  1595  	JMP	·sigpanic<ABIInternal>(SB)
  1596  
  1597  // gcWriteBarrier performs a heap pointer write and informs the GC.
  1598  //
  1599  // gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
  1600  // - DI is the destination of the write
  1601  // - AX is the value being written at DI
  1602  // It clobbers FLAGS. It does not clobber any general-purpose registers,
  1603  // but may clobber others (e.g., SSE registers).
  1604  // Defined as ABIInternal since it does not use the stack-based Go ABI.
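// A rough Go-level sketch of the fast path below (field names follow the
// runtime's wbBuf; treat this as illustration, not the implementation):
//
//	func gcWriteBarrier(slot *uintptr, val uintptr) {
//		buf := &getg().m.p.ptr().wbBuf
//		p := buf.next
//		buf.next = p + 16 // two 8-byte words per entry
//		*(*uintptr)(unsafe.Pointer(p)) = val       // record the value written
//		*(*uintptr)(unsafe.Pointer(p + 8)) = *slot // record the old *slot
//		if buf.next == buf.end {
//			wbBufFlush(slot, val) // buffer full: drain it
//		}
//		*slot = val // finally perform the write
//	}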
  1605  TEXT runtime·gcWriteBarrier<ABIInternal>(SB),NOSPLIT,$112
  1606  	// Save the registers clobbered by the fast path. This is slightly
  1607  	// faster than having the caller spill these.
  1608  	MOVQ	R12, 96(SP)
  1609  	MOVQ	R13, 104(SP)
  1610  	// TODO: Consider passing g.m.p in as an argument so they can be shared
  1611  	// across a sequence of write barriers.
  1612  	MOVQ	g_m(R14), R13
  1613  	MOVQ	m_p(R13), R13
  1614  	MOVQ	(p_wbBuf+wbBuf_next)(R13), R12
  1615  	// Increment wbBuf.next position.
  1616  	LEAQ	16(R12), R12
  1617  	MOVQ	R12, (p_wbBuf+wbBuf_next)(R13)
  1618  	CMPQ	R12, (p_wbBuf+wbBuf_end)(R13)
  1619  	// Record the write.
  1620  	MOVQ	AX, -16(R12)	// Record value
  1621  	// Note: This turns bad pointer writes into bad
  1622  	// pointer reads, which could be confusing. We could avoid
  1623  	// reading from obviously bad pointers, which would
  1624  	// take care of the vast majority of these. We could
  1625  	// patch this up in the signal handler, or use XCHG to
  1626  	// combine the read and the write.
  1627  	MOVQ	(DI), R13
  1628  	MOVQ	R13, -8(R12)	// Record *slot
  1629  	// Is the buffer full? (flags set in CMPQ above)
  1630  	JEQ	flush
  1631  ret:
  1632  	MOVQ	96(SP), R12
  1633  	MOVQ	104(SP), R13
  1634  	// Do the write.
  1635  	MOVQ	AX, (DI)
  1636  	RET
  1637  
  1638  flush:
  1639  	// Save all general purpose registers since these could be
  1640  	// clobbered by wbBufFlush and were not saved by the caller.
  1641  	// It is possible for wbBufFlush to clobber other registers
  1642  	// (e.g., SSE registers), but the compiler takes care of saving
  1643  	// those in the caller if necessary. This strikes a balance
  1644  	// with registers that are likely to be used.
  1645  	//
  1646  	// We don't have type information for these, but all code under
  1647  	// here is NOSPLIT, so nothing will observe these.
  1648  	//
  1649  	// TODO: We could strike a different balance; e.g., saving X0
  1650  	// and not saving GP registers that are less likely to be used.
  1651  	MOVQ	DI, 0(SP)	// Also first argument to wbBufFlush
  1652  	MOVQ	AX, 8(SP)	// Also second argument to wbBufFlush
  1653  	MOVQ	BX, 16(SP)
  1654  	MOVQ	CX, 24(SP)
  1655  	MOVQ	DX, 32(SP)
  1656  	// DI already saved
  1657  	MOVQ	SI, 40(SP)
  1658  	MOVQ	BP, 48(SP)
  1659  	MOVQ	R8, 56(SP)
  1660  	MOVQ	R9, 64(SP)
  1661  	MOVQ	R10, 72(SP)
  1662  	MOVQ	R11, 80(SP)
  1663  	// R12 already saved
  1664  	// R13 already saved
  1665  	// R14 is g
  1666  	MOVQ	R15, 88(SP)
  1667  
  1668  	// This takes arguments DI and AX
  1669  	CALL	runtime·wbBufFlush(SB)
  1670  
  1671  	MOVQ	0(SP), DI
  1672  	MOVQ	8(SP), AX
  1673  	MOVQ	16(SP), BX
  1674  	MOVQ	24(SP), CX
  1675  	MOVQ	32(SP), DX
  1676  	MOVQ	40(SP), SI
  1677  	MOVQ	48(SP), BP
  1678  	MOVQ	56(SP), R8
  1679  	MOVQ	64(SP), R9
  1680  	MOVQ	72(SP), R10
  1681  	MOVQ	80(SP), R11
  1682  	MOVQ	88(SP), R15
  1683  	JMP	ret
  1684  
  1685  // gcWriteBarrierCX is gcWriteBarrier, but with args in DI and CX.
  1686  // Defined as ABIInternal since it does not use the stable Go ABI.
  1687  TEXT runtime·gcWriteBarrierCX<ABIInternal>(SB),NOSPLIT,$0
  1688  	XCHGQ CX, AX
  1689  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1690  	XCHGQ CX, AX
  1691  	RET
  1692  
  1693  // gcWriteBarrierDX is gcWriteBarrier, but with args in DI and DX.
  1694  // Defined as ABIInternal since it does not use the stable Go ABI.
  1695  TEXT runtime·gcWriteBarrierDX<ABIInternal>(SB),NOSPLIT,$0
  1696  	XCHGQ DX, AX
  1697  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1698  	XCHGQ DX, AX
  1699  	RET
  1700  
  1701  // gcWriteBarrierBX is gcWriteBarrier, but with args in DI and BX.
  1702  // Defined as ABIInternal since it does not use the stable Go ABI.
  1703  TEXT runtime·gcWriteBarrierBX<ABIInternal>(SB),NOSPLIT,$0
  1704  	XCHGQ BX, AX
  1705  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1706  	XCHGQ BX, AX
  1707  	RET
  1708  
  1709  // gcWriteBarrierBP is gcWriteBarrier, but with args in DI and BP.
  1710  // Defined as ABIInternal since it does not use the stable Go ABI.
  1711  TEXT runtime·gcWriteBarrierBP<ABIInternal>(SB),NOSPLIT,$0
  1712  	XCHGQ BP, AX
  1713  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1714  	XCHGQ BP, AX
  1715  	RET
  1716  
  1717  // gcWriteBarrierSI is gcWriteBarrier, but with args in DI and SI.
  1718  // Defined as ABIInternal since it does not use the stable Go ABI.
  1719  TEXT runtime·gcWriteBarrierSI<ABIInternal>(SB),NOSPLIT,$0
  1720  	XCHGQ SI, AX
  1721  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1722  	XCHGQ SI, AX
  1723  	RET
  1724  
  1725  // gcWriteBarrierR8 is gcWriteBarrier, but with args in DI and R8.
  1726  // Defined as ABIInternal since it does not use the stable Go ABI.
  1727  TEXT runtime·gcWriteBarrierR8<ABIInternal>(SB),NOSPLIT,$0
  1728  	XCHGQ R8, AX
  1729  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1730  	XCHGQ R8, AX
  1731  	RET
  1732  
  1733  // gcWriteBarrierR9 is gcWriteBarrier, but with args in DI and R9.
  1734  // Defined as ABIInternal since it does not use the stable Go ABI.
  1735  TEXT runtime·gcWriteBarrierR9<ABIInternal>(SB),NOSPLIT,$0
  1736  	XCHGQ R9, AX
  1737  	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
  1738  	XCHGQ R9, AX
  1739  	RET
  1740  
  1741  DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
  1742  GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below
  1743  
  1744  // debugCallV2 is the entry point for debugger-injected function
  1745  // calls on running goroutines. It informs the runtime that a
  1746  // debug call has been injected and creates a call frame for the
  1747  // debugger to fill in.
  1748  //
  1749  // To inject a function call, a debugger should:
  1750  // 1. Check that the goroutine is in state _Grunning and that
  1751  //    there are at least 256 bytes free on the stack.
  1752  // 2. Push the current PC on the stack (updating SP).
  1753  // 3. Write the desired argument frame size at SP-16 (using the SP
  1754  //    after step 2).
  1755  // 4. Save all machine registers (including flags and XMM registers)
  1756  //    so they can be restored later by the debugger.
  1757  // 5. Set the PC to debugCallV2 and resume execution.
  1758  //
  1759  // If the goroutine is in state _Grunnable, then it's not generally
  1760  // safe to inject a call because it may return out via other runtime
  1761  // operations. Instead, the debugger should unwind the stack to find
  1762  // the return to non-runtime code, add a temporary breakpoint there,
  1763  // and inject the call once that breakpoint is hit.
  1764  //
  1765  // If the goroutine is in any other state, it's not safe to inject a call.
  1766  //
  1767  // This function communicates back to the debugger by setting R12 and
  1768  // invoking INT3 to raise a breakpoint signal. See the comments in the
  1769  // implementation for the protocol the debugger is expected to
  1770  // follow. InjectDebugCall in the runtime tests demonstrates this protocol.
  1771  //
  1772  // The debugger must ensure that any pointers passed to the function
  1773  // obey escape analysis requirements. Specifically, it must not pass
  1774  // a stack pointer to an escaping argument. debugCallV2 cannot check
  1775  // this invariant.
  1776  //
  1777  // This is ABIInternal because Go code injects its PC directly into new
  1778  // goroutine stacks.
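        // As a summary of the R12 values used below (taken from the implementation;
        // see the comments at each INT3):
        //	0  - a call frame is ready; the debugger writes the arguments and resumes
        //	1  - the injected call returned; results are at SP and in registers
        //	2  - the injected call panicked; the panic value is at SP
        //	8  - the call cannot be injected; a reason string (ptr, len) is at SP
        //	16 - restore all registers except RIP and RSP and resume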
  1779  TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT,$152-0
  1780  	// Save all registers that may contain pointers so they can be
  1781  	// conservatively scanned.
  1782  	//
  1783  	// We can't do anything that might clobber any of these
  1784  	// registers before this.
  1785  	MOVQ	R15, r15-(14*8+8)(SP)
  1786  	MOVQ	R14, r14-(13*8+8)(SP)
  1787  	MOVQ	R13, r13-(12*8+8)(SP)
  1788  	MOVQ	R12, r12-(11*8+8)(SP)
  1789  	MOVQ	R11, r11-(10*8+8)(SP)
  1790  	MOVQ	R10, r10-(9*8+8)(SP)
  1791  	MOVQ	R9, r9-(8*8+8)(SP)
  1792  	MOVQ	R8, r8-(7*8+8)(SP)
  1793  	MOVQ	DI, di-(6*8+8)(SP)
  1794  	MOVQ	SI, si-(5*8+8)(SP)
  1795  	MOVQ	BP, bp-(4*8+8)(SP)
  1796  	MOVQ	BX, bx-(3*8+8)(SP)
  1797  	MOVQ	DX, dx-(2*8+8)(SP)
  1798  	// Save the frame size before we clobber it. Either of the last
  1799  	// two saves could clobber this depending on whether there's a saved BP.
  1800  	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
  1801  	MOVQ	CX, cx-(1*8+8)(SP)
  1802  	MOVQ	AX, ax-(0*8+8)(SP)
  1803  
  1804  	// Save the argument frame size.
  1805  	MOVQ	DX, frameSize-128(SP)
  1806  
  1807  	// Perform a safe-point check.
  1808  	MOVQ	retpc-8(FP), AX	// Caller's PC
  1809  	MOVQ	AX, 0(SP)
  1810  	CALL	runtime·debugCallCheck(SB)
  1811  	MOVQ	8(SP), AX
  1812  	TESTQ	AX, AX
  1813  	JZ	good
  1814  	// The safety check failed. Put the reason string at the top
  1815  	// of the stack.
  1816  	MOVQ	AX, 0(SP)
  1817  	MOVQ	16(SP), AX
  1818  	MOVQ	AX, 8(SP)
  1819  	// Set R12 to 8 and invoke INT3. The debugger should get the
  1820  	// reason a call can't be injected from the top of the stack
  1821  	// and resume execution.
  1822  	MOVQ	$8, R12
  1823  	BYTE	$0xcc
  1824  	JMP	restore
  1825  
  1826  good:
  1827  	// Registers are saved and it's safe to make a call.
  1828  	// Open up a call frame, moving the stack if necessary.
  1829  	//
  1830  	// Once the frame is allocated, this will set R12 to 0 and
  1831  	// invoke INT3. The debugger should write the argument
  1832  	// frame for the call at SP, set up argument registers, push
  1833  	// the trapping PC on the stack, set the PC to the function to
  1834  	// call, set RDX to point to the closure (if a closure call),
  1835  	// and resume execution.
  1836  	//
  1837  	// If the function returns, this will set R12 to 1 and invoke
  1838  	// INT3. The debugger can then inspect any return value saved
  1839  	// on the stack at SP and in registers and resume execution again.
  1840  	//
  1841  	// If the function panics, this will set R12 to 2 and invoke INT3.
  1842  	// The interface{} value of the panic will be at SP. The debugger
  1843  	// can inspect the panic value and resume execution again.
  1844  #define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
  1845  	CMPQ	AX, $MAXSIZE;			\
  1846  	JA	5(PC);				\
  1847  	MOVQ	$NAME(SB), AX;			\
  1848  	MOVQ	AX, 0(SP);			\
  1849  	CALL	runtime·debugCallWrap(SB);	\
  1850  	JMP	restore
  1851  
  1852  	MOVQ	frameSize-128(SP), AX
  1853  	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
  1854  	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
  1855  	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
  1856  	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
  1857  	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
  1858  	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
  1859  	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
  1860  	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
  1861  	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
  1862  	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
  1863  	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
  1864  	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
  1865  	// The frame size is too large. Report the error.
  1866  	MOVQ	$debugCallFrameTooLarge<>(SB), AX
  1867  	MOVQ	AX, 0(SP)
  1868  	MOVQ	$20, 8(SP) // length of debugCallFrameTooLarge string
  1869  	MOVQ	$8, R12
  1870  	BYTE	$0xcc
  1871  	JMP	restore
  1872  
  1873  restore:
  1874  	// Calls and failures resume here.
  1875  	//
  1876  	// Set R12 to 16 and invoke INT3. The debugger should restore
  1877  	// all registers except RIP and RSP and resume execution.
  1878  	MOVQ	$16, R12
  1879  	BYTE	$0xcc
  1880  	// We must not modify flags after this point.
  1881  
  1882  	// Restore pointer-containing registers, which may have been
  1883  	// modified from the debugger's copy by stack copying.
  1884  	MOVQ	ax-(0*8+8)(SP), AX
  1885  	MOVQ	cx-(1*8+8)(SP), CX
  1886  	MOVQ	dx-(2*8+8)(SP), DX
  1887  	MOVQ	bx-(3*8+8)(SP), BX
  1888  	MOVQ	bp-(4*8+8)(SP), BP
  1889  	MOVQ	si-(5*8+8)(SP), SI
  1890  	MOVQ	di-(6*8+8)(SP), DI
  1891  	MOVQ	r8-(7*8+8)(SP), R8
  1892  	MOVQ	r9-(8*8+8)(SP), R9
  1893  	MOVQ	r10-(9*8+8)(SP), R10
  1894  	MOVQ	r11-(10*8+8)(SP), R11
  1895  	MOVQ	r12-(11*8+8)(SP), R12
  1896  	MOVQ	r13-(12*8+8)(SP), R13
  1897  	MOVQ	r14-(13*8+8)(SP), R14
  1898  	MOVQ	r15-(14*8+8)(SP), R15
  1899  
  1900  	RET
  1901  
  1902  // runtime.debugCallCheck assumes that functions defined with the
  1903  // DEBUG_CALL_FN macro are safe points to inject calls.
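        // Each generated debugCallNN function reserves a MAXSIZE-byte frame for the
        // debugger to fill in, raises INT3 with R12 == 0 once the frame is ready,
        // and raises INT3 again with R12 == 1 after the injected call returns
        // (a summary of the protocol described at debugCallV2 above).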
  1904  #define DEBUG_CALL_FN(NAME,MAXSIZE)		\
  1905  TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
  1906  	NO_LOCAL_POINTERS;			\
  1907  	MOVQ	$0, R12;				\
  1908  	BYTE	$0xcc;				\
  1909  	MOVQ	$1, R12;				\
  1910  	BYTE	$0xcc;				\
  1911  	RET
  1912  DEBUG_CALL_FN(debugCall32<>, 32)
  1913  DEBUG_CALL_FN(debugCall64<>, 64)
  1914  DEBUG_CALL_FN(debugCall128<>, 128)
  1915  DEBUG_CALL_FN(debugCall256<>, 256)
  1916  DEBUG_CALL_FN(debugCall512<>, 512)
  1917  DEBUG_CALL_FN(debugCall1024<>, 1024)
  1918  DEBUG_CALL_FN(debugCall2048<>, 2048)
  1919  DEBUG_CALL_FN(debugCall4096<>, 4096)
  1920  DEBUG_CALL_FN(debugCall8192<>, 8192)
  1921  DEBUG_CALL_FN(debugCall16384<>, 16384)
  1922  DEBUG_CALL_FN(debugCall32768<>, 32768)
  1923  DEBUG_CALL_FN(debugCall65536<>, 65536)
  1924  
  1925  // func debugCallPanicked(val interface{})
  1926  TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
  1927  	// Copy the panic value to the top of stack.
  1928  	MOVQ	val_type+0(FP), AX
  1929  	MOVQ	AX, 0(SP)
  1930  	MOVQ	val_data+8(FP), AX
  1931  	MOVQ	AX, 8(SP)
  1932  	MOVQ	$2, R12
  1933  	BYTE	$0xcc
  1934  	RET
  1935  
  1936  // Note: these functions use a special calling convention to save generated code space.
  1937  // Arguments are passed in registers, but the space for those arguments is allocated
  1938  // in the caller's stack frame. These stubs write the args into that stack space and
  1939  // then tail call to the corresponding runtime handler.
  1940  // The tail call makes these stubs disappear in backtraces.
  1941  // Defined as ABIInternal since they do not use the stack-based Go ABI.
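        // For example (a sketch of the convention, not generated code): for an
        // out-of-range index the compiler calls panicIndex with the failing index
        // in AX and the length in CX; the stub below moves CX to BX so the values
        // land in goPanicIndex's ABIInternal argument registers (AX, BX) before
        // the tail call.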
  1942  TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
  1943  	MOVQ	CX, BX
  1944  	JMP	runtime·goPanicIndex<ABIInternal>(SB)
  1945  TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
  1946  	MOVQ	CX, BX
  1947  	JMP	runtime·goPanicIndexU<ABIInternal>(SB)
  1948  TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
  1949  	MOVQ	CX, AX
  1950  	MOVQ	DX, BX
  1951  	JMP	runtime·goPanicSliceAlen<ABIInternal>(SB)
  1952  TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
  1953  	MOVQ	CX, AX
  1954  	MOVQ	DX, BX
  1955  	JMP	runtime·goPanicSliceAlenU<ABIInternal>(SB)
  1956  TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
  1957  	MOVQ	CX, AX
  1958  	MOVQ	DX, BX
  1959  	JMP	runtime·goPanicSliceAcap<ABIInternal>(SB)
  1960  TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
  1961  	MOVQ	CX, AX
  1962  	MOVQ	DX, BX
  1963  	JMP	runtime·goPanicSliceAcapU<ABIInternal>(SB)
  1964  TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
  1965  	MOVQ	CX, BX
  1966  	JMP	runtime·goPanicSliceB<ABIInternal>(SB)
  1967  TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
  1968  	MOVQ	CX, BX
  1969  	JMP	runtime·goPanicSliceBU<ABIInternal>(SB)
  1970  TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
  1971  	MOVQ	DX, AX
  1972  	JMP	runtime·goPanicSlice3Alen<ABIInternal>(SB)
  1973  TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
  1974  	MOVQ	DX, AX
  1975  	JMP	runtime·goPanicSlice3AlenU<ABIInternal>(SB)
  1976  TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
  1977  	MOVQ	DX, AX
  1978  	JMP	runtime·goPanicSlice3Acap<ABIInternal>(SB)
  1979  TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
  1980  	MOVQ	DX, AX
  1981  	JMP	runtime·goPanicSlice3AcapU<ABIInternal>(SB)
  1982  TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
  1983  	MOVQ	CX, AX
  1984  	MOVQ	DX, BX
  1985  	JMP	runtime·goPanicSlice3B<ABIInternal>(SB)
  1986  TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
  1987  	MOVQ	CX, AX
  1988  	MOVQ	DX, BX
  1989  	JMP	runtime·goPanicSlice3BU<ABIInternal>(SB)
  1990  TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
  1991  	MOVQ	CX, BX
  1992  	JMP	runtime·goPanicSlice3C<ABIInternal>(SB)
  1993  TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
  1994  	MOVQ	CX, BX
  1995  	JMP	runtime·goPanicSlice3CU<ABIInternal>(SB)
  1996  TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
  1997  	MOVQ	DX, AX
  1998  	JMP	runtime·goPanicSliceConvert<ABIInternal>(SB)
  1999  
  2000  #ifdef GOOS_android
  2001  // Use the free TLS_SLOT_APP slot #2 on Android Q.
  2002  // Earlier Android versions are set up in gcc_android.c.
  2003  DATA runtime·tls_g+0(SB)/8, $16
  2004  GLOBL runtime·tls_g+0(SB), NOPTR, $8
  2005  #endif
  2006  
  2007  // The compiler and assembler's -spectre=ret mode rewrites
  2008  // all indirect CALL AX / JMP AX instructions to be
  2009  // CALL retpolineAX / JMP retpolineAX.
  2010  // See https://support.google.com/faqs/answer/7625886.
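        // The macro below expands to the following sequence (shown symbolically for
        // reg == AX; the BYTE directives encode it directly):
        //	CALL setup
        // nospec:
        //	PAUSE
        //	JMP nospec	// speculation trap: captures any mispredicted return
        // setup:
        //	MOVQ AX, 0(SP)	// overwrite the pushed return address with the real target
        //	RET		// "return" to the target via the return-stack predictor,
        //			// never an indirect branch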
  2011  #define RETPOLINE(reg) \
  2012  	/*   CALL setup */     BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0;	\
  2013  	/* nospec: */									\
  2014  	/*   PAUSE */           BYTE $0xF3; BYTE $0x90;					\
  2015  	/*   JMP nospec */      BYTE $0xEB; BYTE $-(2+2);				\
  2016  	/* setup: */									\
  2017  	/*   MOVQ AX, 0(SP) */  BYTE $0x48|((reg&8)>>1); BYTE $0x89;			\
  2018  	                        BYTE $0x04|((reg&7)<<3); BYTE $0x24;			\
  2019  	/*   RET */             BYTE $0xC3
  2020  
  2021  TEXT runtime·retpolineAX(SB),NOSPLIT,$0; RETPOLINE(0)
  2022  TEXT runtime·retpolineCX(SB),NOSPLIT,$0; RETPOLINE(1)
  2023  TEXT runtime·retpolineDX(SB),NOSPLIT,$0; RETPOLINE(2)
  2024  TEXT runtime·retpolineBX(SB),NOSPLIT,$0; RETPOLINE(3)
  2025  /* SP is 4, can't happen / magic encodings */
  2026  TEXT runtime·retpolineBP(SB),NOSPLIT,$0; RETPOLINE(5)
  2027  TEXT runtime·retpolineSI(SB),NOSPLIT,$0; RETPOLINE(6)
  2028  TEXT runtime·retpolineDI(SB),NOSPLIT,$0; RETPOLINE(7)
  2029  TEXT runtime·retpolineR8(SB),NOSPLIT,$0; RETPOLINE(8)
  2030  TEXT runtime·retpolineR9(SB),NOSPLIT,$0; RETPOLINE(9)
  2031  TEXT runtime·retpolineR10(SB),NOSPLIT,$0; RETPOLINE(10)
  2032  TEXT runtime·retpolineR11(SB),NOSPLIT,$0; RETPOLINE(11)
  2033  TEXT runtime·retpolineR12(SB),NOSPLIT,$0; RETPOLINE(12)
  2034  TEXT runtime·retpolineR13(SB),NOSPLIT,$0; RETPOLINE(13)
  2035  TEXT runtime·retpolineR14(SB),NOSPLIT,$0; RETPOLINE(14)
  2036  TEXT runtime·retpolineR15(SB),NOSPLIT,$0; RETPOLINE(15)
  2037  