Text file
src/runtime/asm_386.s
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "funcdata.h"
8 #include "textflag.h"
9
10 // _rt0_386 is common startup code for most 386 systems when using
11 // internal linking. This is the entry point for the program from the
12 // kernel for an ordinary -buildmode=exe program. The stack holds the
13 // number of arguments and the C-style argv.
//
// The 8-byte frame receives copies of argc and the address of argv at
// 0(SP) and 4(SP), which is where runtime·rt0_go reads them after the
// jump below.
14 TEXT _rt0_386(SB),NOSPLIT,$8
15 MOVL 8(SP), AX // argc
16 LEAL 12(SP), BX // argv
17 MOVL AX, 0(SP) // argc for rt0_go
18 MOVL BX, 4(SP) // argv for rt0_go
19 JMP runtime·rt0_go(SB) // tail jump; rt0_go is entered by JMP, not CALL
20
21 // _rt0_386_lib is common startup code for most 386 systems when
22 // using -buildmode=c-archive or -buildmode=c-shared. The linker will
23 // arrange to invoke this function as a global constructor (for
24 // c-archive) or when the shared library is loaded (for c-shared).
25 // We expect argc and argv to be passed on the stack following the
26 // usual C ABI.
//
// It stashes argc and argv in file-local globals, runs
// runtime·libpreinit synchronously, and then starts _rt0_386_lib_go on a
// new thread: through _cgo_sys_thread_create when that pointer is
// non-zero, otherwise through runtime·newosproc0. BP, BX, SI and DI are
// pushed on entry and popped again before returning.
27 TEXT _rt0_386_lib(SB),NOSPLIT,$0
28 PUSHL BP
29 MOVL SP, BP
30 PUSHL BX
31 PUSHL SI
32 PUSHL DI
33
34 MOVL 8(BP), AX // first stack argument, saved as argc
35 MOVL AX, _rt0_386_lib_argc<>(SB)
36 MOVL 12(BP), AX // second stack argument, saved as argv
37 MOVL AX, _rt0_386_lib_argv<>(SB)
38
39 // Synchronous initialization.
40 CALL runtime·libpreinit(SB)
41
42 SUBL $8, SP // two outgoing argument slots, released at restore
43
44 // Create a new thread to do the runtime initialization.
45 MOVL _cgo_sys_thread_create(SB), AX
46 TESTL AX, AX
47 JZ nocgo
48
49 // Align stack to call C function.
50 // We moved SP to BP above, but BP was clobbered by the libpreinit call.
51 MOVL SP, BP // remember the unaligned SP so it can be restored after the call
52 ANDL $~15, SP
53
54 MOVL $_rt0_386_lib_go(SB), BX
55 MOVL BX, 0(SP) // first slot: function the new thread runs
56 MOVL $0, 4(SP) // second slot: zero
57
58 CALL AX
59
60 MOVL BP, SP // undo the alignment
61
62 JMP restore
63
64 nocgo:
65 MOVL $0x800000, 0(SP) // stacksize = 8192KB
66 MOVL $_rt0_386_lib_go(SB), AX
67 MOVL AX, 4(SP) // fn
68 CALL runtime·newosproc0(SB)
69
70 restore:
71 ADDL $8, SP // drop the two argument slots
72 POPL DI
73 POPL SI
74 POPL BX
75 POPL BP
76 RET
77
78 // _rt0_386_lib_go initializes the Go runtime.
79 // This is started in a separate thread by _rt0_386_lib.
// It reloads the argc/argv values that _rt0_386_lib saved in the
// file-local globals, places them at 0(SP) and 4(SP), and jumps to
// runtime·rt0_go.
80 TEXT _rt0_386_lib_go(SB),NOSPLIT,$8
81 MOVL _rt0_386_lib_argc<>(SB), AX
82 MOVL AX, 0(SP) // argc for rt0_go
83 MOVL _rt0_386_lib_argv<>(SB), AX
84 MOVL AX, 4(SP) // argv for rt0_go
85 JMP runtime·rt0_go(SB)
86
// File-local 4-byte cells, zero-initialized, that carry argc and argv
// from _rt0_386_lib (which writes them) to _rt0_386_lib_go (which reads
// them on the new thread).
87 DATA _rt0_386_lib_argc<>(SB)/4, $0
88 GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4
89 DATA _rt0_386_lib_argv<>(SB)/4, $0
90 GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4
91
// rt0_go is the common bootstrap that the _rt0_386* entry points jump to.
// It reads argc and argv from 0(SP) and 4(SP), gives g0 provisional stack
// bounds, checks for CPUID and MMX, sets up TLS (through _cgo_init when
// present, otherwise through ldt0setup<>), links m0 and g0, runs check,
// args, osinit and schedinit, queues runtime·mainPC with newproc and
// finally enters mstart.
92 TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
93 // Copy arguments forward on an even stack.
94 // Users of this function jump to it, they don't call it.
95 MOVL 0(SP), AX // argc
96 MOVL 4(SP), BX // argv
97 SUBL $128, SP // plenty of scratch
98 ANDL $~15, SP
99 MOVL AX, 120(SP) // save argc, argv away
100 MOVL BX, 124(SP)
101
102 // set default stack bounds.
103 // _cgo_init may update stackguard.
104 MOVL $runtime·g0(SB), BP
105 LEAL (-64*1024+104)(SP), BX // provisional bound: SP - 64KB + 104
106 MOVL BX, g_stackguard0(BP)
107 MOVL BX, g_stackguard1(BP)
108 MOVL BX, (g_stack+stack_lo)(BP)
109 MOVL SP, (g_stack+stack_hi)(BP)
110
111 // find out information about the processor we're on
112 // first see if CPUID instruction is supported.
// Flip bit 21 of EFLAGS, read EFLAGS back and compare with the saved
// copy: if the bit changed, CPUID is available.
113 PUSHFL
114 PUSHFL
115 XORL $(1<<21), 0(SP) // flip ID bit
116 POPFL
117 PUSHFL
118 POPL AX
119 XORL 0(SP), AX // AX = bits that differ from the original EFLAGS
120 POPFL // restore EFLAGS
121 TESTL $(1<<21), AX
122 JNE has_cpuid
123
124 bad_proc: // show that the program requires MMX.
125 MOVL $2, 0(SP) // first argument to runtime·write: 2
126 MOVL $bad_proc_msg<>(SB), 4(SP)
127 MOVL $0x3d, 8(SP) // 0x3d = 61, the declared size of bad_proc_msg
128 CALL runtime·write(SB)
129 MOVL $1, 0(SP)
130 CALL runtime·exit(SB)
131 CALL runtime·abort(SB)
132
133 has_cpuid:
134 MOVL $0, AX
135 CPUID
136 MOVL AX, SI // keep the AX result of CPUID leaf 0
137 CMPL AX, $0
138 JE nocpuinfo
139
// The three vendor words spell "GenuineIntel" when read as BX, DX, CX.
140 CMPL BX, $0x756E6547 // "Genu"
141 JNE notintel
142 CMPL DX, $0x49656E69 // "ineI"
143 JNE notintel
144 CMPL CX, $0x6C65746E // "ntel"
145 JNE notintel
146 MOVB $1, runtime·isIntel(SB)
147 notintel:
148
149 // Load EAX=1 cpuid flags
150 MOVL $1, AX
151 CPUID
152 MOVL CX, DI // Move to global variable clobbers CX when generating PIC
153 MOVL AX, runtime·processorVersionInfo(SB)
154
155 // Check for MMX support
156 TESTL $(1<<23), DX // MMX
157 JZ bad_proc
158
159 nocpuinfo:
160 // if there is a _cgo_init, call it to let it
161 // initialize and to set up GS. if not,
162 // we set up GS ourselves.
163 MOVL _cgo_init(SB), AX
164 TESTL AX, AX
165 JZ needtls
166 #ifdef GOOS_android
167 // arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
168 // Compensate for tls_g (+8).
169 MOVL -8(TLS), BX
170 MOVL BX, 12(SP)
171 MOVL $runtime·tls_g(SB), 8(SP) // arg 3: &tls_g
172 #else
173 MOVL $0, BX
174 MOVL BX, 12(SP) // arg 3,4: not used when using platform's TLS
175 MOVL BX, 8(SP)
176 #endif
177 MOVL $setg_gcc<>(SB), BX
178 MOVL BX, 4(SP) // arg 2: setg_gcc
179 MOVL BP, 0(SP) // arg 1: g0
180 CALL AX
181
182 // update stackguard after _cgo_init
// Both guards become g0.stack.lo + _StackGuard.
183 MOVL $runtime·g0(SB), CX
184 MOVL (g_stack+stack_lo)(CX), AX
185 ADDL $const__StackGuard, AX
186 MOVL AX, g_stackguard0(CX)
187 MOVL AX, g_stackguard1(CX)
188
189 #ifndef GOOS_windows
190 // skip ldt0setup<>(SB) and tls test after _cgo_init for non-windows
191 JMP ok
192 #endif
193 needtls:
194 #ifdef GOOS_openbsd
195 // skip ldt0setup<>(SB) and tls test on OpenBSD in all cases
196 JMP ok
197 #endif
198 #ifdef GOOS_plan9
199 // skip ldt0setup<>(SB) and tls test on Plan 9 in all cases
200 JMP ok
201 #endif
202
203 // set up %gs
204 CALL ldt0setup<>(SB)
205
206 // store through it, to make sure it works
// A write through the TLS g slot must be visible in m0.tls.
207 get_tls(BX)
208 MOVL $0x123, g(BX)
209 MOVL runtime·m0+m_tls(SB), AX
210 CMPL AX, $0x123
211 JEQ ok
212 MOVL AX, 0 // abort
213 ok:
214 // set up m and g "registers"
215 get_tls(BX)
216 LEAL runtime·g0(SB), DX
217 MOVL DX, g(BX)
218 LEAL runtime·m0(SB), AX
219
220 // save m->g0 = g0
221 MOVL DX, m_g0(AX)
222 // save g0->m = m0
223 MOVL AX, g_m(DX)
224
225 CALL runtime·emptyfunc(SB) // fault if stack check is wrong
226
227 // convention is D is always cleared
228 CLD
229
230 CALL runtime·check(SB)
231
232 // saved argc, argv
233 MOVL 120(SP), AX
234 MOVL AX, 0(SP)
235 MOVL 124(SP), AX
236 MOVL AX, 4(SP)
237 CALL runtime·args(SB)
238 CALL runtime·osinit(SB)
239 CALL runtime·schedinit(SB)
240
241 // create a new goroutine to start program
242 PUSHL $runtime·mainPC(SB) // entry
243 CALL runtime·newproc(SB)
244 POPL AX // discard the pushed argument
245
246 // start this M
247 CALL runtime·mstart(SB)
248
249 CALL runtime·abort(SB) // only reached if mstart returns
250 RET
251
// 61-byte message that rt0_go passes to runtime·write on its bad_proc
// path; the length passed there (0x3d) must match the size declared here.
252 DATA bad_proc_msg<>+0x00(SB)/61, $"This program can only be run on processors with MMX support.\n"
253 GLOBL bad_proc_msg<>(SB), RODATA, $61
254
// mainPC is a read-only 4-byte cell holding the address of runtime·main;
// rt0_go pushes its address as the argument to runtime·newproc.
255 DATA runtime·mainPC+0(SB)/4,$runtime·main(SB)
256 GLOBL runtime·mainPC(SB),RODATA,$4
257
// breakpoint executes INT $3 and then returns to the caller.
258 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
259 INT $3
260 RET
261
// asminit loads the x87 control word from runtime·controlWord64.
262 TEXT runtime·asminit(SB),NOSPLIT,$0-0
263 // Linux and MinGW start the FPU in extended double precision.
264 // Other operating systems use double precision.
265 // Change to double precision to match them,
266 // and to match other hardware that only has double.
267 FLDCW runtime·controlWord64(SB)
268 RET
269
// mstart is a thin TOPFRAME wrapper that calls runtime·mstart0.
270 TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
271 CALL runtime·mstart0(SB)
272 RET // not reached
273
274 /*
275 * go-routine
276 */
277
278 // void gogo(Gobuf*)
279 // restore state from Gobuf; longjmp
// This entry point loads the Gobuf pointer into BX and its g into DX,
// touches *g so that a nil g faults here, and then jumps to gogo<>,
// which expects exactly those two registers.
280 TEXT runtime·gogo(SB), NOSPLIT, $0-4
281 MOVL buf+0(FP), BX // gobuf
282 MOVL gobuf_g(BX), DX
283 MOVL 0(DX), CX // make sure g != nil
284 JMP gogo<>(SB)
285
// gogo<> finishes the switch started by runtime·gogo.
// In: BX = Gobuf pointer, DX = the g to install.
// It installs g in TLS, restores SP, loads gobuf_ret into AX and
// gobuf_ctxt into DX, zeroes those three Gobuf fields, and jumps to
// gobuf_pc.
286 TEXT gogo<>(SB), NOSPLIT, $0
287 get_tls(CX)
288 MOVL DX, g(CX)
289 MOVL gobuf_sp(BX), SP // restore SP
290 MOVL gobuf_ret(BX), AX
291 MOVL gobuf_ctxt(BX), DX
292 MOVL $0, gobuf_sp(BX) // clear to help garbage collector
293 MOVL $0, gobuf_ret(BX)
294 MOVL $0, gobuf_ctxt(BX)
295 MOVL gobuf_pc(BX), BX // BX is reused for the target PC; the Gobuf pointer is no longer needed
296 JMP BX
297
298 // func mcall(fn func(*g))
299 // Switch to m->g0's stack, call fn(g).
300 // Fn must never return. It should gogo(&g->sched)
301 // to keep running g.
//
// The caller's PC and SP are recorded in g->sched before the switch.
// The old g is pushed as fn's argument, DX carries the closure pointer,
// and the code pointer is loaded from the first word of the closure.
// If the current g is already m->g0 control goes to badmcall; if fn
// returns control goes to badmcall2.
302 TEXT runtime·mcall(SB), NOSPLIT, $0-4
303 MOVL fn+0(FP), DI
304
305 get_tls(DX)
306 MOVL g(DX), AX // save state in g->sched
307 MOVL 0(SP), BX // caller's PC
308 MOVL BX, (g_sched+gobuf_pc)(AX)
309 LEAL fn+0(FP), BX // caller's SP
310 MOVL BX, (g_sched+gobuf_sp)(AX)
311
312 // switch to m->g0 & its stack, call fn
313 MOVL g(DX), BX
314 MOVL g_m(BX), BX
315 MOVL m_g0(BX), SI
316 CMPL SI, AX // if g == m->g0 call badmcall
317 JNE 3(PC) // skip the two-instruction badmcall jump below
318 MOVL $runtime·badmcall(SB), AX
319 JMP AX
320 MOVL SI, g(DX) // g = m->g0
321 MOVL (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
322 PUSHL AX // argument to fn: the g we switched away from
323 MOVL DI, DX // DX = closure pointer
324 MOVL 0(DI), DI // DI = code pointer stored in the closure
325 CALL DI
326 POPL AX
327 MOVL $runtime·badmcall2(SB), AX // fn returned, which it must not do
328 JMP AX
329 RET
330
331 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
332 // of the G stack. We need to distinguish the routine that
333 // lives at the bottom of the G stack from the one that lives
334 // at the top of the system stack because the one at the top of
335 // the system stack terminates the stack walk (see topofstack()).
// Its address is what gosave_systemstack_switch<> stores as the saved PC;
// the body itself is a bare RET.
336 TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
337 RET
338
339 // func systemstack(fn func())
// Runs fn on the g0 stack. If the current g is already gsignal or g0,
// fn is tail-called in place. If the current g is m->curg, state is saved
// in g->sched, the stack is switched to g0, fn is called, and the switch
// is undone. Any other g is reported through badsystemstack.
340 TEXT runtime·systemstack(SB), NOSPLIT, $0-4
341 MOVL fn+0(FP), DI // DI = fn
342 get_tls(CX)
343 MOVL g(CX), AX // AX = g
344 MOVL g_m(AX), BX // BX = m
345
346 CMPL AX, m_gsignal(BX)
347 JEQ noswitch
348
349 MOVL m_g0(BX), DX // DX = g0
350 CMPL AX, DX
351 JEQ noswitch
352
353 CMPL AX, m_curg(BX)
354 JNE bad
355
356 // switch stacks
357 // save our state in g->sched. Pretend to
358 // be systemstack_switch if the G stack is scanned.
359 CALL gosave_systemstack_switch<>(SB)
360
361 // switch to g0
362 get_tls(CX)
363 MOVL DX, g(CX)
364 MOVL (g_sched+gobuf_sp)(DX), BX
365 MOVL BX, SP
366
367 // call target function
368 MOVL DI, DX // DX = closure pointer
369 MOVL 0(DI), DI // DI = code pointer stored in the closure
370 CALL DI
371
372 // switch back to g
373 get_tls(CX)
374 MOVL g(CX), AX
375 MOVL g_m(AX), BX
376 MOVL m_curg(BX), AX
377 MOVL AX, g(CX)
378 MOVL (g_sched+gobuf_sp)(AX), SP
379 MOVL $0, (g_sched+gobuf_sp)(AX) // clear the saved SP now that it has been consumed
380 RET
381
382 noswitch:
383 // already on system stack; tail call the function
384 // Using a tail call here cleans up tracebacks since we won't stop
385 // at an intermediate systemstack.
386 MOVL DI, DX
387 MOVL 0(DI), DI
388 JMP DI
389
390 bad:
391 // Bad: g is not gsignal, not g0, not curg. What is it?
392 // Hide call from linker nosplit analysis.
393 MOVL $runtime·badsystemstack(SB), AX
394 CALL AX
395 INT $3
396
397 /*
398 * support for morestack
399 */
400
401 // Called during function prolog when more stack is needed.
402 //
403 // The traceback routines see morestack on a g0 as being
404 // the top of a stack (for example, morestack calling newstack
405 // calling the scheduler calling newm calling gc), so we must
406 // record an argument size. For that purpose, it has no arguments.
//
// On entry DX holds the context value that is stored into g->sched.ctxt
// (morestack_noctxt zeroes it first). The routine aborts if the current g
// is m->g0 or m->gsignal, records f's caller in m->morebuf and f itself in
// g->sched, then switches to g0 and calls runtime·newstack.
407 TEXT runtime·morestack(SB),NOSPLIT,$0-0
408 // Cannot grow scheduler stack (m->g0).
409 get_tls(CX)
410 MOVL g(CX), BX
411 MOVL g_m(BX), BX
412 MOVL m_g0(BX), SI
413 CMPL g(CX), SI
414 JNE 3(PC) // skip the two calls below unless g == m->g0
415 CALL runtime·badmorestackg0(SB)
416 CALL runtime·abort(SB)
417
418 // Cannot grow signal stack.
419 MOVL m_gsignal(BX), SI
420 CMPL g(CX), SI
421 JNE 3(PC) // skip the two calls below unless g == m->gsignal
422 CALL runtime·badmorestackgsignal(SB)
423 CALL runtime·abort(SB)
424
425 // Called from f.
426 // Set m->morebuf to f's caller.
427 NOP SP // tell vet SP changed - stop checking offsets
428 MOVL 4(SP), DI // f's caller's PC
429 MOVL DI, (m_morebuf+gobuf_pc)(BX)
430 LEAL 8(SP), CX // f's caller's SP
431 MOVL CX, (m_morebuf+gobuf_sp)(BX)
432 get_tls(CX) // CX was used as scratch above; reload the TLS base
433 MOVL g(CX), SI
434 MOVL SI, (m_morebuf+gobuf_g)(BX)
435
436 // Set g->sched to context in f.
437 MOVL 0(SP), AX // f's PC
438 MOVL AX, (g_sched+gobuf_pc)(SI)
439 LEAL 4(SP), AX // f's SP
440 MOVL AX, (g_sched+gobuf_sp)(SI)
441 MOVL DX, (g_sched+gobuf_ctxt)(SI)
442
443 // Call newstack on m->g0's stack.
444 MOVL m_g0(BX), BP
445 MOVL BP, g(CX)
446 MOVL (g_sched+gobuf_sp)(BP), AX
447 MOVL -4(AX), BX // fault if CALL would, before smashing SP
448 MOVL AX, SP
449 CALL runtime·newstack(SB)
450 CALL runtime·abort(SB) // crash if newstack returns
451 RET
452
// morestack_noctxt zeroes DX, the register morestack stores into
// g->sched.ctxt, and then jumps to runtime·morestack.
453 TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
454 MOVL $0, DX
455 JMP runtime·morestack(SB)
456
457 // reflectcall: call a function with the given argument list
458 // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
459 // we don't have variable-sized frames, so we use a small number
460 // of constant-sized-frame functions to encode a few bits of size in the pc.
461 // Caution: ugly multiline assembly macros in your future!
462
// DISPATCH(NAME, MAXSIZE) jumps to NAME when CX <= MAXSIZE (unsigned);
// otherwise JA skips the two-instruction jump and falls through to the
// next DISPATCH. AX is clobbered when the jump is taken.
463 #define DISPATCH(NAME,MAXSIZE) \
464 CMPL CX, $MAXSIZE; \
465 JA 3(PC); \
466 MOVL $NAME(SB), AX; \
467 JMP AX
468 // Note: can't just "JMP NAME(SB)" - bad inlining results.
469
// reflectcall loads frameSize into CX and jumps to the first callN whose
// N is at least frameSize. The buckets are powers of two from 16 up to
// 1073741824; anything larger goes to runtime·badreflectcall.
470 TEXT ·reflectcall(SB), NOSPLIT, $0-28
471 MOVL frameSize+20(FP), CX
472 DISPATCH(runtime·call16, 16)
473 DISPATCH(runtime·call32, 32)
474 DISPATCH(runtime·call64, 64)
475 DISPATCH(runtime·call128, 128)
476 DISPATCH(runtime·call256, 256)
477 DISPATCH(runtime·call512, 512)
478 DISPATCH(runtime·call1024, 1024)
479 DISPATCH(runtime·call2048, 2048)
480 DISPATCH(runtime·call4096, 4096)
481 DISPATCH(runtime·call8192, 8192)
482 DISPATCH(runtime·call16384, 16384)
483 DISPATCH(runtime·call32768, 32768)
484 DISPATCH(runtime·call65536, 65536)
485 DISPATCH(runtime·call131072, 131072)
486 DISPATCH(runtime·call262144, 262144)
487 DISPATCH(runtime·call524288, 524288)
488 DISPATCH(runtime·call1048576, 1048576)
489 DISPATCH(runtime·call2097152, 2097152)
490 DISPATCH(runtime·call4194304, 4194304)
491 DISPATCH(runtime·call8388608, 8388608)
492 DISPATCH(runtime·call16777216, 16777216)
493 DISPATCH(runtime·call33554432, 33554432)
494 DISPATCH(runtime·call67108864, 67108864)
495 DISPATCH(runtime·call134217728, 134217728)
496 DISPATCH(runtime·call268435456, 268435456)
497 DISPATCH(runtime·call536870912, 536870912)
498 DISPATCH(runtime·call1073741824, 1073741824)
499 MOVL $runtime·badreflectcall(SB), AX
500 JMP AX
501
// CALLFN(NAME, MAXSIZE) defines one fixed-frame call stub with a
// MAXSIZE-byte frame and the same 28-byte argument layout as reflectcall.
// The stub copies stackArgsSize bytes from stackArgs to the bottom of its
// frame, calls f with DX holding the closure pointer, and then hands the
// result region to callRet<> in registers:
//   DX = stackArgsType
//   DI = stackArgs + stackRetOffset    (destination)
//   SI = SP + stackRetOffset           (source, in this frame)
//   CX = stackArgsSize - stackRetOffset (byte count)
502 #define CALLFN(NAME,MAXSIZE) \
503 TEXT NAME(SB), WRAPPER, $MAXSIZE-28; \
504 NO_LOCAL_POINTERS; \
505 /* copy arguments to stack */ \
506 MOVL stackArgs+8(FP), SI; \
507 MOVL stackArgsSize+12(FP), CX; \
508 MOVL SP, DI; \
509 REP;MOVSB; \
510 /* call function */ \
511 MOVL f+4(FP), DX; \
512 MOVL (DX), AX; \
513 PCDATA $PCDATA_StackMapIndex, $0; \
514 CALL AX; \
515 /* copy return values back */ \
516 MOVL stackArgsType+0(FP), DX; \
517 MOVL stackArgs+8(FP), DI; \
518 MOVL stackArgsSize+12(FP), CX; \
519 MOVL stackRetOffset+16(FP), BX; \
520 MOVL SP, SI; \
521 ADDL BX, DI; \
522 ADDL BX, SI; \
523 SUBL BX, CX; \
524 CALL callRet<>(SB); \
525 RET
526
527 // callRet copies return values back at the end of call*. This is a
528 // separate function so it can allocate stack space for the arguments
529 // to reflectcallmove. It does not follow the Go ABI; it expects its
530 // arguments in registers.
// In: DX, DI, SI, CX as set up by CALLFN. They are stored, in that order,
// as the first four argument words of runtime·reflectcallmove; the fifth
// word is zero.
531 TEXT callRet<>(SB), NOSPLIT, $20-0
532 MOVL DX, 0(SP)
533 MOVL DI, 4(SP)
534 MOVL SI, 8(SP)
535 MOVL CX, 12(SP)
536 MOVL $0, 16(SP)
537 CALL runtime·reflectcallmove(SB)
538 RET
539
// One call stub per frame-size bucket. The sizes here mirror the DISPATCH
// list in ·reflectcall one for one; keep the two lists in step.
540 CALLFN(·call16, 16)
541 CALLFN(·call32, 32)
542 CALLFN(·call64, 64)
543 CALLFN(·call128, 128)
544 CALLFN(·call256, 256)
545 CALLFN(·call512, 512)
546 CALLFN(·call1024, 1024)
547 CALLFN(·call2048, 2048)
548 CALLFN(·call4096, 4096)
549 CALLFN(·call8192, 8192)
550 CALLFN(·call16384, 16384)
551 CALLFN(·call32768, 32768)
552 CALLFN(·call65536, 65536)
553 CALLFN(·call131072, 131072)
554 CALLFN(·call262144, 262144)
555 CALLFN(·call524288, 524288)
556 CALLFN(·call1048576, 1048576)
557 CALLFN(·call2097152, 2097152)
558 CALLFN(·call4194304, 4194304)
559 CALLFN(·call8388608, 8388608)
560 CALLFN(·call16777216, 16777216)
561 CALLFN(·call33554432, 33554432)
562 CALLFN(·call67108864, 67108864)
563 CALLFN(·call134217728, 134217728)
564 CALLFN(·call268435456, 268435456)
565 CALLFN(·call536870912, 536870912)
566 CALLFN(·call1073741824, 1073741824)
567
// procyield executes PAUSE once per iteration, counting the cycles
// argument down to zero.
// NOTE(review): the frame is declared $0-0 although a 4-byte argument is
// read at cycles+0(FP) — confirm this is intentional.
// NOTE(review): the counter is decremented before it is tested, so
// cycles == 0 wraps around instead of returning immediately — verify
// that callers never pass 0.
568 TEXT runtime·procyield(SB),NOSPLIT,$0-0
569 MOVL cycles+0(FP), AX
570 again:
571 PAUSE
572 SUBL $1, AX
573 JNZ again
574 RET
575
// publicationBarrier is an empty function on 386: the body is a bare RET.
576 TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
577 // Stores are already ordered on x86, so this is just a
578 // compiler barrier.
579 RET
580
581 // Save state of caller into g->sched,
582 // but using fake PC from systemstack_switch.
583 // Must only be called from functions with no locals ($0)
584 // or else unwinding from systemstack_switch is incorrect.
//
// AX and BX are saved and restored around the body, so every
// general-purpose register is preserved for the caller. g->sched.ret is
// zeroed, and the routine aborts if g->sched.ctxt is non-zero.
585 TEXT gosave_systemstack_switch<>(SB),NOSPLIT,$0
586 PUSHL AX
587 PUSHL BX
588 get_tls(BX)
589 MOVL g(BX), BX
590 LEAL arg+0(FP), AX // address just above our return PC, recorded as the caller's SP
591 MOVL AX, (g_sched+gobuf_sp)(BX)
592 MOVL $runtime·systemstack_switch(SB), AX
593 MOVL AX, (g_sched+gobuf_pc)(BX)
594 MOVL $0, (g_sched+gobuf_ret)(BX)
595 // Assert ctxt is zero. See func save.
596 MOVL (g_sched+gobuf_ctxt)(BX), AX
597 TESTL AX, AX
598 JZ 2(PC) // skip the abort when ctxt == 0
599 CALL runtime·abort(SB)
600 POPL BX
601 POPL AX
602 RET
603
604 // func asmcgocall_no_g(fn, arg unsafe.Pointer)
605 // Call fn(arg) aligned appropriately for the gcc ABI.
606 // Called on a system stack, and there may be no g yet (during needm).
// The original SP is parked at 8(SP) of the aligned scratch area and
// reloaded from there after fn returns.
607 TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-8
608 MOVL fn+0(FP), AX
609 MOVL arg+4(FP), BX
610 MOVL SP, DX
611 SUBL $32, SP
612 ANDL $~15, SP // alignment, perhaps unnecessary
613 MOVL DX, 8(SP) // save old SP
614 MOVL BX, 0(SP) // first argument in x86-32 ABI
615 CALL AX
616 MOVL 8(SP), DX
617 MOVL DX, SP
618 RET
619
620 // func asmcgocall(fn, arg unsafe.Pointer) int32
621 // Call fn(arg) on the scheduler stack,
622 // aligned appropriately for the gcc ABI.
623 // See cgocall.go for more details.
//
// Three cases are distinguished:
//   - no g in TLS: call on the current stack, remembering the raw SP
//     (nosave);
//   - g is m->gsignal or m->g0: call on the current stack (noswitch);
//   - otherwise: save state with gosave_systemstack_switch<> and move to
//     m->g0's stack first.
// fn's AX result is stored as the int32 return value.
624 TEXT ·asmcgocall(SB),NOSPLIT,$0-12
625 MOVL fn+0(FP), AX
626 MOVL arg+4(FP), BX
627
628 MOVL SP, DX // DX = SP on entry, used below to compute the stack depth
629
630 // Figure out if we need to switch to m->g0 stack.
631 // We get called to create new OS threads too, and those
632 // come in on the m->g0 stack already. Or we might already
633 // be on the m->gsignal stack.
634 get_tls(CX)
635 MOVL g(CX), DI
636 CMPL DI, $0
637 JEQ nosave // Don't even have a G yet.
638 MOVL g_m(DI), BP
639 CMPL DI, m_gsignal(BP)
640 JEQ noswitch
641 MOVL m_g0(BP), SI
642 CMPL DI, SI
643 JEQ noswitch
644 CALL gosave_systemstack_switch<>(SB)
645 get_tls(CX)
646 MOVL SI, g(CX)
647 MOVL (g_sched+gobuf_sp)(SI), SP
648
649 noswitch:
650 // Now on a scheduling stack (a pthread-created stack).
651 SUBL $32, SP
652 ANDL $~15, SP // alignment, perhaps unnecessary
653 MOVL DI, 8(SP) // save g
654 MOVL (g_stack+stack_hi)(DI), DI
655 SUBL DX, DI // DI = g->stack.hi - entry SP
656 MOVL DI, 4(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback)
657 MOVL BX, 0(SP) // first argument in x86-32 ABI
658 CALL AX
659
660 // Restore registers, g, stack pointer.
661 get_tls(CX)
662 MOVL 8(SP), DI
663 MOVL (g_stack+stack_hi)(DI), SI
664 SUBL 4(SP), SI // SI = g->stack.hi - saved depth = SP to resume on
665 MOVL DI, g(CX)
666 MOVL SI, SP
667
668 MOVL AX, ret+8(FP)
669 RET
670 nosave:
671 // Now on a scheduling stack (a pthread-created stack).
672 SUBL $32, SP
673 ANDL $~15, SP // alignment, perhaps unnecessary
674 MOVL DX, 4(SP) // save original stack pointer
675 MOVL BX, 0(SP) // first argument in x86-32 ABI
676 CALL AX
677
678 MOVL 4(SP), CX // restore original stack pointer
679 MOVL CX, SP
680 MOVL AX, ret+8(FP)
681 RET
682
683 // cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
684 // See cgocall.go for more details.
//
// Outline: obtain an m if the thread has no g (needm), save
// m->g0->sched.sp in this frame, switch to m->curg's stack, call
// runtime·cgocallbackg(fn, frame, ctxt), restore m->curg->sched, switch
// back to m->g0, and call dropm if the m was borrowed. The 12-byte frame
// size is hard-coded in several offsets below.
685 TEXT ·cgocallback(SB),NOSPLIT,$12-12 // Frame size must match commented places below
686 NO_LOCAL_POINTERS
687
688 // If g is nil, Go did not create the current thread.
689 // Call needm to obtain one for temporary use.
690 // In this case, we're running on the thread stack, so there's
691 // lots of space, but the linker doesn't know. Hide the call from
692 // the linker analysis by using an indirect call through AX.
693 get_tls(CX)
694 #ifdef GOOS_windows
695 MOVL $0, BP
696 CMPL CX, $0
697 JEQ 2(PC) // TODO
698 #endif
699 MOVL g(CX), BP
700 CMPL BP, $0
701 JEQ needm
702 MOVL g_m(BP), BP
703 MOVL BP, savedm-4(SP) // saved copy of oldm
704 JMP havem
705 needm:
706 MOVL $runtime·needm(SB), AX
707 CALL AX
708 MOVL $0, savedm-4(SP) // dropm on return
709 get_tls(CX)
710 MOVL g(CX), BP
711 MOVL g_m(BP), BP
712
713 // Set m->sched.sp = SP, so that if a panic happens
714 // during the function we are about to execute, it will
715 // have a valid SP to run on the g0 stack.
716 // The next few lines (after the havem label)
717 // will save this SP onto the stack and then write
718 // the same SP back to m->sched.sp. That seems redundant,
719 // but if an unrecovered panic happens, unwindm will
720 // restore the g->sched.sp from the stack location
721 // and then systemstack will try to use it. If we don't set it here,
722 // that restored SP will be uninitialized (typically 0) and
723 // will not be usable.
724 MOVL m_g0(BP), SI
725 MOVL SP, (g_sched+gobuf_sp)(SI)
726
727 havem:
728 // Now there's a valid m, and we're running on its m->g0.
729 // Save current m->g0->sched.sp on stack and then set it to SP.
730 // Save current sp in m->g0->sched.sp in preparation for
731 // switch back to m->curg stack.
732 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
733 MOVL m_g0(BP), SI
734 MOVL (g_sched+gobuf_sp)(SI), AX
735 MOVL AX, 0(SP)
736 MOVL SP, (g_sched+gobuf_sp)(SI)
737
738 // Switch to m->curg stack and call runtime.cgocallbackg.
739 // Because we are taking over the execution of m->curg
740 // but *not* resuming what had been running, we need to
741 // save that information (m->curg->sched) so we can restore it.
742 // We can restore m->curg->sched.sp easily, because calling
743 // runtime.cgocallbackg leaves SP unchanged upon return.
744 // To save m->curg->sched.pc, we push it onto the curg stack and
745 // open a frame the same size as cgocallback's g0 frame.
746 // Once we switch to the curg stack, the pushed PC will appear
747 // to be the return PC of cgocallback, so that the traceback
748 // will seamlessly trace back into the earlier calls.
749 MOVL m_curg(BP), SI
750 MOVL SI, g(CX)
751 MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
752 MOVL (g_sched+gobuf_pc)(SI), BP
753 MOVL BP, -4(DI) // "push" return PC on the g stack
754 // Gather our arguments into registers.
// They must be read before SP changes, since FP references are
// relative to the g0 frame.
755 MOVL fn+0(FP), AX
756 MOVL frame+4(FP), BX
757 MOVL ctxt+8(FP), CX
758 LEAL -(4+12)(DI), SP // Must match declared frame size
759 MOVL AX, 0(SP)
760 MOVL BX, 4(SP)
761 MOVL CX, 8(SP)
762 CALL runtime·cgocallbackg(SB)
763
764 // Restore g->sched (== m->curg->sched) from saved values.
765 get_tls(CX)
766 MOVL g(CX), SI
767 MOVL 12(SP), BP // Must match declared frame size
768 MOVL BP, (g_sched+gobuf_pc)(SI)
769 LEAL (12+4)(SP), DI // Must match declared frame size
770 MOVL DI, (g_sched+gobuf_sp)(SI)
771
772 // Switch back to m->g0's stack and restore m->g0->sched.sp.
773 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
774 // so we do not have to restore it.)
775 MOVL g(CX), BP
776 MOVL g_m(BP), BP
777 MOVL m_g0(BP), SI
778 MOVL SI, g(CX)
779 MOVL (g_sched+gobuf_sp)(SI), SP
780 MOVL 0(SP), AX // the m->g0->sched.sp value saved at havem
781 MOVL AX, (g_sched+gobuf_sp)(SI)
782
783 // If the m on entry was nil, we called needm above to borrow an m
784 // for the duration of the call. Since the call is over, return it with dropm.
785 MOVL savedm-4(SP), DX
786 CMPL DX, $0
787 JNE 3(PC) // savedm != 0: the m was not borrowed, skip dropm
788 MOVL $runtime·dropm(SB), AX
789 CALL AX
790
791 // Done!
792 RET
793
794 // void setg(G*); set g. for use by needm.
// On Windows the word at 0x14(FS) is updated as well: it is cleared when
// gg is nil (and the function returns early without touching the TLS g
// slot), otherwise it is pointed at gg->m->tls.
795 TEXT runtime·setg(SB), NOSPLIT, $0-4
796 MOVL gg+0(FP), BX
797 #ifdef GOOS_windows
798 CMPL BX, $0
799 JNE settls
800 MOVL $0, 0x14(FS)
801 RET
802 settls:
803 MOVL g_m(BX), AX
804 LEAL m_tls(AX), AX // AX = &gg->m->tls
805 MOVL AX, 0x14(FS)
806 #endif
807 get_tls(CX)
808 MOVL BX, g(CX)
809 RET
810
811 // void setg_gcc(G*); set g. for use by gcc
// Stores its single stack argument into the TLS g slot. rt0_go passes
// this function's address to _cgo_init.
812 TEXT setg_gcc<>(SB), NOSPLIT, $0
813 get_tls(AX)
814 MOVL gg+0(FP), DX
815 MOVL DX, g(AX)
816 RET
817
// abort executes INT $3 and, should execution continue past it, spins
// forever. It never returns.
818 TEXT runtime·abort(SB),NOSPLIT,$0-0
819 INT $3
820 loop:
821 JMP loop
822
823 // check that SP is in range [g->stack.lo, g->stack.hi)
// Calls runtime·abort when either bound is violated.
// NOTE(review): both comparisons use JHI (strictly above), so SP equal to
// g->stack.lo also aborts; the range actually enforced appears to be
// (lo, hi) — confirm against the intended contract.
824 TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
825 get_tls(CX)
826 MOVL g(CX), AX
827 CMPL (g_stack+stack_hi)(AX), SP
828 JHI 2(PC) // stack.hi > SP: upper bound holds, skip the abort
829 CALL runtime·abort(SB)
830 CMPL SP, (g_stack+stack_lo)(AX)
831 JHI 2(PC) // SP > stack.lo: lower bound holds, skip the abort
832 CALL runtime·abort(SB)
833 RET
834
835 // func cputicks() int64
// Returns the time-stamp counter, AX as the low word and DX as the high
// word. Three paths: plain RDTSC when built with GO386_softfloat, RDTSCP
// when the CPU feature flag is set, otherwise MFENCE;LFENCE;RDTSC.
836 TEXT runtime·cputicks(SB),NOSPLIT,$0-8
837 // LFENCE/MFENCE instruction support is dependent on SSE2.
838 // When no SSE2 support is present do not enforce any serialization
839 // since using CPUID to serialize the instruction stream is
840 // very costly.
841 #ifdef GO386_softfloat
842 JMP rdtsc // no fence instructions available
843 #endif
844 CMPB internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
845 JNE fences
846 // Instruction stream serializing RDTSCP is supported.
847 // RDTSCP is supported by Intel Nehalem (2008) and
848 // AMD K8 Rev. F (2006) and newer.
849 RDTSCP
850 done:
851 MOVL AX, ret_lo+0(FP)
852 MOVL DX, ret_hi+4(FP)
853 RET
854 fences:
855 // MFENCE is instruction stream serializing and flushes the
856 // store buffers on AMD. The serialization semantics of LFENCE on AMD
857 // are dependent on MSR C001_1029 and CPU generation.
858 // LFENCE on Intel does wait for all previous instructions to have executed.
859 // Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
860 // previous instructions executed and all previous loads and stores globally visible.
861 // Using MFENCE;LFENCE here aligns the serializing properties without
862 // runtime detection of CPU manufacturer.
863 MFENCE
864 LFENCE
865 rdtsc:
866 RDTSC
867 JMP done
868
// ldt0setup calls runtime·setldt with three arguments: entry hint 7, the
// address of m0.tls, and the size 32. rt0_go calls it when TLS is not set
// up by _cgo_init.
869 TEXT ldt0setup<>(SB),NOSPLIT,$16-0
870 // set up ldt 7 to point at m0.tls
871 // ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
872 // the entry number is just a hint. setldt will set up GS with what it used.
873 MOVL $7, 0(SP)
874 LEAL runtime·m0+m_tls(SB), AX
875 MOVL AX, 4(SP)
876 MOVL $32, 8(SP) // sizeof(tls array)
877 CALL runtime·setldt(SB)
878 RET
879
// emptyfunc does nothing. It is declared with flags 0 rather than
// NOSPLIT, and rt0_go calls it with the note "fault if stack check is
// wrong" — presumably so that the stack check inserted for non-NOSPLIT
// functions exercises g0's freshly set stack bounds (TODO confirm).
880 TEXT runtime·emptyfunc(SB),0,$0-0
881 RET
882
883 // hash function using AES hardware instructions
// memhash dispatches on runtime·useAeshash: when it is zero control goes
// to runtime·memhashFallback; otherwise the registers aeshashbody<>
// expects are loaded (AX = data pointer, BX = size, DX = address of the
// result slot) and control jumps there.
884 TEXT runtime·memhash(SB),NOSPLIT,$0-16
885 CMPB runtime·useAeshash(SB), $0
886 JEQ noaes
887 MOVL p+0(FP), AX // ptr to data
888 MOVL s+8(FP), BX // size
889 LEAL ret+12(FP), DX
890 JMP aeshashbody<>(SB)
891 noaes:
892 JMP runtime·memhashFallback(SB)
893
// strhash hashes the string whose header p points at. When
// runtime·useAeshash is zero control goes to runtime·strhashFallback;
// otherwise the string's data pointer and length are unpacked into AX and
// BX, DX is pointed at the result slot, and control jumps to
// aeshashbody<>.
894 TEXT runtime·strhash(SB),NOSPLIT,$0-12
895 CMPB runtime·useAeshash(SB), $0
896 JEQ noaes
897 MOVL p+0(FP), AX // ptr to string object
898 MOVL 4(AX), BX // length of string
899 MOVL (AX), AX // string data
900 LEAL ret+8(FP), DX
901 JMP aeshashbody<>(SB)
902 noaes:
903 JMP runtime·strhashFallback(SB)
904
905 // AX: data
906 // BX: length
907 // DX: address to put return value
//
// Shared body of memhash and strhash, which reach it with JMP. The seed
// is read at h+4(FP); since this function declares no frame of its own,
// that presumably addresses the jumping function's h argument (TODO
// confirm). A 32-bit result is stored through DX. The input is handled in
// five size classes: 0, 1-15, 16, 17-32, 33-64 and 65 or more bytes.
908 TEXT aeshashbody<>(SB),NOSPLIT,$0-0
909 MOVL h+4(FP), X0 // 32 bits of per-table hash seed
910 PINSRW $4, BX, X0 // 16 bits of length
911 PSHUFHW $0, X0, X0 // replace size with its low 2 bytes repeated 4 times
912 MOVO X0, X1 // save unscrambled seed
913 PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
914 AESENC X0, X0 // scramble seed
915
// Dispatch on length (unsigned compares).
916 CMPL BX, $16
917 JB aes0to15
918 JE aes16
919 CMPL BX, $32
920 JBE aes17to32
921 CMPL BX, $64
922 JBE aes33to64
923 JMP aes65plus
924
925 aes0to15:
926 TESTL BX, BX
927 JE aes0
928
929 ADDL $16, AX
930 TESTW $0xff0, AX // bits 4-11 of ptr+16 all zero: take the endofpage path
931 JE endofpage
932
933 // 16 bytes loaded at this address won't cross
934 // a page boundary, so we can load it directly.
935 MOVOU -16(AX), X1
936 ADDL BX, BX
937 PAND masks<>(SB)(BX*8), X1 // index = length*16: one 16-byte mask per length
938
939 final1:
940 PXOR X0, X1 // xor data with seed
941 AESENC X1, X1 // scramble combo 3 times
942 AESENC X1, X1
943 AESENC X1, X1
944 MOVL X1, (DX)
945 RET
946
947 endofpage:
948 // address ends in 1111xxxx. Might be up against
949 // a page boundary, so load ending at last byte.
950 // Then shift bytes down using pshufb.
951 MOVOU -32(AX)(BX*1), X1 // AX was advanced by 16 above, so this is ptr+length-16
952 ADDL BX, BX
953 PSHUFB shifts<>(SB)(BX*8), X1 // index = length*16 into the shifts table
954 JMP final1
955
956 aes0:
957 // Return scrambled input seed
958 AESENC X0, X0
959 MOVL X0, (DX)
960 RET
961
962 aes16:
963 MOVOU (AX), X1
964 JMP final1
965
966 aes17to32:
967 // make second starting seed
968 PXOR runtime·aeskeysched+16(SB), X1
969 AESENC X1, X1
970
971 // load data to be hashed
// The second load ends at the last byte and may overlap the first.
972 MOVOU (AX), X2
973 MOVOU -16(AX)(BX*1), X3
974
975 // xor with seed
976 PXOR X0, X2
977 PXOR X1, X3
978
979 // scramble 3 times
980 AESENC X2, X2
981 AESENC X3, X3
982 AESENC X2, X2
983 AESENC X3, X3
984 AESENC X2, X2
985 AESENC X3, X3
986
987 // combine results
988 PXOR X3, X2
989 MOVL X2, (DX)
990 RET
991
992 aes33to64:
993 // make 3 more starting seeds
994 MOVO X1, X2
995 MOVO X1, X3
996 PXOR runtime·aeskeysched+16(SB), X1
997 PXOR runtime·aeskeysched+32(SB), X2
998 PXOR runtime·aeskeysched+48(SB), X3
999 AESENC X1, X1
1000 AESENC X2, X2
1001 AESENC X3, X3
1002
// First 32 bytes and last 32 bytes; the two halves may overlap.
1003 MOVOU (AX), X4
1004 MOVOU 16(AX), X5
1005 MOVOU -32(AX)(BX*1), X6
1006 MOVOU -16(AX)(BX*1), X7
1007
1008 PXOR X0, X4
1009 PXOR X1, X5
1010 PXOR X2, X6
1011 PXOR X3, X7
1012
1013 AESENC X4, X4
1014 AESENC X5, X5
1015 AESENC X6, X6
1016 AESENC X7, X7
1017
1018 AESENC X4, X4
1019 AESENC X5, X5
1020 AESENC X6, X6
1021 AESENC X7, X7
1022
1023 AESENC X4, X4
1024 AESENC X5, X5
1025 AESENC X6, X6
1026 AESENC X7, X7
1027
1028 PXOR X6, X4
1029 PXOR X7, X5
1030 PXOR X5, X4
1031 MOVL X4, (DX)
1032 RET
1033
1034 aes65plus:
1035 // make 3 more starting seeds
1036 MOVO X1, X2
1037 MOVO X1, X3
1038 PXOR runtime·aeskeysched+16(SB), X1
1039 PXOR runtime·aeskeysched+32(SB), X2
1040 PXOR runtime·aeskeysched+48(SB), X3
1041 AESENC X1, X1
1042 AESENC X2, X2
1043 AESENC X3, X3
1044
1045 // start with last (possibly overlapping) block
1046 MOVOU -64(AX)(BX*1), X4
1047 MOVOU -48(AX)(BX*1), X5
1048 MOVOU -32(AX)(BX*1), X6
1049 MOVOU -16(AX)(BX*1), X7
1050
1051 // scramble state once
1052 AESENC X0, X4
1053 AESENC X1, X5
1054 AESENC X2, X6
1055 AESENC X3, X7
1056
1057 // compute number of remaining 64-byte blocks
// BX = (length-1)/64, which is at least 1 here because length >= 65.
1058 DECL BX
1059 SHRL $6, BX
1060
1061 aesloop:
1062 // scramble state, xor in a block
1063 MOVOU (AX), X0
1064 MOVOU 16(AX), X1
1065 MOVOU 32(AX), X2
1066 MOVOU 48(AX), X3
1067 AESENC X0, X4
1068 AESENC X1, X5
1069 AESENC X2, X6
1070 AESENC X3, X7
1071
1072 // scramble state
1073 AESENC X4, X4
1074 AESENC X5, X5
1075 AESENC X6, X6
1076 AESENC X7, X7
1077
1078 ADDL $64, AX
1079 DECL BX
1080 JNE aesloop
1081
1082 // 3 more scrambles to finish
1083 AESENC X4, X4
1084 AESENC X5, X5
1085 AESENC X6, X6
1086 AESENC X7, X7
1087
1088 AESENC X4, X4
1089 AESENC X5, X5
1090 AESENC X6, X6
1091 AESENC X7, X7
1092
1093 AESENC X4, X4
1094 AESENC X5, X5
1095 AESENC X6, X6
1096 AESENC X7, X7
1097
// Fold the four lanes together and store the low 32 bits.
1098 PXOR X6, X4
1099 PXOR X7, X5
1100 PXOR X5, X4
1101 MOVL X4, (DX)
1102 RET
1103
// memhash32 hashes the 4 bytes at p with seed h. When runtime·useAeshash
// is zero control goes to runtime·memhash32Fallback. Otherwise the seed
// and the data word are packed into X0, three AESENC rounds are applied
// using the first three 16-byte entries of runtime·aeskeysched, and the
// low 32 bits are returned.
1104 TEXT runtime·memhash32(SB),NOSPLIT,$0-12
1105 CMPB runtime·useAeshash(SB), $0
1106 JEQ noaes
1107 MOVL p+0(FP), AX // ptr to data
1108 MOVL h+4(FP), X0 // seed
1109 PINSRD $1, (AX), X0 // data
1110 AESENC runtime·aeskeysched+0(SB), X0
1111 AESENC runtime·aeskeysched+16(SB), X0
1112 AESENC runtime·aeskeysched+32(SB), X0
1113 MOVL X0, ret+8(FP)
1114 RET
1115 noaes:
1116 JMP runtime·memhash32Fallback(SB)
1117
// memhash64 hashes the 8 bytes at p with seed h. When runtime·useAeshash
// is zero control goes to runtime·memhash64Fallback. Otherwise the data
// goes in the low 64 bits of X0 and the seed in dword 2, three AESENC
// rounds are applied using the first three 16-byte entries of
// runtime·aeskeysched, and the low 32 bits are returned.
1118 TEXT runtime·memhash64(SB),NOSPLIT,$0-12
1119 CMPB runtime·useAeshash(SB), $0
1120 JEQ noaes
1121 MOVL p+0(FP), AX // ptr to data
1122 MOVQ (AX), X0 // data
1123 PINSRD $2, h+4(FP), X0 // seed
1124 AESENC runtime·aeskeysched+0(SB), X0
1125 AESENC runtime·aeskeysched+16(SB), X0
1126 AESENC runtime·aeskeysched+32(SB), X0
1127 MOVL X0, ret+8(FP)
1128 RET
1129 noaes:
1130 JMP runtime·memhash64Fallback(SB)
1131
1132 // simple mask to get rid of data in the high part of the register.
// masks<> holds 16 entries of 16 bytes each. Entry n lives at byte offset
// n*16 and has its n lowest bytes set to 0xff and the remaining bytes zero
// (x86 is little-endian, so the first DATA word of an entry is bytes 0-3).
// NOTE(review): the code that indexes this table is outside this chunk;
// presumably it ANDs a 16-byte load with entry n to keep only n bytes — confirm.
1133 DATA masks<>+0x00(SB)/4, $0x00000000 // entry 0: keep no bytes
1134 DATA masks<>+0x04(SB)/4, $0x00000000
1135 DATA masks<>+0x08(SB)/4, $0x00000000
1136 DATA masks<>+0x0c(SB)/4, $0x00000000
1137
1138 DATA masks<>+0x10(SB)/4, $0x000000ff // entry 1: keep byte 0
1139 DATA masks<>+0x14(SB)/4, $0x00000000
1140 DATA masks<>+0x18(SB)/4, $0x00000000
1141 DATA masks<>+0x1c(SB)/4, $0x00000000
1142
1143 DATA masks<>+0x20(SB)/4, $0x0000ffff // entry 2: keep bytes 0-1
1144 DATA masks<>+0x24(SB)/4, $0x00000000
1145 DATA masks<>+0x28(SB)/4, $0x00000000
1146 DATA masks<>+0x2c(SB)/4, $0x00000000
1147
1148 DATA masks<>+0x30(SB)/4, $0x00ffffff // entry 3: keep bytes 0-2
1149 DATA masks<>+0x34(SB)/4, $0x00000000
1150 DATA masks<>+0x38(SB)/4, $0x00000000
1151 DATA masks<>+0x3c(SB)/4, $0x00000000
1152
1153 DATA masks<>+0x40(SB)/4, $0xffffffff // entry 4: keep bytes 0-3
1154 DATA masks<>+0x44(SB)/4, $0x00000000
1155 DATA masks<>+0x48(SB)/4, $0x00000000
1156 DATA masks<>+0x4c(SB)/4, $0x00000000
1157
1158 DATA masks<>+0x50(SB)/4, $0xffffffff // entry 5: keep bytes 0-4
1159 DATA masks<>+0x54(SB)/4, $0x000000ff
1160 DATA masks<>+0x58(SB)/4, $0x00000000
1161 DATA masks<>+0x5c(SB)/4, $0x00000000
1162
1163 DATA masks<>+0x60(SB)/4, $0xffffffff // entry 6: keep bytes 0-5
1164 DATA masks<>+0x64(SB)/4, $0x0000ffff
1165 DATA masks<>+0x68(SB)/4, $0x00000000
1166 DATA masks<>+0x6c(SB)/4, $0x00000000
1167
1168 DATA masks<>+0x70(SB)/4, $0xffffffff // entry 7: keep bytes 0-6
1169 DATA masks<>+0x74(SB)/4, $0x00ffffff
1170 DATA masks<>+0x78(SB)/4, $0x00000000
1171 DATA masks<>+0x7c(SB)/4, $0x00000000
1172
1173 DATA masks<>+0x80(SB)/4, $0xffffffff // entry 8: keep bytes 0-7
1174 DATA masks<>+0x84(SB)/4, $0xffffffff
1175 DATA masks<>+0x88(SB)/4, $0x00000000
1176 DATA masks<>+0x8c(SB)/4, $0x00000000
1177
1178 DATA masks<>+0x90(SB)/4, $0xffffffff // entry 9: keep bytes 0-8
1179 DATA masks<>+0x94(SB)/4, $0xffffffff
1180 DATA masks<>+0x98(SB)/4, $0x000000ff
1181 DATA masks<>+0x9c(SB)/4, $0x00000000
1182
1183 DATA masks<>+0xa0(SB)/4, $0xffffffff // entry 10: keep bytes 0-9
1184 DATA masks<>+0xa4(SB)/4, $0xffffffff
1185 DATA masks<>+0xa8(SB)/4, $0x0000ffff
1186 DATA masks<>+0xac(SB)/4, $0x00000000
1187
1188 DATA masks<>+0xb0(SB)/4, $0xffffffff // entry 11: keep bytes 0-10
1189 DATA masks<>+0xb4(SB)/4, $0xffffffff
1190 DATA masks<>+0xb8(SB)/4, $0x00ffffff
1191 DATA masks<>+0xbc(SB)/4, $0x00000000
1192
1193 DATA masks<>+0xc0(SB)/4, $0xffffffff // entry 12: keep bytes 0-11
1194 DATA masks<>+0xc4(SB)/4, $0xffffffff
1195 DATA masks<>+0xc8(SB)/4, $0xffffffff
1196 DATA masks<>+0xcc(SB)/4, $0x00000000
1197
1198 DATA masks<>+0xd0(SB)/4, $0xffffffff // entry 13: keep bytes 0-12
1199 DATA masks<>+0xd4(SB)/4, $0xffffffff
1200 DATA masks<>+0xd8(SB)/4, $0xffffffff
1201 DATA masks<>+0xdc(SB)/4, $0x000000ff
1202
1203 DATA masks<>+0xe0(SB)/4, $0xffffffff // entry 14: keep bytes 0-13
1204 DATA masks<>+0xe4(SB)/4, $0xffffffff
1205 DATA masks<>+0xe8(SB)/4, $0xffffffff
1206 DATA masks<>+0xec(SB)/4, $0x0000ffff
1207
1208 DATA masks<>+0xf0(SB)/4, $0xffffffff // entry 15: keep bytes 0-14
1209 DATA masks<>+0xf4(SB)/4, $0xffffffff
1210 DATA masks<>+0xf8(SB)/4, $0xffffffff
1211 DATA masks<>+0xfc(SB)/4, $0x00ffffff
1212
1213 GLOBL masks<>(SB),RODATA,$256 // 16 entries * 16 bytes, read-only
1214
1215 // these are arguments to pshufb. They move data down from
1216 // the high bytes of the register to the low bytes of the register.
1217 // index is how many bytes to move.
// shifts<> holds 16 entries of 16 bytes each; entry n lives at byte offset n*16.
// In entry n, control bytes 0..n-1 select source bytes 16-n..15 and every
// remaining control byte is 0xff. PSHUFB writes zero to a destination byte
// whose control byte has its top bit set, so the result is the top n bytes
// moved to the bottom with the rest cleared. Entry 0 is all zeros.
1218 DATA shifts<>+0x00(SB)/4, $0x00000000 // entry 0
1219 DATA shifts<>+0x04(SB)/4, $0x00000000
1220 DATA shifts<>+0x08(SB)/4, $0x00000000
1221 DATA shifts<>+0x0c(SB)/4, $0x00000000
1222
1223 DATA shifts<>+0x10(SB)/4, $0xffffff0f // entry 1: byte 15 -> byte 0
1224 DATA shifts<>+0x14(SB)/4, $0xffffffff
1225 DATA shifts<>+0x18(SB)/4, $0xffffffff
1226 DATA shifts<>+0x1c(SB)/4, $0xffffffff
1227
1228 DATA shifts<>+0x20(SB)/4, $0xffff0f0e // entry 2: bytes 14-15 -> bytes 0-1
1229 DATA shifts<>+0x24(SB)/4, $0xffffffff
1230 DATA shifts<>+0x28(SB)/4, $0xffffffff
1231 DATA shifts<>+0x2c(SB)/4, $0xffffffff
1232
1233 DATA shifts<>+0x30(SB)/4, $0xff0f0e0d // entry 3: bytes 13-15 -> bytes 0-2
1234 DATA shifts<>+0x34(SB)/4, $0xffffffff
1235 DATA shifts<>+0x38(SB)/4, $0xffffffff
1236 DATA shifts<>+0x3c(SB)/4, $0xffffffff
1237
1238 DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c // entry 4: bytes 12-15 -> bytes 0-3
1239 DATA shifts<>+0x44(SB)/4, $0xffffffff
1240 DATA shifts<>+0x48(SB)/4, $0xffffffff
1241 DATA shifts<>+0x4c(SB)/4, $0xffffffff
1242
1243 DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b // entry 5: bytes 11-15 -> bytes 0-4
1244 DATA shifts<>+0x54(SB)/4, $0xffffff0f
1245 DATA shifts<>+0x58(SB)/4, $0xffffffff
1246 DATA shifts<>+0x5c(SB)/4, $0xffffffff
1247
1248 DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a // entry 6: bytes 10-15 -> bytes 0-5
1249 DATA shifts<>+0x64(SB)/4, $0xffff0f0e
1250 DATA shifts<>+0x68(SB)/4, $0xffffffff
1251 DATA shifts<>+0x6c(SB)/4, $0xffffffff
1252
1253 DATA shifts<>+0x70(SB)/4, $0x0c0b0a09 // entry 7: bytes 9-15 -> bytes 0-6
1254 DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
1255 DATA shifts<>+0x78(SB)/4, $0xffffffff
1256 DATA shifts<>+0x7c(SB)/4, $0xffffffff
1257
1258 DATA shifts<>+0x80(SB)/4, $0x0b0a0908 // entry 8: bytes 8-15 -> bytes 0-7
1259 DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
1260 DATA shifts<>+0x88(SB)/4, $0xffffffff
1261 DATA shifts<>+0x8c(SB)/4, $0xffffffff
1262
1263 DATA shifts<>+0x90(SB)/4, $0x0a090807 // entry 9: bytes 7-15 -> bytes 0-8
1264 DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
1265 DATA shifts<>+0x98(SB)/4, $0xffffff0f
1266 DATA shifts<>+0x9c(SB)/4, $0xffffffff
1267
1268 DATA shifts<>+0xa0(SB)/4, $0x09080706 // entry 10: bytes 6-15 -> bytes 0-9
1269 DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
1270 DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
1271 DATA shifts<>+0xac(SB)/4, $0xffffffff
1272
1273 DATA shifts<>+0xb0(SB)/4, $0x08070605 // entry 11: bytes 5-15 -> bytes 0-10
1274 DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
1275 DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
1276 DATA shifts<>+0xbc(SB)/4, $0xffffffff
1277
1278 DATA shifts<>+0xc0(SB)/4, $0x07060504 // entry 12: bytes 4-15 -> bytes 0-11
1279 DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
1280 DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
1281 DATA shifts<>+0xcc(SB)/4, $0xffffffff
1282
1283 DATA shifts<>+0xd0(SB)/4, $0x06050403 // entry 13: bytes 3-15 -> bytes 0-12
1284 DATA shifts<>+0xd4(SB)/4, $0x0a090807
1285 DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
1286 DATA shifts<>+0xdc(SB)/4, $0xffffff0f
1287
1288 DATA shifts<>+0xe0(SB)/4, $0x05040302 // entry 14: bytes 2-15 -> bytes 0-13
1289 DATA shifts<>+0xe4(SB)/4, $0x09080706
1290 DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
1291 DATA shifts<>+0xec(SB)/4, $0xffff0f0e
1292
1293 DATA shifts<>+0xf0(SB)/4, $0x04030201 // entry 15: bytes 1-15 -> bytes 0-14
1294 DATA shifts<>+0xf4(SB)/4, $0x08070605
1295 DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
1296 DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
1297
1298 GLOBL shifts<>(SB),RODATA,$256 // 16 entries * 16 bytes, read-only
1299
// checkASM stores 1 in its single-byte result when both masks<> and shifts<>
// start on a 16-byte boundary, and 0 otherwise.
1300 TEXT ·checkASM(SB),NOSPLIT,$0-1
1301 // check that masks<>(SB) and shifts<>(SB) are 16-byte aligned
1302 MOVL $masks<>(SB), AX
1303 MOVL $shifts<>(SB), BX
1304 ORL BX, AX // a low bit set in either address survives the OR
1305 TESTL $15, AX // low 4 bits all zero <=> both addresses are multiples of 16
1306 SETEQ ret+0(FP)
1307 RET
1308
// return0 sets AX to 0 and returns. It has no frame and no declared arguments.
// NOTE(review): its callers are outside this chunk; presumably it is used as a
// jump target where a zero in AX is expected — confirm.
1309 TEXT runtime·return0(SB), NOSPLIT, $0
1310 MOVL $0, AX
1311 RET
1312
1313 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
1314 // Must obey the gcc calling convention.
// The result is left in AX. Besides AX, the only register named here is CX,
// which is passed to get_tls and then used to address g.
1315 TEXT _cgo_topofstack(SB),NOSPLIT,$0
1316 get_tls(CX) // macro defined outside this chunk
1317 MOVL g(CX), AX // AX = g
1318 MOVL g_m(AX), AX // AX = g.m
1319 MOVL m_curg(AX), AX // AX = g.m.curg
1320 MOVL (g_stack+stack_hi)(AX), AX // AX = g.m.curg.stack.hi
1321 RET
1322
1323 // The top-most function running on a goroutine
1324 // returns to goexit+PCQuantum.
// The body is NOP, CALL goexit1, NOP; there is no RET because goexit1 does
// not return.
// NOTE(review): the leading NOP presumably exists so that the address
// goexit+PCQuantum falls inside this function — confirm PCQuantum for 386.
1325 TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME,$0-0
1326 BYTE $0x90 // NOP
1327 CALL runtime·goexit1(SB) // does not return
1328 // traceback from goexit1 must hit code range of goexit
1329 BYTE $0x90 // NOP
1330
1331 // Add a module's moduledata to the linked list of moduledata objects. This
1332 // is called from .init_array by a function generated in the linker and so
1333 // follows the platform ABI wrt register preservation -- it only touches AX,
1334 // CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
1335 // instead the pointer to the moduledata is passed in AX.
// The new entry is appended at the tail: the current tail's next field is
// pointed at it, and runtime·lastmoduledatap is then advanced to it.
1336 TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
1337 MOVL runtime·lastmoduledatap(SB), DX // DX = current tail of the list
1338 MOVL AX, moduledata_next(DX) // tail.next = new moduledata
1339 MOVL AX, runtime·lastmoduledatap(SB) // new moduledata becomes the tail
1340 RET
1341
// uint32tofloat64 converts the 32-bit argument a to a float64 stored in ret.
// It builds a 64-bit integer on the stack whose low word is a and whose high
// word is zero, so the value is always read as non-negative, then converts
// that integer through the x87 FPU.
// Frame is $8-12: 8 bytes of scratch; a at +0, ret at +4.
1342 TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
1343 MOVL a+0(FP), AX
1344 MOVL AX, 0(SP) // low 32 bits = a
1345 MOVL $0, 4(SP) // high 32 bits = 0
1346 FMOVV 0(SP), F0 // load the 8-byte integer onto the x87 stack
1347 FMOVDP F0, ret+4(FP) // store it as a float64 and pop the x87 stack
1348 RET
1349
// float64touint32 converts the float64 argument a to a 32-bit result in ret.
// The value is stored as a 64-bit integer while the x87 control word is
// temporarily replaced by runtime·controlWord64trunc; only the low 32 bits of
// that integer are returned.
// NOTE(review): the name suggests the replacement control word selects
// truncation (round toward zero); its value is defined elsewhere — confirm.
// Frame is $12-12: 0(SP) saved control word, 4(SP)-11(SP) 64-bit integer.
1350 TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
1351 FMOVD a+0(FP), F0 // push a onto the x87 stack
1352 FSTCW 0(SP) // save the caller's x87 control word
1353 FLDCW runtime·controlWord64trunc(SB)
1354 FMOVVP F0, 4(SP) // store as a 64-bit integer and pop
1355 FLDCW 0(SP) // restore the caller's x87 control word
1356 MOVL 4(SP), AX // low 32 bits of the integer result
1357 MOVL AX, ret+8(FP)
1358 RET
1359
1360 // gcWriteBarrier performs a heap pointer write and informs the GC.
1361 //
1362 // gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
1363 // - DI is the destination of the write
1364 // - AX is the value being written at DI
1365 // It clobbers FLAGS. It does not clobber any general-purpose registers,
1366 // but may clobber others (e.g., SSE registers).
//
// Frame layout ($28, seven 4-byte slots):
//   0(SP) DI, 4(SP) AX, 8(SP) DX, 12(SP) BP, 16(SP) SI  (written only on the flush path)
//   20(SP) CX, 24(SP) BX                                (written on every call)
// Each buffer record is 8 bytes: the new value followed by the old *slot.
1367 TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$28
1368 // Save the registers clobbered by the fast path. This is slightly
1369 // faster than having the caller spill these.
1370 MOVL CX, 20(SP)
1371 MOVL BX, 24(SP)
1372 // TODO: Consider passing g.m.p in as an argument so they can be shared
1373 // across a sequence of write barriers.
1374 get_tls(BX)
1375 MOVL g(BX), BX // BX = g
1376 MOVL g_m(BX), BX // BX = g.m
1377 MOVL m_p(BX), BX // BX = g.m.p
1378 MOVL (p_wbBuf+wbBuf_next)(BX), CX // CX = p.wbBuf.next
1379 // Increment wbBuf.next position.
1380 LEAL 8(CX), CX // CX now points just past the 8-byte record being reserved
1381 MOVL CX, (p_wbBuf+wbBuf_next)(BX)
1382 CMPL CX, (p_wbBuf+wbBuf_end)(BX) // flags are consumed by JEQ below; the MOVLs in between leave them intact
1383 // Record the write.
1384 MOVL AX, -8(CX) // Record value
1385 MOVL (DI), BX // TODO: This turns bad writes into bad reads.
1386 MOVL BX, -4(CX) // Record *slot
1387 // Is the buffer full? (flags set in CMPL above)
1388 JEQ flush
1389 ret:
1390 MOVL 20(SP), CX // restore the registers used by the fast path
1391 MOVL 24(SP), BX
1392 // Do the write.
1393 MOVL AX, (DI)
1394 RET
1395
1396 flush:
1397 // Save all general purpose registers since these could be
1398 // clobbered by wbBufFlush and were not saved by the caller.
1399 MOVL DI, 0(SP) // Also first argument to wbBufFlush
1400 MOVL AX, 4(SP) // Also second argument to wbBufFlush
1401 // BX already saved
1402 // CX already saved
1403 MOVL DX, 8(SP)
1404 MOVL BP, 12(SP)
1405 MOVL SI, 16(SP)
1406 // DI already saved
1407
1408 // This takes arguments DI and AX
1409 CALL runtime·wbBufFlush(SB)
1410
1411 MOVL 0(SP), DI // reload everything saved above; CX and BX are reloaded at ret
1412 MOVL 4(SP), AX
1413 MOVL 8(SP), DX
1414 MOVL 12(SP), BP
1415 MOVL 16(SP), SI
1416 JMP ret // rejoin the fast path to perform the write
1417
1418 // Note: these functions use a special calling convention to save generated code space.
1419 // Arguments are passed in registers, but the space for those arguments is allocated
1420 // in the caller's stack frame. These stubs write the args into that stack space and
1421 // then tail call to the corresponding runtime handler.
1422 // The tail call makes these stubs disappear in backtraces.
// panicIndex: in AX = x, CX = y. Stores both into the argument slots, then tail-jumps to runtime·goPanicIndex.
1423 TEXT runtime·panicIndex(SB),NOSPLIT,$0-8
1424 MOVL AX, x+0(FP)
1425 MOVL CX, y+4(FP)
1426 JMP runtime·goPanicIndex(SB)
// panicIndexU: in AX = x, CX = y. Stores both into the argument slots, then tail-jumps to runtime·goPanicIndexU.
1427 TEXT runtime·panicIndexU(SB),NOSPLIT,$0-8
1428 MOVL AX, x+0(FP)
1429 MOVL CX, y+4(FP)
1430 JMP runtime·goPanicIndexU(SB)
// panicSliceAlen: in CX = x, DX = y. Stores both into the argument slots, then tail-jumps to runtime·goPanicSliceAlen.
1431 TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-8
1432 MOVL CX, x+0(FP)
1433 MOVL DX, y+4(FP)
1434 JMP runtime·goPanicSliceAlen(SB)
// panicSliceAlenU: in CX = x, DX = y. Stores both into the argument slots, then tail-jumps to runtime·goPanicSliceAlenU.
1435 TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-8
1436 MOVL CX, x+0(FP)
1437 MOVL DX, y+4(FP)
1438 JMP runtime·goPanicSliceAlenU(SB)
// panicSliceAcap: in CX = x, DX = y. Stores both into the argument slots, then tail-jumps to runtime·goPanicSliceAcap.
1439 TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-8
1440 MOVL CX, x+0(FP)
1441 MOVL DX, y+4(FP)
1442 JMP runtime·goPanicSliceAcap(SB)
// panicSliceAcapU: in CX = x, DX = y. Stores both into the argument slots, then tail-jumps to runtime·goPanicSliceAcapU.
1443 TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-8
1444 MOVL CX, x+0(FP)
1445 MOVL DX, y+4(FP)
1446 JMP runtime·goPanicSliceAcapU(SB)
// panicSliceB: in AX = x, CX = y. Stores both into the argument slots, then tail-jumps to runtime·goPanicSliceB.
1447 TEXT runtime·panicSliceB(SB),NOSPLIT,$0-8
1448 MOVL AX, x+0(FP)
1449 MOVL CX, y+4(FP)
1450 JMP runtime·goPanicSliceB(SB)
// panicSliceBU: in AX = x, CX = y. Stores both into the argument slots, then tail-jumps to runtime·goPanicSliceBU.
1451 TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-8
1452 MOVL AX, x+0(FP)
1453 MOVL CX, y+4(FP)
1454 JMP runtime·goPanicSliceBU(SB)
// panicSlice3Alen: in DX = x, BX = y. Stores both into the argument slots, then tail-jumps to runtime·goPanicSlice3Alen.
1455 TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-8
1456 MOVL DX, x+0(FP)
1457 MOVL BX, y+4(FP)
1458 JMP runtime·goPanicSlice3Alen(SB)
// panicSlice3AlenU: in DX = x, BX = y. Stores both into the argument slots, then tail-jumps to runtime·goPanicSlice3AlenU.
1459 TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-8
1460 MOVL DX, x+0(FP)
1461 MOVL BX, y+4(FP)
1462 JMP runtime·goPanicSlice3AlenU(SB)
// panicSlice3Acap: in DX = x, BX = y. Stores both into the argument slots, then tail-jumps to runtime·goPanicSlice3Acap.
1463 TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-8
1464 MOVL DX, x+0(FP)
1465 MOVL BX, y+4(FP)
1466 JMP runtime·goPanicSlice3Acap(SB)
// panicSlice3AcapU: in DX = x, BX = y. Stores both into the argument slots, then tail-jumps to runtime·goPanicSlice3AcapU.
1467 TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-8
1468 MOVL DX, x+0(FP)
1469 MOVL BX, y+4(FP)
1470 JMP runtime·goPanicSlice3AcapU(SB)
// panicSlice3B: in CX = x, DX = y. Stores both into the argument slots, then tail-jumps to runtime·goPanicSlice3B.
1471 TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-8
1472 MOVL CX, x+0(FP)
1473 MOVL DX, y+4(FP)
1474 JMP runtime·goPanicSlice3B(SB)
// panicSlice3BU: in CX = x, DX = y. Stores both into the argument slots, then tail-jumps to runtime·goPanicSlice3BU.
1475 TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-8
1476 MOVL CX, x+0(FP)
1477 MOVL DX, y+4(FP)
1478 JMP runtime·goPanicSlice3BU(SB)
// panicSlice3C: in AX = x, CX = y. Stores both into the argument slots, then tail-jumps to runtime·goPanicSlice3C.
1479 TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-8
1480 MOVL AX, x+0(FP)
1481 MOVL CX, y+4(FP)
1482 JMP runtime·goPanicSlice3C(SB)
// panicSlice3CU: in AX = x, CX = y. Stores both into the argument slots, then tail-jumps to runtime·goPanicSlice3CU.
1483 TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-8
1484 MOVL AX, x+0(FP)
1485 MOVL CX, y+4(FP)
1486 JMP runtime·goPanicSlice3CU(SB)
// panicSliceConvert: in DX = x, BX = y. Stores both into the argument slots, then tail-jumps to runtime·goPanicSliceConvert.
1487 TEXT runtime·panicSliceConvert(SB),NOSPLIT,$0-8
1488 MOVL DX, x+0(FP)
1489 MOVL BX, y+4(FP)
1490 JMP runtime·goPanicSliceConvert(SB)
1491
1492 // Extended versions for 64-bit indexes.
// panicExtendIndex: in SI = hi, AX = lo, CX = y. Stores all three into the argument slots, then tail-jumps to runtime·goPanicExtendIndex.
1493 TEXT runtime·panicExtendIndex(SB),NOSPLIT,$0-12
1494 MOVL SI, hi+0(FP)
1495 MOVL AX, lo+4(FP)
1496 MOVL CX, y+8(FP)
1497 JMP runtime·goPanicExtendIndex(SB)
// panicExtendIndexU: in SI = hi, AX = lo, CX = y. Stores all three into the argument slots, then tail-jumps to runtime·goPanicExtendIndexU.
1498 TEXT runtime·panicExtendIndexU(SB),NOSPLIT,$0-12
1499 MOVL SI, hi+0(FP)
1500 MOVL AX, lo+4(FP)
1501 MOVL CX, y+8(FP)
1502 JMP runtime·goPanicExtendIndexU(SB)
// panicExtendSliceAlen: in SI = hi, CX = lo, DX = y. Stores all three into the argument slots, then tail-jumps to runtime·goPanicExtendSliceAlen.
1503 TEXT runtime·panicExtendSliceAlen(SB),NOSPLIT,$0-12
1504 MOVL SI, hi+0(FP)
1505 MOVL CX, lo+4(FP)
1506 MOVL DX, y+8(FP)
1507 JMP runtime·goPanicExtendSliceAlen(SB)
// panicExtendSliceAlenU: in SI = hi, CX = lo, DX = y. Stores all three into the argument slots, then tail-jumps to runtime·goPanicExtendSliceAlenU.
1508 TEXT runtime·panicExtendSliceAlenU(SB),NOSPLIT,$0-12
1509 MOVL SI, hi+0(FP)
1510 MOVL CX, lo+4(FP)
1511 MOVL DX, y+8(FP)
1512 JMP runtime·goPanicExtendSliceAlenU(SB)
// panicExtendSliceAcap: in SI = hi, CX = lo, DX = y. Stores all three into the argument slots, then tail-jumps to runtime·goPanicExtendSliceAcap.
1513 TEXT runtime·panicExtendSliceAcap(SB),NOSPLIT,$0-12
1514 MOVL SI, hi+0(FP)
1515 MOVL CX, lo+4(FP)
1516 MOVL DX, y+8(FP)
1517 JMP runtime·goPanicExtendSliceAcap(SB)
// panicExtendSliceAcapU: in SI = hi, CX = lo, DX = y. Stores all three into the argument slots, then tail-jumps to runtime·goPanicExtendSliceAcapU.
1518 TEXT runtime·panicExtendSliceAcapU(SB),NOSPLIT,$0-12
1519 MOVL SI, hi+0(FP)
1520 MOVL CX, lo+4(FP)
1521 MOVL DX, y+8(FP)
1522 JMP runtime·goPanicExtendSliceAcapU(SB)
// panicExtendSliceB: in SI = hi, AX = lo, CX = y. Stores all three into the argument slots, then tail-jumps to runtime·goPanicExtendSliceB.
1523 TEXT runtime·panicExtendSliceB(SB),NOSPLIT,$0-12
1524 MOVL SI, hi+0(FP)
1525 MOVL AX, lo+4(FP)
1526 MOVL CX, y+8(FP)
1527 JMP runtime·goPanicExtendSliceB(SB)
// panicExtendSliceBU: in SI = hi, AX = lo, CX = y. Stores all three into the argument slots, then tail-jumps to runtime·goPanicExtendSliceBU.
1528 TEXT runtime·panicExtendSliceBU(SB),NOSPLIT,$0-12
1529 MOVL SI, hi+0(FP)
1530 MOVL AX, lo+4(FP)
1531 MOVL CX, y+8(FP)
1532 JMP runtime·goPanicExtendSliceBU(SB)
// panicExtendSlice3Alen: in SI = hi, DX = lo, BX = y. Stores all three into the argument slots, then tail-jumps to runtime·goPanicExtendSlice3Alen.
1533 TEXT runtime·panicExtendSlice3Alen(SB),NOSPLIT,$0-12
1534 MOVL SI, hi+0(FP)
1535 MOVL DX, lo+4(FP)
1536 MOVL BX, y+8(FP)
1537 JMP runtime·goPanicExtendSlice3Alen(SB)
// panicExtendSlice3AlenU: in SI = hi, DX = lo, BX = y. Stores all three into the argument slots, then tail-jumps to runtime·goPanicExtendSlice3AlenU.
1538 TEXT runtime·panicExtendSlice3AlenU(SB),NOSPLIT,$0-12
1539 MOVL SI, hi+0(FP)
1540 MOVL DX, lo+4(FP)
1541 MOVL BX, y+8(FP)
1542 JMP runtime·goPanicExtendSlice3AlenU(SB)
// panicExtendSlice3Acap: in SI = hi, DX = lo, BX = y. Stores all three into the argument slots, then tail-jumps to runtime·goPanicExtendSlice3Acap.
1543 TEXT runtime·panicExtendSlice3Acap(SB),NOSPLIT,$0-12
1544 MOVL SI, hi+0(FP)
1545 MOVL DX, lo+4(FP)
1546 MOVL BX, y+8(FP)
1547 JMP runtime·goPanicExtendSlice3Acap(SB)
// panicExtendSlice3AcapU: in SI = hi, DX = lo, BX = y. Stores all three into the argument slots, then tail-jumps to runtime·goPanicExtendSlice3AcapU.
1548 TEXT runtime·panicExtendSlice3AcapU(SB),NOSPLIT,$0-12
1549 MOVL SI, hi+0(FP)
1550 MOVL DX, lo+4(FP)
1551 MOVL BX, y+8(FP)
1552 JMP runtime·goPanicExtendSlice3AcapU(SB)
// panicExtendSlice3B: in SI = hi, CX = lo, DX = y. Stores all three into the argument slots, then tail-jumps to runtime·goPanicExtendSlice3B.
1553 TEXT runtime·panicExtendSlice3B(SB),NOSPLIT,$0-12
1554 MOVL SI, hi+0(FP)
1555 MOVL CX, lo+4(FP)
1556 MOVL DX, y+8(FP)
1557 JMP runtime·goPanicExtendSlice3B(SB)
// panicExtendSlice3BU: in SI = hi, CX = lo, DX = y. Stores all three into the argument slots, then tail-jumps to runtime·goPanicExtendSlice3BU.
1558 TEXT runtime·panicExtendSlice3BU(SB),NOSPLIT,$0-12
1559 MOVL SI, hi+0(FP)
1560 MOVL CX, lo+4(FP)
1561 MOVL DX, y+8(FP)
1562 JMP runtime·goPanicExtendSlice3BU(SB)
// panicExtendSlice3C: in SI = hi, AX = lo, CX = y. Stores all three into the argument slots, then tail-jumps to runtime·goPanicExtendSlice3C.
1563 TEXT runtime·panicExtendSlice3C(SB),NOSPLIT,$0-12
1564 MOVL SI, hi+0(FP)
1565 MOVL AX, lo+4(FP)
1566 MOVL CX, y+8(FP)
1567 JMP runtime·goPanicExtendSlice3C(SB)
// panicExtendSlice3CU: in SI = hi, AX = lo, CX = y. Stores all three into the argument slots, then tail-jumps to runtime·goPanicExtendSlice3CU.
1568 TEXT runtime·panicExtendSlice3CU(SB),NOSPLIT,$0-12
1569 MOVL SI, hi+0(FP)
1570 MOVL AX, lo+4(FP)
1571 MOVL CX, y+8(FP)
1572 JMP runtime·goPanicExtendSlice3CU(SB)
1573
1574 #ifdef GOOS_android
1575 // Use the free TLS_SLOT_APP slot #2 on Android Q.
1576 // Earlier androids are set up in gcc_android.c.
// runtime·tls_g is a 4-byte global, marked NOPTR, initialised to 8.
// NOTE(review): 8 is presumably the byte offset of slot #2 with 4-byte
// slots (2 * 4) — confirm against the Android TLS layout.
1577 DATA runtime·tls_g+0(SB)/4, $8
1578 GLOBL runtime·tls_g+0(SB), NOPTR, $4
1579 #endif
1580
View as plain text