Source file
src/runtime/os_linux.go
1
2
3
4
5 package runtime
6
7 import (
8 "internal/abi"
9 "internal/goarch"
10 "runtime/internal/atomic"
11 "runtime/internal/syscall"
12 "unsafe"
13 )
14
15
16
17
// sigPerThreadSyscall is the signal that syscall_runtime_doAllThreadsSyscall
// sends (through signalM) to every other thread. It is defined as the signal
// number immediately after _SIGRTMIN.
18 const sigPerThreadSyscall = _SIGRTMIN + 1
19
// mOS groups the Linux-specific per-thread fields used by this file. The
// fields are reached through an *m (for example mp.profileTimer), so mOS is
// presumably embedded in m — confirm against the m declaration.
20 type mOS struct {
21
22
23
24
25
26
27
// profileTimer is the timer ID stored by setThreadCPUProfiler after a
// successful timer_create/timer_settime. It is only meaningful while
// profileTimerValid is non-zero.
28 profileTimer int32
// profileTimerValid is accessed with atomic.Load/atomic.Store; a non-zero
// value means profileTimer names a live per-thread profiling timer.
29 profileTimerValid uint32
30
31
32
// needPerThreadSyscall is set to 1 by syscall_runtime_doAllThreadsSyscall
// and reset to 0 by runPerThreadSyscall once this thread has run the call.
33 needPerThreadSyscall atomic.Uint8
34 }
35
36
// futex is declared without a body, so it is implemented outside this file
// (presumably an assembly stub for the Linux futex system call — confirm).
// futexwakeup treats a negative return value as failure.
37 func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32
38
39
40
41
42
43
44
45
46
47
// Operation codes passed as the op argument of futex. The wait and wake
// operations (0 and 1) are OR-ed with _FUTEX_PRIVATE_FLAG; the values
// presumably mirror the kernel's FUTEX_* constants — confirm against
// futex(2).
48 const (
49 _FUTEX_PRIVATE_FLAG = 128
50 _FUTEX_WAIT_PRIVATE = 0 | _FUTEX_PRIVATE_FLAG
51 _FUTEX_WAKE_PRIVATE = 1 | _FUTEX_PRIVATE_FLAG
52 )
53
54
55
56
57
58
59 func futexsleep(addr *uint32, val uint32, ns int64) {
60
61
62
63
64
65 if ns < 0 {
66 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, nil, nil, 0)
67 return
68 }
69
70 var ts timespec
71 ts.setNsec(ns)
72 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, unsafe.Pointer(&ts), nil, 0)
73 }
74
75
76
77 func futexwakeup(addr *uint32, cnt uint32) {
78 ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE_PRIVATE, cnt, nil, nil, 0)
79 if ret >= 0 {
80 return
81 }
82
83
84
85
86 systemstack(func() {
87 print("futexwakeup addr=", addr, " returned ", ret, "\n")
88 })
89
90 *(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
91 }
92
93 func getproccount() int32 {
94
95
96
97
98
99
100
101 const maxCPUs = 64 * 1024
102 var buf [maxCPUs / 8]byte
103 r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
104 if r < 0 {
105 return 1
106 }
107 n := int32(0)
108 for _, v := range buf[:r] {
109 for v != 0 {
110 n += int32(v & 1)
111 v >>= 1
112 }
113 }
114 if n == 0 {
115 n = 1
116 }
117 return n
118 }
119
120
// Flag bits for the flags argument of clone. The individual values
// presumably mirror the kernel's CLONE_* constants — confirm against
// clone(2). Only the six flags combined into cloneFlags are used in this
// file.
121 const (
122 _CLONE_VM = 0x100
123 _CLONE_FS = 0x200
124 _CLONE_FILES = 0x400
125 _CLONE_SIGHAND = 0x800
126 _CLONE_PTRACE = 0x2000
127 _CLONE_VFORK = 0x4000
128 _CLONE_PARENT = 0x8000
129 _CLONE_THREAD = 0x10000
130 _CLONE_NEWNS = 0x20000
131 _CLONE_SYSVSEM = 0x40000
132 _CLONE_SETTLS = 0x80000
133 _CLONE_PARENT_SETTID = 0x100000
134 _CLONE_CHILD_CLEARTID = 0x200000
135 _CLONE_UNTRACED = 0x800000
136 _CLONE_CHILD_SETTID = 0x1000000
137 _CLONE_STOPPED = 0x2000000
138 _CLONE_NEWUTS = 0x4000000
139 _CLONE_NEWIPC = 0x8000000
140
141
142
143
144
145
146
147
// cloneFlags is the flag set that newosproc and newosproc0 pass to clone.
148 cloneFlags = _CLONE_VM |
149 _CLONE_FS |
150 _CLONE_FILES |
151 _CLONE_SIGHAND |
152 _CLONE_SYSVSEM |
153 _CLONE_THREAD
154 )
155
156
// clone is declared without a body, so it is implemented outside this file
// (presumably in assembly — confirm). Callers in this file pass cloneFlags,
// the top of the new stack, optional m and g pointers, and the entry
// function, and treat a negative result as failure.
157 func clone(flags int32, stk, mp, gp, fn unsafe.Pointer) int32
158
159
160
161 func newosproc(mp *m) {
162 stk := unsafe.Pointer(mp.g0.stack.hi)
163
166 if false {
167 print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " clone=", abi.FuncPCABI0(clone), " id=", mp.id, " ostk=", &mp, "\n")
168 }
169
170
171
172 var oset sigset
173 sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
174 ret := clone(cloneFlags, stk, unsafe.Pointer(mp), unsafe.Pointer(mp.g0), unsafe.Pointer(abi.FuncPCABI0(mstart)))
175 sigprocmask(_SIG_SETMASK, &oset, nil)
176
177 if ret < 0 {
178 print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", -ret, ")\n")
179 if ret == -_EAGAIN {
180 println("runtime: may need to increase max user processes (ulimit -u)")
181 }
182 throw("newosproc")
183 }
184 }
185
186
187
188 func newosproc0(stacksize uintptr, fn unsafe.Pointer) {
189 stack := sysAlloc(stacksize, &memstats.stacks_sys)
190 if stack == nil {
191 write(2, unsafe.Pointer(&failallocatestack[0]), int32(len(failallocatestack)))
192 exit(1)
193 }
194 ret := clone(cloneFlags, unsafe.Pointer(uintptr(stack)+stacksize), nil, nil, fn)
195 if ret < 0 {
196 write(2, unsafe.Pointer(&failthreadcreate[0]), int32(len(failthreadcreate)))
197 exit(1)
198 }
199 }
200
// Messages that newosproc0 writes to file descriptor 2 before exiting. They
// are kept as byte slices so that the address of the first byte can be
// passed directly to write.
201 var failallocatestack = []byte("runtime: failed to allocate stack for the new OS thread\n")
202 var failthreadcreate = []byte("runtime: failed to create new OS thread\n")
203
// Tags of auxiliary-vector entries. In this file sysauxv stops at _AT_NULL
// and handles _AT_PAGESZ and _AT_RANDOM itself; _AT_HWCAP and _AT_HWCAP2
// are not referenced here and are presumably consumed by archauxv —
// confirm.
204 const (
205 _AT_NULL = 0
206 _AT_PAGESZ = 6
207 _AT_HWCAP = 16
208 _AT_RANDOM = 25
209 _AT_HWCAP2 = 26
210 )
211
// procAuxv is the NUL-terminated path that sysargs opens when the auxiliary
// vector found after argv yields no entries.
212 var procAuxv = []byte("/proc/self/auxv\x00")
213
// addrspace_vec is the one-byte result buffer that sysargs hands to mincore
// while probing for the page size.
214 var addrspace_vec [1]byte
215
// mincore is declared without a body, so it is implemented outside this
// file (presumably an assembly stub for the mincore system call — confirm).
// sysargs treats a zero return as success.
216 func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32
217
218 func sysargs(argc int32, argv **byte) {
219 n := argc + 1
220
221
222 for argv_index(argv, n) != nil {
223 n++
224 }
225
226
227 n++
228
229
230 auxv := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*goarch.PtrSize))
231 if sysauxv(auxv[:]) != 0 {
232 return
233 }
234
235
236
237 fd := open(&procAuxv[0], 0 , 0)
238 if fd < 0 {
239
240
241
242 const size = 256 << 10
243 p, err := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
244 if err != 0 {
245 return
246 }
247 var n uintptr
248 for n = 4 << 10; n < size; n <<= 1 {
249 err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0])
250 if err == 0 {
251 physPageSize = n
252 break
253 }
254 }
255 if physPageSize == 0 {
256 physPageSize = size
257 }
258 munmap(p, size)
259 return
260 }
261 var buf [128]uintptr
262 n = read(fd, noescape(unsafe.Pointer(&buf[0])), int32(unsafe.Sizeof(buf)))
263 closefd(fd)
264 if n < 0 {
265 return
266 }
267
268
269 buf[len(buf)-2] = _AT_NULL
270 sysauxv(buf[:])
271 }
272
273
274
// startupRandomData is set by sysauxv to the 16 bytes addressed by the
// _AT_RANDOM auxiliary-vector entry. getRandomData uses it in preference to
// /dev/urandom whenever it is non-nil.
275 var startupRandomData []byte
276
277 func sysauxv(auxv []uintptr) int {
278 var i int
279 for ; auxv[i] != _AT_NULL; i += 2 {
280 tag, val := auxv[i], auxv[i+1]
281 switch tag {
282 case _AT_RANDOM:
283
284
285 startupRandomData = (*[16]byte)(unsafe.Pointer(val))[:]
286
287 case _AT_PAGESZ:
288 physPageSize = val
289 }
290
291 archauxv(tag, val)
292 vdsoauxv(tag, val)
293 }
294 return i / 2
295 }
296
// sysTHPSizePath is the NUL-terminated sysfs path that getHugePageSize
// reads.
297 var sysTHPSizePath = []byte("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size\x00")
298
299 func getHugePageSize() uintptr {
300 var numbuf [20]byte
301 fd := open(&sysTHPSizePath[0], 0 , 0)
302 if fd < 0 {
303 return 0
304 }
305 ptr := noescape(unsafe.Pointer(&numbuf[0]))
306 n := read(fd, ptr, int32(len(numbuf)))
307 closefd(fd)
308 if n <= 0 {
309 return 0
310 }
311 n--
312 v, ok := atoi(slicebytetostringtmp((*byte)(ptr), int(n)))
313 if !ok || v < 0 {
314 v = 0
315 }
316 if v&(v-1) != 0 {
317
318 return 0
319 }
320 return uintptr(v)
321 }
322
323 func osinit() {
324 ncpu = getproccount()
325 physHugePageSize = getHugePageSize()
326 if iscgo {
327
328
329
330
331
332
333
334
335
336
337
338
339
340 sigdelset(&sigsetAllExiting, 32)
341 sigdelset(&sigsetAllExiting, 33)
342 sigdelset(&sigsetAllExiting, 34)
343 }
344 osArchInit()
345 }
346
// urandom_dev is the NUL-terminated device path that getRandomData opens
// when startupRandomData is nil.
347 var urandom_dev = []byte("/dev/urandom\x00")
348
349 func getRandomData(r []byte) {
350 if startupRandomData != nil {
351 n := copy(r, startupRandomData)
352 extendRandom(r, n)
353 return
354 }
355 fd := open(&urandom_dev[0], 0 , 0)
356 n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
357 closefd(fd)
358 extendRandom(r, int(n))
359 }
360
// goenvs delegates to goenvs_unix; it does no Linux-specific work of its
// own.
361 func goenvs() {
362 goenvs_unix()
363 }
364
365
366
367
368
369
// libpreinit calls initsig with its argument set to true.
// NOTE(review): the meaning of that flag is defined by initsig, which is
// not in this file.
370 func libpreinit() {
371 initsig(true)
372 }
373
374
375
// mpreinit creates mp's signal-handling goroutine: it calls malg with a
// size argument of 32 KiB, stores the result in mp.gsignal, and points that
// goroutine's m field back at mp.
376 func mpreinit(mp *m) {
377 mp.gsignal = malg(32 * 1024)
378 mp.gsignal.m = mp
379 }
380
// gettid is declared without a body, so it is implemented outside this file
// (presumably an assembly stub for the gettid system call — confirm). minit
// stores its result in m.procid.
381 func gettid() uint32
382
383
384
385 func minit() {
386 minitSignals()
387
388
389
390
391 getg().m.procid = uint64(gettid())
392 }
393
394
395
// unminit calls unminitSignals. Unlike minit, it leaves m.procid untouched.
396 func unminit() {
397 unminitSignals()
398 }
399
400
401
// mdestroy does nothing on Linux: its body is empty.
402 func mdestroy(mp *m) {
403 }
404
405
406
407
408
// sigreturn, sigtramp and cgoSigtramp are declared without bodies, so they
// are implemented outside this file (presumably in assembly — confirm).
// setsig only ever takes their addresses with abi.FuncPCABI0; nothing in
// this file calls them directly.
409 func sigreturn()
410 func sigtramp()
411 func cgoSigtramp()
412
413
// The declarations below have no bodies, so they are implemented outside
// this file. Judging by their names they wrap the Linux system calls of the
// same name — confirm against the per-architecture implementations.

// sigaltstack takes a new and an old *stackt; it is not called in this file.
414 func sigaltstack(new, old *stackt)
415
416
// setitimer takes a mode plus new and old *itimerval; it is not called in
// this file.
417 func setitimer(mode int32, new, old *itimerval)
418
419
// timer_create stores a timer ID through timerid. setThreadCPUProfiler
// treats any non-zero result as failure.
420 func timer_create(clockid int32, sevp *sigevent, timerid *int32) int32
421
422
// timer_settime programs timerid from new. setThreadCPUProfiler treats a
// non-zero result as a negated errno and throws.
423 func timer_settime(timerid int32, flags int32, new, old *itimerspec) int32
424
425
// timer_delete removes timerid. setThreadCPUProfiler treats a non-zero
// result as a negated errno and throws.
426 func timer_delete(timerid int32) int32
427
428
// rtsigprocmask is the primitive behind sigprocmask, which supplies the
// size argument.
429 func rtsigprocmask(how int32, new, old *sigset, size int32)
430
431
432
// sigprocmask forwards to rtsigprocmask, passing the size of a sigset as
// the size argument. unsafe.Sizeof(*new) is evaluated at compile time from
// the type alone, so a nil new pointer is never dereferenced here.
433 func sigprocmask(how int32, new, old *sigset) {
434 rtsigprocmask(how, new, old, int32(unsafe.Sizeof(*new)))
435 }
436
// raise and raiseproc are declared without bodies and are not called in
// this file; presumably they deliver sig to the current thread and to the
// whole process respectively — confirm against their implementations.
437 func raise(sig uint32)
438 func raiseproc(sig uint32)
439
440
// sched_getaffinity is implemented outside this file. getproccount treats a
// negative result as failure and otherwise uses the result as the number of
// mask bytes written to buf.
441 func sched_getaffinity(pid, len uintptr, buf *byte) int32
// osyield is implemented outside this file; the spin-wait loops in
// syscall_runtime_doAllThreadsSyscall call it between polls.
442 func osyield()
443
444
// osyield_no_g simply calls osyield.
// NOTE(review): the name suggests it exists for callers running without a
// g; confirm against its call sites.
445 func osyield_no_g() {
446 osyield()
447 }
448
// pipe, pipe2 and setNonblock are declared without bodies, so they are
// implemented outside this file; none of them is called here. pipe and
// pipe2 return a read descriptor, a write descriptor and an errno.
449 func pipe() (r, w int32, errno int32)
450 func pipe2(flags int32) (r, w int32, errno int32)
451 func setNonblock(fd int32)
452
// Size constants that are not referenced in this file; presumably the byte
// sizes of the kernel's siginfo and sigevent structures — confirm against
// the struct definitions that use them.
453 const (
454 _si_max_size = 128
455 _sigev_max_size = 64
456 )
457
458
459
460 func setsig(i uint32, fn uintptr) {
461 var sa sigactiont
462 sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK | _SA_RESTORER | _SA_RESTART
463 sigfillset(&sa.sa_mask)
464
465
466
467 if GOARCH == "386" || GOARCH == "amd64" {
468 sa.sa_restorer = abi.FuncPCABI0(sigreturn)
469 }
470 if fn == abi.FuncPCABIInternal(sighandler) {
471 if iscgo {
472 fn = abi.FuncPCABI0(cgoSigtramp)
473 } else {
474 fn = abi.FuncPCABI0(sigtramp)
475 }
476 }
477 sa.sa_handler = fn
478 sigaction(i, &sa, nil)
479 }
480
481
482
483 func setsigstack(i uint32) {
484 var sa sigactiont
485 sigaction(i, nil, &sa)
486 if sa.sa_flags&_SA_ONSTACK != 0 {
487 return
488 }
489 sa.sa_flags |= _SA_ONSTACK
490 sigaction(i, &sa, nil)
491 }
492
493
494
495 func getsig(i uint32) uintptr {
496 var sa sigactiont
497 sigaction(i, nil, &sa)
498 return sa.sa_handler
499 }
500
501
502
// setSignalstackSP stores sp into s.ss_sp. The store goes through an
// unsafe.Pointer cast so that it works whatever type ss_sp is declared
// with.
503 func setSignalstackSP(s *stackt, sp uintptr) {
504 *(*uintptr)(unsafe.Pointer(&s.ss_sp)) = sp
505 }
506
507
// fixsigcode does nothing on Linux: its body is empty.
508 func (c *sigctxt) fixsigcode(sig uint32) {
509 }
510
511
512
513 func sysSigaction(sig uint32, new, old *sigactiont) {
514 if rt_sigaction(uintptr(sig), new, old, unsafe.Sizeof(sigactiont{}.sa_mask)) != 0 {
515
516
517
518
519
520
521
522
523
524
525
526 if sig != 32 && sig != 33 && sig != 64 {
527
528 systemstack(func() {
529 throw("sigaction failed")
530 })
531 }
532 }
533 }
534
535
536
// rt_sigaction is declared without a body, so it is implemented outside
// this file. sysSigaction treats a non-zero result as failure.
537 func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32
538
// getpid and tgkill are declared without bodies, so they are implemented
// outside this file. signalM combines them to signal one specific thread.
539 func getpid() int
540 func tgkill(tgid, tid, sig int)
541
542
543 func signalM(mp *m, sig int) {
544 tgkill(getpid(), int(mp.procid), sig)
545 }
546
547
// go118UseTimerCreateProfiler gates the per-thread timer_create profiler in
// setThreadCPUProfiler: when it is false that function only records hz and
// returns.
548 const go118UseTimerCreateProfiler = true
549
550
551
552
553
554
555
556
557 func validSIGPROF(mp *m, c *sigctxt) bool {
558 code := int32(c.sigcode())
559 setitimer := code == _SI_KERNEL
560 timer_create := code == _SI_TIMER
561
562 if !(setitimer || timer_create) {
563
564
565
566 return true
567 }
568
569 if mp == nil {
570
571
572
573
574
575
576
577
578
579
580
581
582 return setitimer
583 }
584
585
586
587 if atomic.Load(&mp.profileTimerValid) != 0 {
588
589
590
591
592
593 return timer_create
594 }
595
596
597 return setitimer
598 }
599
// setProcessCPUProfiler forwards hz unchanged to
// setProcessCPUProfilerTimer.
600 func setProcessCPUProfiler(hz int32) {
601 setProcessCPUProfilerTimer(hz)
602 }
603
604 func setThreadCPUProfiler(hz int32) {
605 mp := getg().m
606 mp.profilehz = hz
607
608 if !go118UseTimerCreateProfiler {
609 return
610 }
611
612
613 if atomic.Load(&mp.profileTimerValid) != 0 {
614 timerid := mp.profileTimer
615 atomic.Store(&mp.profileTimerValid, 0)
616 mp.profileTimer = 0
617
618 ret := timer_delete(timerid)
619 if ret != 0 {
620 print("runtime: failed to disable profiling timer; timer_delete(", timerid, ") errno=", -ret, "\n")
621 throw("timer_delete")
622 }
623 }
624
625 if hz == 0 {
626
627 return
628 }
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649 spec := new(itimerspec)
650 spec.it_value.setNsec(1 + int64(fastrandn(uint32(1e9/hz))))
651 spec.it_interval.setNsec(1e9 / int64(hz))
652
653 var timerid int32
654 var sevp sigevent
655 sevp.notify = _SIGEV_THREAD_ID
656 sevp.signo = _SIGPROF
657 sevp.sigev_notify_thread_id = int32(mp.procid)
658 ret := timer_create(_CLOCK_THREAD_CPUTIME_ID, &sevp, &timerid)
659 if ret != 0 {
660
661
662 return
663 }
664
665 ret = timer_settime(timerid, 0, spec, nil)
666 if ret != 0 {
667 print("runtime: failed to configure profiling timer; timer_settime(", timerid,
668 ", 0, {interval: {",
669 spec.it_interval.tv_sec, "s + ", spec.it_interval.tv_nsec, "ns} value: {",
670 spec.it_value.tv_sec, "s + ", spec.it_value.tv_nsec, "ns}}, nil) errno=", -ret, "\n")
671 throw("timer_settime")
672 }
673
674 mp.profileTimer = timerid
675 atomic.Store(&mp.profileTimerValid, 1)
676 }
677
678
679
// perThreadSyscallArgs describes one system call to be repeated on every
// thread: the trap number, its six arguments, and the r1/r2 results that
// the initiating thread obtained. runPerThreadSyscall compares its own
// results against r1 and r2.
680 type perThreadSyscallArgs struct {
681 trap uintptr
682 a1 uintptr
683 a2 uintptr
684 a3 uintptr
685 a4 uintptr
686 a5 uintptr
687 a6 uintptr
688 r1 uintptr
689 r2 uintptr
690 }
691
692
693
694
695
696
// perThreadSyscall holds the call currently being broadcast.
// syscall_runtime_doAllThreadsSyscall fills it in before signalling the
// other threads and resets it to the zero value once they have all
// finished; runPerThreadSyscall reads it.
697 var perThreadSyscall perThreadSyscallArgs
698
699
700
701
702
703
704
705
706
707 func syscall_runtime_doAllThreadsSyscall(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) {
708 if iscgo {
709
710 panic("doAllThreadsSyscall not supported with cgo enabled")
711 }
712
713
714
715
716
717
718
719
720 stopTheWorld("doAllThreadsSyscall")
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742 allocmLock.lock()
743
744
745
746
747
748
749 acquirem()
750
751
752
753
754
755
756 r1, r2, errno := syscall.Syscall6(trap, a1, a2, a3, a4, a5, a6)
757 if GOARCH == "ppc64" || GOARCH == "ppc64le" {
758
759 r2 = 0
760 }
761 if errno != 0 {
762 releasem(getg().m)
763 allocmLock.unlock()
764 startTheWorld()
765 return r1, r2, errno
766 }
767
768 perThreadSyscall = perThreadSyscallArgs{
769 trap: trap,
770 a1: a1,
771 a2: a2,
772 a3: a3,
773 a4: a4,
774 a5: a5,
775 a6: a6,
776 r1: r1,
777 r2: r2,
778 }
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815 for mp := allm; mp != nil; mp = mp.alllink {
816 for atomic.Load64(&mp.procid) == 0 {
817
818 osyield()
819 }
820 }
821
822
823
824 gp := getg()
825 tid := gp.m.procid
826 for mp := allm; mp != nil; mp = mp.alllink {
827 if atomic.Load64(&mp.procid) == tid {
828
829 continue
830 }
831 mp.needPerThreadSyscall.Store(1)
832 signalM(mp, sigPerThreadSyscall)
833 }
834
835
836 for mp := allm; mp != nil; mp = mp.alllink {
837 if mp.procid == tid {
838 continue
839 }
840 for mp.needPerThreadSyscall.Load() != 0 {
841 osyield()
842 }
843 }
844
845 perThreadSyscall = perThreadSyscallArgs{}
846
847 releasem(getg().m)
848 allocmLock.unlock()
849 startTheWorld()
850
851 return r1, r2, errno
852 }
853
854
855
856
857
858
859 func runPerThreadSyscall() {
860 gp := getg()
861 if gp.m.needPerThreadSyscall.Load() == 0 {
862 return
863 }
864
865 args := perThreadSyscall
866 r1, r2, errno := syscall.Syscall6(args.trap, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6)
867 if GOARCH == "ppc64" || GOARCH == "ppc64le" {
868
869 r2 = 0
870 }
871 if errno != 0 || r1 != args.r1 || r2 != args.r2 {
872 print("trap:", args.trap, ", a123456=[", args.a1, ",", args.a2, ",", args.a3, ",", args.a4, ",", args.a5, ",", args.a6, "]\n")
873 print("results: got {r1=", r1, ",r2=", r2, ",errno=", errno, "}, want {r1=", args.r1, ",r2=", args.r2, ",errno=0\n")
874 throw("AllThreadsSyscall6 results differ between threads; runtime corrupted")
875 }
876
877 gp.m.needPerThreadSyscall.Store(0)
878 }
879
View as plain text