Source file src/runtime/mstats.go
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Memory statistics

package runtime

import (
	"internal/goarch"
	"runtime/internal/atomic"
	"unsafe"
)

// Statistics.
//
// For detailed descriptions see the documentation for MemStats.
// Fields that differ from MemStats are further documented here.
//
// Many of these fields are updated on the fly, while others are only
// updated when updatememstats is called.
type mstats struct {
	// General statistics.
	alloc       uint64 // bytes allocated and not yet freed
	total_alloc uint64 // bytes allocated (even if freed)
	sys         uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
	nlookup     uint64 // number of pointer lookups (unused)
	nmalloc     uint64 // number of mallocs
	nfree       uint64 // number of frees

	// Statistics about malloc heap.
	// Updated atomically, or with the world stopped.
	//
	// Like MemStats, heap_sys and heap_inuse do not count memory
	// in manually-managed spans.
	heap_sys      sysMemStat // virtual address space obtained from system for GC'd heap
	heap_inuse    uint64     // bytes in mSpanInUse spans
	heap_released uint64     // bytes released to the os

	// heap_objects is not used by the runtime directly and instead
	// computed on the fly by updatememstats.
	heap_objects uint64 // total number of allocated objects

	// Statistics about stacks.
	stacks_inuse uint64     // bytes in manually-managed stack spans; computed by updatememstats
	stacks_sys   sysMemStat // only counts newosproc0 stack in mstats; differs from MemStats.StackSys

	// Statistics about allocation of low-level fixed-size structures.
	// Protected by FixAlloc locks.
	mspan_inuse  uint64 // mspan structures
	mspan_sys    sysMemStat
	mcache_inuse uint64 // mcache structures
	mcache_sys   sysMemStat
	buckhash_sys sysMemStat // profiling bucket hash table

	// Statistics about GC overhead.
	gcWorkBufInUse           uint64     // computed by updatememstats
	gcProgPtrScalarBitsInUse uint64     // computed by updatememstats
	gcMiscSys                sysMemStat // updated atomically or during STW

	// Miscellaneous statistics.
	other_sys sysMemStat // updated atomically or during STW

	// Statistics about the garbage collector.

	// Protected by mheap or stopping the world during GC.
	last_gc_unix    uint64 // last gc (in unix time)
	pause_total_ns  uint64
	pause_ns        [256]uint64 // circular buffer of recent gc pause lengths
	pause_end       [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
	numgc           uint32
	numforcedgc     uint32  // number of user-forced GCs
	gc_cpu_fraction float64 // fraction of CPU time used by GC
	enablegc        bool
	debuggc         bool

	// Statistics about allocation size classes.

	by_size [_NumSizeClasses]struct {
		size    uint32
		nmalloc uint64
		nfree   uint64
	}

	// Add an uint32 for even number of size classes to align below fields
	// to 64 bits for atomic operations on 32 bit platforms.
	_ [1 - _NumSizeClasses%2]uint32

	last_gc_nanotime uint64 // last gc (monotonic time)
	last_heap_inuse  uint64 // heap_inuse at mark termination of the previous GC

	// heapStats is a set of statistics
	heapStats consistentHeapStats

	// _ uint32 // ensure gcPauseDist is aligned

	// gcPauseDist represents the distribution of all GC-related
	// application pauses in the runtime.
	//
	// Each individual pause is counted separately, unlike pause_ns.
	gcPauseDist timeHistogram
}

var memstats mstats

// A MemStats records statistics about the memory allocator.
type MemStats struct {
	// General statistics.

	// Alloc is bytes of allocated heap objects.
	//
	// This is the same as HeapAlloc (see below).
	Alloc uint64

	// TotalAlloc is cumulative bytes allocated for heap objects.
	//
	// TotalAlloc increases as heap objects are allocated, but
	// unlike Alloc and HeapAlloc, it does not decrease when
	// objects are freed.
	TotalAlloc uint64

	// Sys is the total bytes of memory obtained from the OS.
	//
	// Sys is the sum of the XSys fields below. Sys measures the
	// virtual address space reserved by the Go runtime for the
	// heap, stacks, and other internal data structures. It's
	// likely that not all of the virtual address space is backed
	// by physical memory at any given moment, though in general
	// it all was at some point.
	Sys uint64

	// Lookups is the number of pointer lookups performed by the
	// runtime.
	//
	// This is primarily useful for debugging runtime internals.
	Lookups uint64

	// Mallocs is the cumulative count of heap objects allocated.
	// The number of live objects is Mallocs - Frees.
	Mallocs uint64

	// Frees is the cumulative count of heap objects freed.
	Frees uint64

	// Heap memory statistics.
	//
	// Interpreting the heap statistics requires some knowledge of
	// how Go organizes memory. Go divides the virtual address
	// space of the heap into "spans", which are contiguous
	// regions of memory 8K or larger. A span may be in one of
	// three states:
	//
	// An "idle" span contains no objects or other data. The
	// physical memory backing an idle span can be released back
	// to the OS (but the virtual address space never is), or it
	// can be converted into an "in use" or "stack" span.
	//
	// An "in use" span contains at least one heap object and may
	// have free space available to allocate more heap objects.
	//
	// A "stack" span is used for goroutine stacks. Stack spans
	// are not considered part of the heap. A span can change
	// between heap and stack memory; it is never used for both
	// simultaneously.

	// HeapAlloc is bytes of allocated heap objects.
	//
	// "Allocated" heap objects include all reachable objects, as
	// well as unreachable objects that the garbage collector has
	// not yet freed. Specifically, HeapAlloc increases as heap
	// objects are allocated and decreases as the heap is swept
	// and unreachable objects are freed. Sweeping occurs
	// incrementally between GC cycles, so these two processes
	// occur simultaneously, and as a result HeapAlloc tends to
	// change smoothly (in contrast with the sawtooth that is
	// typical of stop-the-world garbage collectors).
	HeapAlloc uint64

	// HeapSys is bytes of heap memory obtained from the OS.
	//
	// HeapSys measures the amount of virtual address space
	// reserved for the heap. This includes virtual address space
	// that has been reserved but not yet used, which consumes no
	// physical memory, but tends to be small, as well as virtual
	// address space for which the physical memory has been
	// returned to the OS after it became unused (see HeapReleased
	// for a measure of the latter).
	//
	// HeapSys estimates the largest size the heap has had.
	HeapSys uint64

	// HeapIdle is bytes in idle (unused) spans.
	//
	// Idle spans have no objects in them. These spans could be
	// (and may already have been) returned to the OS, or they can
	// be reused for heap allocations, or they can be reused as
	// stack memory.
	//
	// HeapIdle minus HeapReleased estimates the amount of memory
	// that could be returned to the OS, but is being retained by
	// the runtime so it can grow the heap without requesting more
	// memory from the OS. If this difference is significantly
	// larger than the heap size, it indicates there was a recent
	// transient spike in live heap size.
	HeapIdle uint64

	// HeapInuse is bytes in in-use spans.
	//
	// In-use spans have at least one object in them. These spans
	// can only be used for other objects of roughly the same
	// size.
	//
	// HeapInuse minus HeapAlloc estimates the amount of memory
	// that has been dedicated to particular size classes, but is
	// not currently being used. This is an upper bound on
	// fragmentation, but in general this memory can be reused
	// efficiently.
	HeapInuse uint64

	// HeapReleased is bytes of physical memory returned to the OS.
	//
	// This counts heap memory from idle spans that was returned
	// to the OS and has not yet been reacquired for the heap.
	HeapReleased uint64

	// HeapObjects is the number of allocated heap objects.
	//
	// Like HeapAlloc, this increases as objects are allocated and
	// decreases as the heap is swept and unreachable objects are
	// freed.
	HeapObjects uint64

	// Stack memory statistics.
	//
	// Stacks are not considered part of the heap, but the runtime
	// can reuse a span of heap memory for stack memory, and
	// vice-versa.

	// StackInuse is bytes in stack spans.
	//
	// In-use stack spans have at least one stack in them. These
	// spans can only be used for other stacks of the same size.
	//
	// There is no StackIdle because unused stack spans are
	// returned to the heap (and hence counted toward HeapIdle).
	StackInuse uint64

	// StackSys is bytes of stack memory obtained from the OS.
	//
	// StackSys is StackInuse, plus any memory obtained directly
	// from the OS for OS thread stacks (which should be minimal).
	StackSys uint64

	// Off-heap memory statistics.
	//
	// The following statistics measure runtime-internal
	// structures that are not allocated from heap memory (usually
	// because they are part of implementing the heap). Unlike
	// heap or stack memory, any memory allocated to these
	// structures is dedicated to these structures.
	//
	// These are primarily useful for debugging runtime memory
	// overheads.

	// MSpanInuse is bytes of allocated mspan structures.
	MSpanInuse uint64

	// MSpanSys is bytes of memory obtained from the OS for mspan
	// structures.
	MSpanSys uint64

	// MCacheInuse is bytes of allocated mcache structures.
	MCacheInuse uint64

	// MCacheSys is bytes of memory obtained from the OS for
	// mcache structures.
	MCacheSys uint64

	// BuckHashSys is bytes of memory in profiling bucket hash tables.
	BuckHashSys uint64

	// GCSys is bytes of memory in garbage collection metadata.
	GCSys uint64

	// OtherSys is bytes of memory in miscellaneous off-heap
	// runtime allocations.
	OtherSys uint64

	// Garbage collector statistics.

	// NextGC is the target heap size of the next GC cycle.
	//
	// The garbage collector's goal is to keep HeapAlloc ≤ NextGC.
	// At the end of each GC cycle, the target for the next cycle
	// is computed based on the amount of reachable data and the
	// value of GOGC.
	NextGC uint64

	// LastGC is the time the last garbage collection finished, as
	// nanoseconds since 1970 (the UNIX epoch).
	LastGC uint64

	// PauseTotalNs is the cumulative nanoseconds in GC
	// stop-the-world pauses since the program started.
	//
	// During a stop-the-world pause, all goroutines are paused
	// and only the garbage collector can run.
	PauseTotalNs uint64

	// PauseNs is a circular buffer of recent GC stop-the-world
	// pause times in nanoseconds.
	//
	// The most recent pause is at PauseNs[(NumGC+255)%256]. In
	// general, PauseNs[N%256] records the time paused in the most
	// recent N%256th GC cycle. There may be multiple pauses per
	// GC cycle; this is the sum of all pauses during a cycle.
	PauseNs [256]uint64

	// PauseEnd is a circular buffer of recent GC pause end times,
	// as nanoseconds since 1970 (the UNIX epoch).
	//
	// This buffer is filled the same way as PauseNs. There may be
	// multiple pauses per GC cycle; this records the end of the
	// last pause in a cycle.
	PauseEnd [256]uint64

	// NumGC is the number of completed GC cycles.
	NumGC uint32

	// NumForcedGC is the number of GC cycles that were forced by
	// the application calling the GC function.
	NumForcedGC uint32

	// GCCPUFraction is the fraction of this program's available
	// CPU time used by the GC since the program started.
	//
	// GCCPUFraction is expressed as a number between 0 and 1,
	// where 0 means GC has consumed none of this program's CPU. A
	// program's available CPU time is defined as the integral of
	// GOMAXPROCS since the program started. That is, if
	// GOMAXPROCS is 2 and a program has been running for 10
	// seconds, its "available CPU" is 20 seconds. GCCPUFraction
	// does not include CPU time used for write barrier activity.
	//
	// This is the same as the fraction of CPU reported by
	// GODEBUG=gctrace=1.
	GCCPUFraction float64

	// EnableGC indicates that GC is enabled. It is always true,
	// even if GOGC=off.
	EnableGC bool

	// DebugGC is currently unused.
	DebugGC bool

	// BySize reports per-size class allocation statistics.
	//
	// BySize[N] gives statistics for allocations of size S where
	// BySize[N-1].Size < S ≤ BySize[N].Size.
	//
	// This does not report allocations larger than BySize[60].Size.
	BySize [61]struct {
		// Size is the maximum byte size of an object in this
		// size class.
		Size uint32

		// Mallocs is the cumulative count of heap objects
		// allocated in this size class. The cumulative bytes
		// of allocation is Size*Mallocs. The number of live
		// objects in this size class is Mallocs - Frees.
		Mallocs uint64

		// Frees is the cumulative count of heap objects freed
		// in this size class.
		Frees uint64
	}
}
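// The sketch below is illustrative only and not part of the original file: it
// shows how a caller could walk the PauseNs circular buffer documented above,
// most recent pause first, using the documented rule that the latest pause
// lives at PauseNs[(NumGC+255)%256]. The helper name recentPauseNs and its
// signature are assumptions.
func recentPauseNs(m *MemStats, dst []uint64) int {
	n := len(dst)
	if n > len(m.PauseNs) {
		n = len(m.PauseNs)
	}
	if uint64(n) > uint64(m.NumGC) {
		n = int(m.NumGC)
	}
	for i := 0; i < n; i++ {
		// Step backwards through the circular buffer:
		// index (NumGC+255-i)%256 holds the i-th most recent pause.
		j := (int(m.NumGC) + len(m.PauseNs) - 1 - i) % len(m.PauseNs)
		dst[i] = m.PauseNs[j]
	}
	return n
}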
func init() {
	if offset := unsafe.Offsetof(memstats.heapStats); offset%8 != 0 {
		println(offset)
		throw("memstats.heapStats not aligned to 8 bytes")
	}
	if offset := unsafe.Offsetof(memstats.gcPauseDist); offset%8 != 0 {
		println(offset)
		throw("memstats.gcPauseDist not aligned to 8 bytes")
	}
	// Ensure the size of heapStatsDelta causes adjacent fields/slots (e.g.
	// [3]heapStatsDelta) to be 8-byte aligned.
	if size := unsafe.Sizeof(heapStatsDelta{}); size%8 != 0 {
		println(size)
		throw("heapStatsDelta not a multiple of 8 bytes in size")
	}
}

// ReadMemStats populates m with memory allocator statistics.
//
// The returned memory allocator statistics are up to date as of the
// call to ReadMemStats. This is in contrast with a heap profile,
// which is a snapshot as of the most recently completed garbage
// collection cycle.
func ReadMemStats(m *MemStats) {
	stopTheWorld("read mem stats")

	systemstack(func() {
		readmemstats_m(m)
	})

	startTheWorld()
}
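// The example below is an illustrative sketch, not part of the original file.
// It shows typical use of ReadMemStats and two relationships documented above:
// Sys is the sum of the XSys fields, and HeapIdle is derived as
// HeapSys - HeapInuse (see readmemstats_m below). The function name is an
// assumption.
func exampleReadMemStats() {
	var m MemStats
	ReadMemStats(&m)

	// Sys aggregates the XSys fields.
	xsys := m.HeapSys + m.StackSys + m.MSpanSys + m.MCacheSys +
		m.BuckHashSys + m.GCSys + m.OtherSys
	println("Sys:", m.Sys, "sum of XSys fields:", xsys)

	// HeapIdle is not tracked directly; it is derived from HeapSys and HeapInuse.
	println("HeapIdle:", m.HeapIdle, "HeapSys-HeapInuse:", m.HeapSys-m.HeapInuse)
}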
func readmemstats_m(stats *MemStats) {
	updatememstats()

	stats.Alloc = memstats.alloc
	stats.TotalAlloc = memstats.total_alloc
	stats.Sys = memstats.sys
	stats.Mallocs = memstats.nmalloc
	stats.Frees = memstats.nfree
	stats.HeapAlloc = memstats.alloc
	stats.HeapSys = memstats.heap_sys.load()
	// By definition, HeapIdle is memory that was mapped
	// for the heap but is not currently used to hold heap
	// objects. It also specifically is memory that can be
	// used for other purposes, like stacks, but this memory
	// is subtracted out of HeapSys before it makes that
	// transition. Put another way:
	//
	// heap_sys = bytes allocated from the OS for the heap - bytes ultimately used for non-heap purposes
	// heap_idle = bytes allocated from the OS for the heap - bytes ultimately used for any purpose
	//
	// or
	//
	// heap_sys = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse
	// heap_idle = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse - heap_inuse
	//
	// => heap_idle = heap_sys - heap_inuse
	stats.HeapIdle = memstats.heap_sys.load() - memstats.heap_inuse
	stats.HeapInuse = memstats.heap_inuse
	stats.HeapReleased = memstats.heap_released
	stats.HeapObjects = memstats.heap_objects
	stats.StackInuse = memstats.stacks_inuse
	// memstats.stacks_sys is only memory mapped directly for OS stacks.
	// Add in heap-allocated stack memory for user consumption.
	stats.StackSys = memstats.stacks_inuse + memstats.stacks_sys.load()
	stats.MSpanInuse = memstats.mspan_inuse
	stats.MSpanSys = memstats.mspan_sys.load()
	stats.MCacheInuse = memstats.mcache_inuse
	stats.MCacheSys = memstats.mcache_sys.load()
	stats.BuckHashSys = memstats.buckhash_sys.load()
	// MemStats defines GCSys as an aggregate of all memory related
	// to the memory management system, but we track this memory
	// at a more granular level in the runtime.
	stats.GCSys = memstats.gcMiscSys.load() + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse
	stats.OtherSys = memstats.other_sys.load()
	stats.NextGC = gcController.heapGoal
	stats.LastGC = memstats.last_gc_unix
	stats.PauseTotalNs = memstats.pause_total_ns
	stats.PauseNs = memstats.pause_ns
	stats.PauseEnd = memstats.pause_end
	stats.NumGC = memstats.numgc
	stats.NumForcedGC = memstats.numforcedgc
	stats.GCCPUFraction = memstats.gc_cpu_fraction
	stats.EnableGC = true

	// Handle BySize. Copy N values, where N is
	// the minimum of the lengths of the two arrays.
	// Unfortunately copy() won't work here because
	// the arrays have different structs.
	//
	// TODO(mknyszek): Consider renaming the fields
	// of by_size's elements to align so we can use
	// the copy built-in.
	bySizeLen := len(stats.BySize)
	if l := len(memstats.by_size); l < bySizeLen {
		bySizeLen = l
	}
	for i := 0; i < bySizeLen; i++ {
		stats.BySize[i].Size = memstats.by_size[i].size
		stats.BySize[i].Mallocs = memstats.by_size[i].nmalloc
		stats.BySize[i].Frees = memstats.by_size[i].nfree
	}
}

//go:linkname readGCStats runtime/debug.readGCStats
func readGCStats(pauses *[]uint64) {
	systemstack(func() {
		readGCStats_m(pauses)
	})
}

// readGCStats_m must be called on the system stack because it acquires the heap
// lock. See mheap for details.
//go:systemstack
func readGCStats_m(pauses *[]uint64) {
	p := *pauses
	// Calling code in runtime/debug should make the slice large enough.
	if cap(p) < len(memstats.pause_ns)+3 {
		throw("short slice passed to readGCStats")
	}

	// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
	lock(&mheap_.lock)

	n := memstats.numgc
	if n > uint32(len(memstats.pause_ns)) {
		n = uint32(len(memstats.pause_ns))
	}

	// The pause buffer is circular. The most recent pause is at
	// pause_ns[(numgc-1)%len(pause_ns)], and then backward
	// from there to go back farther in time. We deliver the times
	// most recent first (in p[0]).
	p = p[:cap(p)]
	for i := uint32(0); i < n; i++ {
		j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
		p[i] = memstats.pause_ns[j]
		p[n+i] = memstats.pause_end[j]
	}

	p[n+n] = memstats.last_gc_unix
	p[n+n+1] = uint64(memstats.numgc)
	p[n+n+2] = memstats.pause_total_ns
	unlock(&mheap_.lock)
	*pauses = p[:n+n+3]
}
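// The helper below is an illustrative sketch, not part of the original file.
// It decodes the flat slice produced by readGCStats_m above, whose layout is:
// n pause durations, n pause end times, then the last GC time, the GC count,
// and the total pause time. The real unpacking happens in runtime/debug; the
// name decodeGCStats and its signature are assumptions.
func decodeGCStats(p []uint64) (pauses, ends []uint64, lastGC, numGC, totalPauseNs uint64) {
	n := (len(p) - 3) / 2
	return p[:n], p[n : 2*n], p[2*n], p[2*n+1], p[2*n+2]
}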
// Updates the memstats structure.
//
// The world must be stopped.
//
//go:nowritebarrier
func updatememstats() {
	assertWorldStopped()

	// Flush mcaches to mcentral before doing anything else.
	//
	// Flushing to the mcentral may in general cause stats to
	// change as mcentral data structures are manipulated.
	systemstack(flushallmcaches)

	memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse)
	memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse)
	memstats.sys = memstats.heap_sys.load() + memstats.stacks_sys.load() + memstats.mspan_sys.load() +
		memstats.mcache_sys.load() + memstats.buckhash_sys.load() + memstats.gcMiscSys.load() +
		memstats.other_sys.load()

	// Calculate memory allocator stats.
	// During program execution we only count number of frees and amount of freed memory.
	// Current number of alive objects in the heap and amount of alive heap memory
	// are calculated by scanning all spans.
	// Total number of mallocs is calculated as number of frees plus number of alive objects.
	// Similarly, total amount of allocated memory is calculated as amount of freed memory
	// plus amount of alive heap memory.
	memstats.alloc = 0
	memstats.total_alloc = 0
	memstats.nmalloc = 0
	memstats.nfree = 0
	for i := 0; i < len(memstats.by_size); i++ {
		memstats.by_size[i].nmalloc = 0
		memstats.by_size[i].nfree = 0
	}
	// Collect consistent stats, which are the source-of-truth in some cases.
	var consStats heapStatsDelta
	memstats.heapStats.unsafeRead(&consStats)

	// Collect large allocation stats.
	totalAlloc := uint64(consStats.largeAlloc)
	memstats.nmalloc += uint64(consStats.largeAllocCount)
	totalFree := uint64(consStats.largeFree)
	memstats.nfree += uint64(consStats.largeFreeCount)

	// Collect per-sizeclass stats.
	for i := 0; i < _NumSizeClasses; i++ {
		// Malloc stats.
		a := uint64(consStats.smallAllocCount[i])
		totalAlloc += a * uint64(class_to_size[i])
		memstats.nmalloc += a
		memstats.by_size[i].nmalloc = a

		// Free stats.
		f := uint64(consStats.smallFreeCount[i])
		totalFree += f * uint64(class_to_size[i])
		memstats.nfree += f
		memstats.by_size[i].nfree = f
	}

	// Account for tiny allocations.
	memstats.nfree += uint64(consStats.tinyAllocCount)
	memstats.nmalloc += uint64(consStats.tinyAllocCount)

	// Calculate derived stats.
	memstats.total_alloc = totalAlloc
	memstats.alloc = totalAlloc - totalFree
	memstats.heap_objects = memstats.nmalloc - memstats.nfree

	memstats.stacks_inuse = uint64(consStats.inStacks)
	memstats.gcWorkBufInUse = uint64(consStats.inWorkBufs)
	memstats.gcProgPtrScalarBitsInUse = uint64(consStats.inPtrScalarBits)

	// We also count stacks_inuse, gcWorkBufInUse, and gcProgPtrScalarBitsInUse as sys memory.
	memstats.sys += memstats.stacks_inuse + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse

	// The world is stopped, so the consistent stats (after aggregation)
	// should be identical to some combination of memstats. In particular:
	//
	// * heap_inuse == inHeap
	// * heap_released == released
	// * heap_sys - heap_released == committed - inStacks - inWorkBufs - inPtrScalarBits
	//
	// Check if that's actually true.
	//
	// TODO(mknyszek): Maybe don't throw here. It would be bad if a
	// bug in otherwise benign accounting caused the whole application
	// to crash.
	if memstats.heap_inuse != uint64(consStats.inHeap) {
		print("runtime: heap_inuse=", memstats.heap_inuse, "\n")
		print("runtime: consistent value=", consStats.inHeap, "\n")
		throw("heap_inuse and consistent stats are not equal")
	}
	if memstats.heap_released != uint64(consStats.released) {
		print("runtime: heap_released=", memstats.heap_released, "\n")
		print("runtime: consistent value=", consStats.released, "\n")
		throw("heap_released and consistent stats are not equal")
	}
	globalRetained := memstats.heap_sys.load() - memstats.heap_released
	consRetained := uint64(consStats.committed - consStats.inStacks - consStats.inWorkBufs - consStats.inPtrScalarBits)
	if globalRetained != consRetained {
		print("runtime: global value=", globalRetained, "\n")
		print("runtime: consistent value=", consRetained, "\n")
		throw("measures of the retained heap are not equal")
	}
}

// flushmcache flushes the mcache of allp[i].
//
// The world must be stopped.
//
//go:nowritebarrier
func flushmcache(i int) {
	assertWorldStopped()

	p := allp[i]
	c := p.mcache
	if c == nil {
		return
	}
	c.releaseAll()
	stackcache_clear(c)
}

// flushallmcaches flushes the mcaches of all Ps.
//
// The world must be stopped.
//
//go:nowritebarrier
func flushallmcaches() {
	assertWorldStopped()

	for i := 0; i < int(gomaxprocs); i++ {
		flushmcache(i)
	}
}

// sysMemStat represents a global system statistic that is managed atomically.
//
// This type must structurally be a uint64 so that mstats aligns with MemStats.
type sysMemStat uint64

// load atomically reads the value of the stat.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//go:nosplit
func (s *sysMemStat) load() uint64 {
	return atomic.Load64((*uint64)(s))
}

// add atomically adds n to the sysMemStat.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//go:nosplit
func (s *sysMemStat) add(n int64) {
	if s == nil {
		return
	}
	val := atomic.Xadd64((*uint64)(s), n)
	if (n > 0 && int64(val) < n) || (n < 0 && int64(val)+n < n) {
		print("runtime: val=", val, " n=", n, "\n")
		throw("sysMemStat overflow")
	}
}
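// The snippet below is an illustrative sketch, not part of the original file.
// It shows the intended use of sysMemStat: record memory obtained from (or
// returned to) the OS with add, and observe the running total with load. The
// function name sysMemStatSketch is an assumption.
func sysMemStatSketch(delta int64) uint64 {
	var s sysMemStat
	s.add(delta)    // atomically adds delta; throws on overflow or underflow
	return s.load() // atomically reads the current value
}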
// heapStatsDelta contains deltas of various runtime memory statistics
// that need to be updated together in order for them to be kept
// consistent with one another.
type heapStatsDelta struct {
	// Memory stats.
	committed       int64 // byte delta of memory committed
	released        int64 // byte delta of released memory generated
	inHeap          int64 // byte delta of memory placed in the heap
	inStacks        int64 // byte delta of memory reserved for stacks
	inWorkBufs      int64 // byte delta of memory reserved for work bufs
	inPtrScalarBits int64 // byte delta of memory reserved for unrolled GC prog bits

	// Allocator stats.
	tinyAllocCount  uintptr                  // number of tiny allocations
	largeAlloc      uintptr                  // bytes allocated for large objects
	largeAllocCount uintptr                  // number of large object allocations
	smallAllocCount [_NumSizeClasses]uintptr // number of allocs for small objects
	largeFree       uintptr                  // bytes freed for large objects (>maxSmallSize)
	largeFreeCount  uintptr                  // number of frees for large objects (>maxSmallSize)
	smallFreeCount  [_NumSizeClasses]uintptr // number of frees for small objects (<=maxSmallSize)

	// Add a uint32 to ensure this struct is a multiple of 8 bytes in size.
	// Only necessary on 32-bit platforms.
	_ [(goarch.PtrSize / 4) % 2]uint32
}

// merge adds in the deltas from b into a.
func (a *heapStatsDelta) merge(b *heapStatsDelta) {
	a.committed += b.committed
	a.released += b.released
	a.inHeap += b.inHeap
	a.inStacks += b.inStacks
	a.inWorkBufs += b.inWorkBufs
	a.inPtrScalarBits += b.inPtrScalarBits

	a.tinyAllocCount += b.tinyAllocCount
	a.largeAlloc += b.largeAlloc
	a.largeAllocCount += b.largeAllocCount
	for i := range b.smallAllocCount {
		a.smallAllocCount[i] += b.smallAllocCount[i]
	}
	a.largeFree += b.largeFree
	a.largeFreeCount += b.largeFreeCount
	for i := range b.smallFreeCount {
		a.smallFreeCount[i] += b.smallFreeCount[i]
	}
}

// consistentHeapStats represents a set of various memory statistics
// whose updates must be viewed completely to get a consistent
// state of the world.
//
// To write updates to memory stats use the acquire and release
// methods. To obtain a consistent global snapshot of these statistics,
// use read.
type consistentHeapStats struct {
	// stats is a ring buffer of heapStatsDelta values.
	// Writers always atomically update the delta at index gen.
	//
	// Readers operate by rotating gen (0 -> 1 -> 2 -> 0 -> ...)
	// and synchronizing with writers by observing each P's
	// statsSeq field. If the reader observes a P not writing,
	// it can be sure that it will pick up the new gen value the
	// next time it writes.
	//
	// The reader then takes responsibility by clearing space
	// in the ring buffer for the next reader to rotate gen to
	// that space (i.e. it merges in values from index (gen-2) mod 3
	// to index (gen-1) mod 3, then clears the former).
	//
	// Note that this means only one reader can be reading at a time.
	// There is no way for readers to synchronize.
	//
	// This process is why we need a ring buffer of size 3 instead
	// of 2: one is for the writers, one contains the most recent
	// data, and the last one is clear so writers can begin writing
	// to it the moment gen is updated.
	stats [3]heapStatsDelta

	// gen represents the current index into which writers
	// are writing, and can take on the value of 0, 1, or 2.
	// This value is updated atomically.
	gen uint32

	// noPLock is intended to provide mutual exclusion for updating
	// stats when no P is available. It does not block other writers
	// with a P, only other writers without a P and the reader. Because
	// stats are usually updated when a P is available, contention on
	// this lock should be minimal.
	noPLock mutex
}

// acquire returns a heapStatsDelta to be updated. In effect,
// it acquires the shard for writing. release must be called
// as soon as the relevant deltas are updated.
//
// The returned heapStatsDelta must be updated atomically.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between. A P also must
// not acquire a given consistentHeapStats if it hasn't
// yet released it.
//
// nosplit because a stack growth in this function could
// lead to a stack allocation that could reenter the
// function.
//
//go:nosplit
func (m *consistentHeapStats) acquire() *heapStatsDelta {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := atomic.Xadd(&pp.statsSeq, 1)
		if seq%2 == 0 {
			// Should have been incremented to odd.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		lock(&m.noPLock)
	}
	gen := atomic.Load(&m.gen) % 3
	return &m.stats[gen]
}

// release indicates that the writer is done modifying
// the delta. The value returned by the corresponding
// acquire must no longer be accessed or modified after
// release is called.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between.
//
// nosplit because a stack growth in this function could
// lead to a stack allocation that causes another acquire
// before this operation has completed.
//
//go:nosplit
func (m *consistentHeapStats) release() {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := atomic.Xadd(&pp.statsSeq, 1)
		if seq%2 != 0 {
			// Should have been incremented to even.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		unlock(&m.noPLock)
	}
}
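// The sketch below is illustrative only and not part of the original file. It
// shows the writer-side pattern acquire and release are designed for: obtain
// the current shard, update the relevant deltas atomically (as acquire's
// contract requires), and release without changing Ps in between. The helper
// name and the use of atomic.Xaddint64 here are assumptions.
func noteCommittedSketch(nbytes int64) {
	stats := memstats.heapStats.acquire()
	atomic.Xaddint64(&stats.committed, nbytes)
	memstats.heapStats.release()
}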
// unsafeRead aggregates the delta for this shard into out.
//
// Unsafe because it does so without any synchronization. The
// world must be stopped.
func (m *consistentHeapStats) unsafeRead(out *heapStatsDelta) {
	assertWorldStopped()

	for i := range m.stats {
		out.merge(&m.stats[i])
	}
}

// unsafeClear clears the shard.
//
// Unsafe because the world must be stopped and values should
// be donated elsewhere before clearing.
func (m *consistentHeapStats) unsafeClear() {
	assertWorldStopped()

	for i := range m.stats {
		m.stats[i] = heapStatsDelta{}
	}
}

// read takes a globally consistent snapshot of m
// and puts the aggregated value in out. Even though out is a
// heapStatsDelta, the resulting values should be complete and
// valid statistic values.
//
// Not safe to call concurrently. The world must be stopped
// or metricsSema must be held.
func (m *consistentHeapStats) read(out *heapStatsDelta) {
	// Getting preempted after this point is not safe because
	// we read allp. We need to make sure a STW can't happen
	// so it doesn't change out from under us.
	mp := acquirem()

	// Get the current generation. We can be confident that this
	// will not change since read is serialized and is the only
	// one that modifies currGen.
	currGen := atomic.Load(&m.gen)
	prevGen := currGen - 1
	if currGen == 0 {
		prevGen = 2
	}

	// Prevent writers without a P from writing while we update gen.
	lock(&m.noPLock)

	// Rotate gen, effectively taking a snapshot of the state of
	// these statistics at the point of the exchange by moving
	// writers to the next set of deltas.
	//
	// This exchange is safe to do because we won't race
	// with anyone else trying to update this value.
	atomic.Xchg(&m.gen, (currGen+1)%3)

	// Allow P-less writers to continue. They'll be writing to the
	// next generation now.
	unlock(&m.noPLock)

	for _, p := range allp {
		// Spin until there are no more writers.
		for atomic.Load(&p.statsSeq)%2 != 0 {
		}
	}

	// At this point we've observed that each sequence
	// number is even, so any future writers will observe
	// the new gen value. That means it's safe to read from
	// the other deltas in the stats buffer.

	// Perform our responsibilities and free up
	// stats[prevGen] for the next time we want to take
	// a snapshot.
	m.stats[currGen].merge(&m.stats[prevGen])
	m.stats[prevGen] = heapStatsDelta{}

	// Finally, copy out the complete delta.
	*out = m.stats[currGen]

	releasem(mp)
}
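// A final illustrative sketch, not part of the original file: consuming a
// consistent snapshot via read. As documented above, the caller must have
// stopped the world or hold metricsSema; after aggregation the "deltas" in
// out are complete values. The helper name is an assumption.
func snapshotHeapInuseSketch() uint64 {
	var out heapStatsDelta
	memstats.heapStats.read(&out)
	// inHeap now holds the total bytes in in-use heap spans, not a delta.
	return uint64(out.inHeap)
}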