Source file src/runtime/mem_linux.go

// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"runtime/internal/atomic"
	"unsafe"
)

const (
	_EACCES = 13
	_EINVAL = 22
)

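// sysAlloc obtains a zeroed, page-aligned region of memory from the OS via an
// anonymous private mmap and accounts for it in sysStat.
//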
// Don't split the stack as this function may be invoked without a valid G, which
// prevents us from allocating more stack.
//go:nosplit
func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer {
	p, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
	if err != 0 {
		if err == _EACCES {
			print("runtime: mmap: access denied\n")
			exit(2)
		}
		if err == _EAGAIN {
			print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
			exit(2)
		}
		return nil
	}
	sysStat.add(int64(n))
	return p
}

var adviseUnused = uint32(_MADV_FREE)

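// sysUnused advises the kernel that the physical pages backing [v, v+n) are no
// longer needed and may be reclaimed (MADV_FREE where available, otherwise
// MADV_DONTNEED). Both v and n must be multiples of the physical page size.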
func sysUnused(v unsafe.Pointer, n uintptr) {
	// By default, Linux's "transparent huge page" support will
	// merge pages into a huge page if there's even a single
	// present regular page, undoing the effects of madvise(adviseUnused)
	// below. On amd64, that means khugepaged can turn a single
	// 4KB page into a 2MB huge page, bloating the process's RSS by as
	// much as 512X. (See issue #8832 and Linux kernel bug
	// https://bugzilla.kernel.org/show_bug.cgi?id=93111)
	//
	// To work around this, we explicitly disable transparent huge
	// pages when we release pages of the heap. However, we have
	// to do this carefully because changing this flag tends to
	// split the VMA (memory mapping) containing v into three
	// VMAs in order to track the different values of the
	// MADV_NOHUGEPAGE flag in the different regions. There's a
	// default limit of 65530 VMAs per address space (sysctl
	// vm.max_map_count), so we must be careful not to create too
	// many VMAs (see issue #12233).
	//
	// Since huge pages are huge, there's little use in adjusting
	// the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid
	// exploding the number of VMAs by only adjusting the
	// MADV_NOHUGEPAGE flag on a large granularity. This still
	// gets most of the benefit of huge pages while keeping the
	// number of VMAs under control. With hugePageSize = 2MB, even
	// a pessimal heap can reach 128GB before running out of VMAs.
	if physHugePageSize != 0 {
		// If it's a large allocation, we want to leave huge
		// pages enabled. Hence, we only adjust the huge page
		// flag on the huge pages containing v and v+n-1, and
		// only if those aren't aligned.
		var head, tail uintptr
		if uintptr(v)&(physHugePageSize-1) != 0 {
			// Compute huge page containing v.
			head = alignDown(uintptr(v), physHugePageSize)
		}
		if (uintptr(v)+n)&(physHugePageSize-1) != 0 {
			// Compute huge page containing v+n-1.
			tail = alignDown(uintptr(v)+n-1, physHugePageSize)
		}

		// Note that madvise will return EINVAL if the flag is
		// already set, which is quite likely. We ignore
		// errors.
		if head != 0 && head+physHugePageSize == tail {
			// head and tail are different but adjacent,
			// so do this in one call.
			madvise(unsafe.Pointer(head), 2*physHugePageSize, _MADV_NOHUGEPAGE)
		} else {
			// Advise the huge pages containing v and v+n-1.
			if head != 0 {
				madvise(unsafe.Pointer(head), physHugePageSize, _MADV_NOHUGEPAGE)
			}
			if tail != 0 && tail != head {
				madvise(unsafe.Pointer(tail), physHugePageSize, _MADV_NOHUGEPAGE)
			}
		}
	}

	if uintptr(v)&(physPageSize-1) != 0 || n&(physPageSize-1) != 0 {
		// madvise will round this to any physical page
		// *covered* by this range, so an unaligned madvise
		// will release more memory than intended.
		throw("unaligned sysUnused")
	}

	var advise uint32
	if debug.madvdontneed != 0 {
		advise = _MADV_DONTNEED
	} else {
		advise = atomic.Load(&adviseUnused)
	}
	if errno := madvise(v, n, int32(advise)); advise == _MADV_FREE && errno != 0 {
		// MADV_FREE was added in Linux 4.5. Fall back to MADV_DONTNEED if it is
		// not supported.
		atomic.Store(&adviseUnused, _MADV_DONTNEED)
		madvise(v, n, _MADV_DONTNEED)
	}

	if debug.harddecommit > 0 {
		p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
		if p != v || err != 0 {
			throw("runtime: cannot disable permissions in address space")
		}
	}
}

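// sysUsed notifies the kernel that the region [v, v+n) is in use again. In the
// common case this only re-enables transparent huge pages for the whole huge
// pages inside the region; in hard decommit mode (debug.harddecommit) the
// region is remapped read/write instead.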
func sysUsed(v unsafe.Pointer, n uintptr) {
	if debug.harddecommit > 0 {
		p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
		if err == _ENOMEM {
			throw("runtime: out of memory")
		}
		if p != v || err != 0 {
			throw("runtime: cannot remap pages in address space")
		}
		return

		// Don't do the sysHugePage optimization in hard decommit mode.
		// We're breaking up pages everywhere, there's no point.
	}
	// Partially undo the NOHUGEPAGE marks from sysUnused
	// for whole huge pages between v and v+n. This may
	// leave huge pages off at the end points v and v+n
	// even though allocations may cover these entire huge
	// pages. We could detect this and undo NOHUGEPAGE on
	// the end points as well, but it's probably not worth
	// the cost because when neighboring allocations are
	// freed sysUnused will just set NOHUGEPAGE again.
	sysHugePage(v, n)
}

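// sysHugePage advises the kernel to back the huge-page-aligned portion of
// [v, v+n) with transparent huge pages.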
func sysHugePage(v unsafe.Pointer, n uintptr) {
	if physHugePageSize != 0 {
		// Round v up to a huge page boundary.
		beg := alignUp(uintptr(v), physHugePageSize)
		// Round v+n down to a huge page boundary.
		end := alignDown(uintptr(v)+n, physHugePageSize)

		if beg < end {
			madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE)
		}
	}
}

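// sysFree unmaps the region [v, v+n), returning it to the OS, and subtracts n
// from sysStat.
//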
// Don't split the stack as this function may be invoked without a valid G,
// which prevents us from allocating more stack.
//go:nosplit
func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) {
	sysStat.add(-int64(n))
	munmap(v, n)
}

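// sysFault remaps the region [v, v+n) with PROT_NONE so that any access to it
// faults.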
func sysFault(v unsafe.Pointer, n uintptr) {
	mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
}

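// sysReserve reserves the address range [v, v+n) without committing memory:
// the range is mapped PROT_NONE and must be made accessible with sysMap before
// use. It returns nil if the reservation fails.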
func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer {
	p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
	if err != 0 {
		return nil
	}
	return p
}

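// sysMap makes the previously reserved region [v, v+n) readable and writable
// by remapping it with a fixed anonymous mapping, and adds n to sysStat. It
// throws if the kernel cannot satisfy the mapping.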
func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) {
	sysStat.add(int64(n))

	p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
	if err == _ENOMEM {
		throw("runtime: out of memory")
	}
	if p != v || err != 0 {
		print("runtime: mmap(", v, ", ", n, ") returned ", p, ", ", err, "\n")
		throw("runtime: cannot map pages in arena address space")
	}
}
