Skip to content

Commit

Permalink
runtime: rearrange mheap_.alloc* into allocSpan
Browse files Browse the repository at this point in the history
This change combines the functionality of allocSpanLocked, allocManual,
and alloc_m into a new method called allocSpan. While these methods'
abstraction boundaries are OK when the heap lock is held throughout,
they start to break down when we want finer-grained locking in the page
allocator.

allocSpan does just that, and only locks the heap when it absolutely has
to. Piggy-backing off of work in previous CLs to make more of span
initialization lockless, this change makes span initialization entirely
lockless as part of the reorganization.

Ultimately this change will enable us to add a lockless fast path to
allocSpan.

Updates #35112.

Change-Id: I99875939d75fb4e958a67ac99e4a7cda44f06864
Reviewed-on: https://go-review.googlesource.com/c/go/+/196641
Run-TryBot: Michael Knyszek <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Austin Clements <[email protected]>
  • Loading branch information
mknyszek committed Nov 8, 2019
1 parent a5a6f61 commit a762221
Showing 1 changed file with 165 additions and 160 deletions.
325 changes: 165 additions & 160 deletions src/runtime/mheap.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ type mheap struct {
// could self-deadlock if its stack grows with the lock held.
lock mutex
pages pageAlloc // page allocation data structure
sweepgen uint32 // sweep generation, see comment in mspan
sweepgen uint32 // sweep generation, see comment in mspan; written during STW
sweepdone uint32 // all spans are swept
sweepers uint32 // number of active sweepone calls

Expand Down Expand Up @@ -848,136 +848,23 @@ func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr {
return nFreed
}

// alloc_m is the internal implementation of mheap.alloc.
//
// alloc_m must run on the system stack because it locks the heap, so
// any stack growth during alloc_m would self-deadlock.
//
//go:systemstack
func (h *mheap) alloc_m(npage uintptr, spanclass spanClass) *mspan {
_g_ := getg()

// To prevent excessive heap growth, before allocating n pages
// we need to sweep and reclaim at least n pages.
if h.sweepdone == 0 {
h.reclaim(npage)
}

// Compute size information.
nbytes := npage << _PageShift
var elemSize, nelems uintptr
if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
elemSize = nbytes
nelems = 1
} else {
elemSize = uintptr(class_to_size[sizeclass])
nelems = nbytes / elemSize
}

// Allocate mark and allocation bits before we take the heap
// lock. We'll drop these on the floor if we fail to allocate
// the span, but in that case we'll panic soon.
gcmarkBits := newMarkBits(nelems)
allocBits := newAllocBits(nelems)

lock(&h.lock)
// transfer stats from cache to global
memstats.heap_scan += uint64(_g_.m.mcache.local_scan)
_g_.m.mcache.local_scan = 0
memstats.tinyallocs += uint64(_g_.m.mcache.local_tinyallocs)
_g_.m.mcache.local_tinyallocs = 0

s := h.allocSpanLocked(npage, &memstats.heap_inuse)
if s != nil {
// Record span info, because gc needs to be
// able to map interior pointer to containing span.
atomic.Store(&s.sweepgen, h.sweepgen)
h.sweepSpans[h.sweepgen/2%2].push(s) // Add to swept in-use list.
s.allocCount = 0
s.spanclass = spanclass
s.elemsize = elemSize
if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
s.divShift = 0
s.divMul = 0
s.divShift2 = 0
s.baseMask = 0

// Update additional stats.
mheap_.largealloc += uint64(s.elemsize)
mheap_.nlargealloc++
atomic.Xadd64(&memstats.heap_live, int64(npage<<_PageShift))
} else {
m := &class_to_divmagic[sizeclass]
s.divShift = m.shift
s.divMul = m.mul
s.divShift2 = m.shift2
s.baseMask = m.baseMask
}

// Initialize mark and allocation structures.
s.freeindex = 0
s.allocCache = ^uint64(0) // all 1s indicating all free.
s.nelems = nelems
s.gcmarkBits = gcmarkBits
s.allocBits = allocBits

// Now that the span is filled in, set its state. This
// is a publication barrier for the other fields in
// the span. While valid pointers into this span
// should never be visible until the span is returned,
// if the garbage collector finds an invalid pointer,
// access to the span may race with initialization of
// the span. We resolve this race by atomically
// setting the state after the span is fully
// initialized, and atomically checking the state in
// any situation where a pointer is suspect.
s.state.set(mSpanInUse)

// Mark in-use span in arena page bitmap.
//
// This publishes the span to the page sweeper, so
// it's imperative that the span be completely initialized
// prior to this line.
arena, pageIdx, pageMask := pageIndexOf(s.base())
atomic.Or8(&arena.pageInUse[pageIdx], pageMask)

// Update related page sweeper stats.
atomic.Xadd64(&h.pagesInUse, int64(npage))
}
// heap_scan and heap_live were updated.
if gcBlackenEnabled != 0 {
gcController.revise()
}

if trace.enabled {
traceHeapAlloc()
}

// h.spans is accessed concurrently without synchronization
// from other threads. Hence, there must be a store/store
// barrier here to ensure the writes to h.spans above happen
// before the caller can publish a pointer p to an object
// allocated from s. As soon as this happens, the garbage
// collector running on another processor could read p and
// look up s in h.spans. The unlock acts as the barrier to
// order these writes. On the read side, the data dependency
// between p and the index in h.spans orders the reads.
unlock(&h.lock)
return s
}

// alloc allocates a new span of npage pages from the GC'd heap.
//
// spanclass indicates the span's size class and scannability.
//
// If needzero is true, the memory for the returned span will be zeroed.
func (h *mheap) alloc(npage uintptr, spanclass spanClass, needzero bool) *mspan {
func (h *mheap) alloc(npages uintptr, spanclass spanClass, needzero bool) *mspan {
// Don't do any operations that lock the heap on the G stack.
// It might trigger stack growth, and the stack growth code needs
// to be able to allocate heap.
var s *mspan
systemstack(func() {
s = h.alloc_m(npage, spanclass)
// To prevent excessive heap growth, before allocating n pages
// we need to sweep and reclaim at least n pages.
if h.sweepdone == 0 {
h.reclaim(npages)
}
s = h.allocSpan(npages, false, spanclass, &memstats.heap_inuse)
})

if s != nil {
Expand All @@ -999,29 +886,12 @@ func (h *mheap) alloc(npage uintptr, spanclass spanClass, needzero bool) *mspan
// The memory backing the returned span may not be zeroed if
// span.needzero is set.
//
// allocManual must be called on the system stack because it acquires
// the heap lock. See mheap for details.
// allocManual must be called on the system stack because it may
// acquire the heap lock via allocSpan. See mheap for details.
//
//go:systemstack
func (h *mheap) allocManual(npage uintptr, stat *uint64) *mspan {
lock(&h.lock)
s := h.allocSpanLocked(npage, stat)
if s != nil {
s.manualFreeList = 0
s.allocCount = 0
s.spanclass = 0
s.nelems = 0
s.elemsize = 0
s.limit = s.base() + s.npages<<_PageShift
s.state.set(mSpanManual) // Publish the span
// Manually managed memory doesn't count toward heap_sys.
mSysStatDec(&memstats.heap_sys, s.npages*pageSize)
}

// This unlock acts as a release barrier. See mheap.alloc_m.
unlock(&h.lock)

return s
func (h *mheap) allocManual(npages uintptr, stat *uint64) *mspan {
return h.allocSpan(npages, true, 0, stat)
}

// setSpans modifies the span map so [spanOf(base), spanOf(base+npage*pageSize))
Expand Down Expand Up @@ -1103,43 +973,178 @@ func (h *mheap) allocNeedsZero(base, npage uintptr) (needZero bool) {
return
}

// Allocates a span of the given size. h must be locked.
// The returned span has been removed from the
// free structures, but its state is still mSpanFree.
func (h *mheap) allocSpanLocked(npage uintptr, stat *uint64) *mspan {
base, scav := h.pages.alloc(npage)
// allocSpan allocates an mspan which owns npages worth of memory.
//
// If manual == false, allocSpan allocates a heap span of class spanclass
// and updates heap accounting. If manual == true, allocSpan allocates a
// manually-managed span (spanclass is ignored), and the caller is
// responsible for any accounting related to its use of the span. Either
// way, allocSpan will atomically add the bytes in the newly allocated
// span to *sysStat.
//
// The returned span is fully initialized.
//
// h must not be locked.
//
// allocSpan must be called on the system stack both because it acquires
// the heap lock and because it must block GC transitions.
//
//go:systemstack
func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysStat *uint64) (s *mspan) {
// Function-global state.
gp := getg()
base, scav := uintptr(0), uintptr(0)

// We failed to do what we need to do without the lock.
lock(&h.lock)

// Try to acquire a base address.
base, scav = h.pages.alloc(npages)
if base != 0 {
goto HaveBase
}
if !h.grow(npage) {
if !h.grow(npages) {
unlock(&h.lock)
return nil
}
base, scav = h.pages.alloc(npage)
base, scav = h.pages.alloc(npages)
if base != 0 {
goto HaveBase
}
throw("grew heap, but no adequate free space found")

HaveBase:
if scav != 0 {
// sysUsed all the pages that are actually available
// in the span.
sysUsed(unsafe.Pointer(base), npage*pageSize)
mSysStatDec(&memstats.heap_released, scav)
if !manual {
// This is a heap span, so we should do some additional accounting
// which may only be done with the heap locked.

// Transfer stats from mcache to global.
memstats.heap_scan += uint64(gp.m.mcache.local_scan)
gp.m.mcache.local_scan = 0
memstats.tinyallocs += uint64(gp.m.mcache.local_tinyallocs)
gp.m.mcache.local_tinyallocs = 0

// Do some additional accounting if it's a large allocation.
if spanclass.sizeclass() == 0 {
mheap_.largealloc += uint64(npages * pageSize)
mheap_.nlargealloc++
atomic.Xadd64(&memstats.heap_live, int64(npages*pageSize))
}

// Either heap_live or heap_scan could have been updated.
if gcBlackenEnabled != 0 {
gcController.revise()
}
}

s := (*mspan)(h.spanalloc.alloc())
s.init(base, npage)
if h.allocNeedsZero(base, npage) {
// Allocate an mspan object before releasing the lock.
s = (*mspan)(h.spanalloc.alloc())
unlock(&h.lock)

// Initialize the span.
s.init(base, npages)
if h.allocNeedsZero(base, npages) {
s.needzero = 1
}
h.setSpans(s.base(), npage, s)
nbytes := npages * pageSize
if manual {
s.manualFreeList = 0
s.nelems = 0
s.limit = s.base() + s.npages*pageSize
// Manually managed memory doesn't count toward heap_sys.
mSysStatDec(&memstats.heap_sys, s.npages*pageSize)
s.state.set(mSpanManual)
} else {
// We must set span properties before the span is published anywhere
// since we're not holding the heap lock.
s.spanclass = spanclass
if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
s.elemsize = nbytes
s.nelems = 1

s.divShift = 0
s.divMul = 0
s.divShift2 = 0
s.baseMask = 0
} else {
s.elemsize = uintptr(class_to_size[sizeclass])
s.nelems = nbytes / s.elemsize

m := &class_to_divmagic[sizeclass]
s.divShift = m.shift
s.divMul = m.mul
s.divShift2 = m.shift2
s.baseMask = m.baseMask
}

// Initialize mark and allocation structures.
s.freeindex = 0
s.allocCache = ^uint64(0) // all 1s indicating all free.
s.gcmarkBits = newMarkBits(s.nelems)
s.allocBits = newAllocBits(s.nelems)

// It's safe to access h.sweepgen without the heap lock because it's
// only ever updated with the world stopped and we run on the
// systemstack which blocks a STW transition.
atomic.Store(&s.sweepgen, h.sweepgen)

// Now that the span is filled in, set its state. This
// is a publication barrier for the other fields in
// the span. While valid pointers into this span
// should never be visible until the span is returned,
// if the garbage collector finds an invalid pointer,
// access to the span may race with initialization of
// the span. We resolve this race by atomically
// setting the state after the span is fully
// initialized, and atomically checking the state in
// any situation where a pointer is suspect.
s.state.set(mSpanInUse)
}

// Commit and account for any scavenged memory that the span now owns.
if scav != 0 {
// sysUsed all the pages that are actually available
// in the span since some of them might be scavenged.
sysUsed(unsafe.Pointer(base), nbytes)
mSysStatDec(&memstats.heap_released, scav)
}
// Update stats.
nbytes := npage * pageSize
mSysStatInc(stat, nbytes)
mSysStatInc(sysStat, nbytes)
mSysStatDec(&memstats.heap_idle, nbytes)

// Publish the span in various locations.

// This is safe to call without the lock held because the slots
// related to this span will only every be read or modified by
// this thread until pointers into the span are published or
// pageInUse is updated.
h.setSpans(s.base(), npages, s)

if !manual {
// Add to swept in-use list.
//
// This publishes the span to root marking.
//
// h.sweepgen is guaranteed to only change during STW,
// and preemption is disabled in the page allocator.
h.sweepSpans[h.sweepgen/2%2].push(s)

// Mark in-use span in arena page bitmap.
//
// This publishes the span to the page sweeper, so
// it's imperative that the span be completely initialized
// prior to this line.
arena, pageIdx, pageMask := pageIndexOf(s.base())
atomic.Or8(&arena.pageInUse[pageIdx], pageMask)

// Update related page sweeper stats.
atomic.Xadd64(&h.pagesInUse, int64(npages))

if trace.enabled {
// Trace that a heap alloc occurred.
traceHeapAlloc()
}
}
return s
}

Expand Down

0 comments on commit a762221

Please sign in to comment.