Skip to content

Commit

Permalink
runtime: move checkmarks to a separate bitmap
Browse files Browse the repository at this point in the history
Currently, the GC stores the object marks for checkmarks mode in the
heap bitmap using a rather complex encoding: for one word objects, the
checkmark is stored in the pointer/scalar bit since one word objects
must be pointers; for larger objects, the checkmark is stored in what
would be the scan/dead bit for the second word of the object. This
encoding made more sense when the runtime used the first scan/dead bit
as the regular mark bit, but we moved away from that long ago.

This encoding and overloading of the heap bitmap bits causes a great
deal of complexity in many parts of the allocator and garbage
collector and leads to some subtle bugs like #15903.

This CL moves the checkmarks mark bits into their own per-arena bitmap
and reclaims the second scan/dead bit as a regular scan/dead bit.

I tested this by enabling doubleCheck mode in heapBitsSetType and
running in both regular and GODEBUG=gccheckmark=1 mode.

Fixes #15903.

No performance degradation. (Very slight improvement on a few
benchmarks, but it's probably just noise.)

name                                old time/op            new time/op            delta
BiogoIgor                                      16.6s ± 1%             16.4s ± 1%  -0.94%  (p=0.000 n=25+24)
BiogoKrishna                                   19.2s ± 3%             19.2s ± 3%    ~     (p=0.638 n=23+25)
BleveIndexBatch100                             6.12s ± 5%             6.17s ± 4%    ~     (p=0.170 n=25+25)
CompileTemplate                                206ms ± 1%             205ms ± 1%  -0.43%  (p=0.005 n=24+24)
CompileUnicode                                82.2ms ± 2%            81.5ms ± 2%  -0.95%  (p=0.001 n=22+22)
CompileGoTypes                                 755ms ± 3%             754ms ± 4%    ~     (p=0.715 n=25+25)
CompileCompiler                                3.73s ± 1%             3.73s ± 1%    ~     (p=0.445 n=25+24)
CompileSSA                                     8.67s ± 1%             8.66s ± 1%    ~     (p=0.836 n=24+22)
CompileFlate                                   134ms ± 2%             133ms ± 1%  -0.66%  (p=0.001 n=24+23)
CompileGoParser                                164ms ± 1%             163ms ± 1%  -0.85%  (p=0.000 n=24+24)
CompileReflect                                 466ms ± 5%             466ms ± 3%    ~     (p=0.863 n=25+25)
CompileTar                                     182ms ± 1%             182ms ± 1%  -0.31%  (p=0.048 n=24+24)
CompileXML                                     249ms ± 1%             248ms ± 1%  -0.32%  (p=0.031 n=21+25)
CompileStdCmd                                  10.3s ± 1%             10.3s ± 1%    ~     (p=0.459 n=23+23)
FoglemanFauxGLRenderRotateBoat                 8.66s ± 1%             8.62s ± 1%  -0.47%  (p=0.000 n=23+24)
FoglemanPathTraceRenderGopherIter1             20.3s ± 3%             20.2s ± 2%    ~     (p=0.893 n=25+25)
GopherLuaKNucleotide                           29.7s ± 1%             29.8s ± 2%    ~     (p=0.421 n=24+25)
MarkdownRenderXHTML                            246ms ± 1%             247ms ± 1%    ~     (p=0.558 n=25+24)
Tile38WithinCircle100kmRequest                 779µs ± 4%             779µs ± 3%    ~     (p=0.954 n=25+25)
Tile38IntersectsCircle100kmRequest            1.02ms ± 3%            1.01ms ± 4%    ~     (p=0.658 n=25+25)
Tile38KNearestLimit100Request                  984µs ± 4%             986µs ± 4%    ~     (p=0.627 n=24+25)
[Geo mean]                                     552ms                  551ms       -0.19%

https://perf.golang.org/search?q=upload:20200723.6

Change-Id: Ic703f26a83fb034941dc6f4788fc997d56890dec
Reviewed-on: https://go-review.googlesource.com/c/go/+/244539
Run-TryBot: Austin Clements <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Michael Knyszek <[email protected]>
Reviewed-by: Martin Möhrmann <[email protected]>
  • Loading branch information
aclements committed Aug 17, 2020
1 parent 7148abc commit d19fedd
Show file tree
Hide file tree
Showing 9 changed files with 148 additions and 231 deletions.
5 changes: 1 addition & 4 deletions src/reflect/all_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6467,12 +6467,9 @@ func verifyGCBitsSlice(t *testing.T, typ Type, cap int, bits []byte) {
// Repeat the bitmap for the slice size, trimming scalars in
// the last element.
bits = rep(cap, bits)
for len(bits) > 2 && bits[len(bits)-1] == 0 {
for len(bits) > 0 && bits[len(bits)-1] == 0 {
bits = bits[:len(bits)-1]
}
if len(bits) == 2 && bits[0] == 0 && bits[1] == 0 {
bits = bits[:0]
}
if !bytes.Equal(heapBits, bits) {
t.Errorf("heapBits incorrect for make(%v, 0, %v)\nhave %v\nwant %v", typ, cap, heapBits, bits)
}
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/cgocall.go
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,7 @@ func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) {
hbits := heapBitsForAddr(base)
n := span.elemsize
for i = uintptr(0); i < n; i += sys.PtrSize {
if i != 1*sys.PtrSize && !hbits.morePointers() {
if !hbits.morePointers() {
// No more possible pointers.
break
}
Expand Down
19 changes: 2 additions & 17 deletions src/runtime/gcinfo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ func TestGCInfo(t *testing.T) {
}

for i := 0; i < 10; i++ {
verifyGCInfo(t, "heap Ptr", escape(new(Ptr)), trimDead(padDead(infoPtr)))
verifyGCInfo(t, "heap Ptr", escape(new(Ptr)), trimDead(infoPtr))
verifyGCInfo(t, "heap PtrSlice", escape(&make([]*byte, 10)[0]), trimDead(infoPtr10))
verifyGCInfo(t, "heap ScalarPtr", escape(new(ScalarPtr)), trimDead(infoScalarPtr))
verifyGCInfo(t, "heap ScalarPtrSlice", escape(&make([]ScalarPtr, 4)[0]), trimDead(infoScalarPtr4))
Expand All @@ -97,25 +97,10 @@ func verifyGCInfo(t *testing.T, name string, p interface{}, mask0 []byte) {
}
}

func padDead(mask []byte) []byte {
// Because the dead bit isn't encoded in the second word,
// and because on 32-bit systems a one-word allocation
// uses a two-word block, the pointer info for a one-word
// object needs to be expanded to include an extra scalar
// on 32-bit systems to match the heap bitmap.
if runtime.PtrSize == 4 && len(mask) == 1 {
return []byte{mask[0], 0}
}
return mask
}

func trimDead(mask []byte) []byte {
for len(mask) > 2 && mask[len(mask)-1] == typeScalar {
for len(mask) > 0 && mask[len(mask)-1] == typeScalar {
mask = mask[:len(mask)-1]
}
if len(mask) == 2 && mask[0] == typeScalar && mask[1] == typeScalar {
mask = mask[:0]
}
return mask
}

Expand Down
2 changes: 1 addition & 1 deletion src/runtime/heapdump.go
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,7 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector {
i := uintptr(0)
hbits := heapBitsForAddr(p)
for ; i < nptr; i++ {
if i != 1 && !hbits.morePointers() {
if !hbits.morePointers() {
break // end of object
}
if hbits.isPointer() {
Expand Down
173 changes: 34 additions & 139 deletions src/runtime/mbitmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
//
// Stack, data, and bss bitmaps
//
// Stack frames and global variables in the data and bss sections are described
// by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer
// to be visited during GC. The bits in each byte are consumed starting with
// the low bit: 1<<0, 1<<1, and so on.
// Stack frames and global variables in the data and bss sections are
// described by bitmaps with 1 bit per pointer-sized word. A "1" bit
// means the word is a live pointer to be visited by the GC (referred to
// as "pointer"). A "0" bit means the word should be ignored by GC
// (referred to as "scalar", though it could be a dead pointer value).
//
// Heap bitmap
//
Expand All @@ -20,57 +21,28 @@
// through start+3*ptrSize, ha.bitmap[1] holds the entries for
// start+4*ptrSize through start+7*ptrSize, and so on.
//
// In each 2-bit entry, the lower bit holds the same information as in the 1-bit
// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC.
// The meaning of the high bit depends on the position of the word being described
// in its allocated object. In all words *except* the second word, the
// high bit indicates that the object is still being described. In
// these words, if a bit pair with a high bit 0 is encountered, the
// low bit can also be assumed to be 0, and the object description is
// over. This 00 is called the ``dead'' encoding: it signals that the
// rest of the words in the object are uninteresting to the garbage
// collector.
//
// In the second word, the high bit is the GC ``checkmarked'' bit (see below).
// In each 2-bit entry, the lower bit is a pointer/scalar bit, just
// like in the stack/data bitmaps described above. The upper bit
// indicates scan/dead: a "1" value ("scan") indicates that there may
// be pointers in later words of the allocation, and a "0" value
// ("dead") indicates there are no more pointers in the allocation. If
// the upper bit is 0, the lower bit must also be 0, and this
// indicates scanning can ignore the rest of the allocation.
//
// The 2-bit entries are split when written into the byte, so that the top half
// of the byte contains 4 high bits and the bottom half contains 4 low (pointer)
// bits.
// This form allows a copy from the 1-bit to the 4-bit form to keep the
// pointer bits contiguous, instead of having to space them out.
//
// The code makes use of the fact that the zero value for a heap bitmap
// has no live pointer bit set and is (depending on position), not used,
// not checkmarked, and is the dead encoding.
// These properties must be preserved when modifying the encoding.
// The code makes use of the fact that the zero value for a heap
// bitmap means scalar/dead. This property must be preserved when
// modifying the encoding.
//
// The bitmap for noscan spans is not maintained. Code must ensure
// that an object is scannable before consulting its bitmap by
// checking either the noscan bit in the span or by consulting its
// type's information.
//
// Checkmarks
//
// In a concurrent garbage collector, one worries about failing to mark
// a live object due to mutations without write barriers or bugs in the
// collector implementation. As a sanity check, the GC has a 'checkmark'
// mode that retraverses the object graph with the world stopped, to make
// sure that everything that should be marked is marked.
// In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry
// for the second word of the object holds the checkmark bit.
// When not in checkmark mode, this bit is set to 1.
//
// The smallest possible allocation is 8 bytes. On a 32-bit machine, that
// means every allocated object has two words, so there is room for the
// checkmark bit. On a 64-bit machine, however, the 8-byte allocation is
// just one word, so the second bit pair is not available for encoding the
// checkmark. However, because non-pointer allocations are combined
// into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation
// must be a pointer, so the type bit in the first word is not actually needed.
// It is still used in general, except in checkmark the type bit is repurposed
// as the checkmark bit and then reinitialized (to 1) as the type bit when
// finished.
//

package runtime

Expand Down Expand Up @@ -551,33 +523,6 @@ func (h heapBits) isPointer() bool {
return h.bits()&bitPointer != 0
}

// isCheckmarked reports whether the heap bits have the checkmarked bit set.
// It must be told how large the object at h is, because the encoding of the
// checkmark bit varies by size.
// h must describe the initial word of the object.
func (h heapBits) isCheckmarked(size uintptr) bool {
if size == sys.PtrSize {
return (*h.bitp>>h.shift)&bitPointer != 0
}
// All multiword objects are 2-word aligned,
// so we know that the initial word's 2-bit pair
// and the second word's 2-bit pair are in the
// same heap bitmap byte, *h.bitp.
return (*h.bitp>>(heapBitsShift+h.shift))&bitScan != 0
}

// setCheckmarked sets the checkmarked bit.
// It must be told how large the object at h is, because the encoding of the
// checkmark bit varies by size.
// h must describe the initial word of the object.
func (h heapBits) setCheckmarked(size uintptr) {
if size == sys.PtrSize {
atomic.Or8(h.bitp, bitPointer<<h.shift)
return
}
atomic.Or8(h.bitp, bitScan<<(heapBitsShift+h.shift))
}

// bulkBarrierPreWrite executes a write barrier
// for every pointer slot in the memory range [src, src+size),
// using pointer/scalar information from [dst, dst+size).
Expand Down Expand Up @@ -795,7 +740,6 @@ func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {
// TODO(rsc): Perhaps introduce a different heapBitsSpan type.

// initSpan initializes the heap bitmap for a span.
// It clears all checkmark bits.
// If this is a span of pointer-sized objects, it initializes all
// words to pointer/scan.
// Otherwise, it initializes all words to scalar/dead.
Expand Down Expand Up @@ -826,45 +770,6 @@ func (h heapBits) initSpan(s *mspan) {
}
}

// initCheckmarkSpan initializes a span for being checkmarked.
// It clears the checkmark bits, which are set to 1 in normal operation.
func (h heapBits) initCheckmarkSpan(size, n, total uintptr) {
// The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely.
if sys.PtrSize == 8 && size == sys.PtrSize {
// Checkmark bit is type bit, bottom bit of every 2-bit entry.
// Only possible on 64-bit system, since minimum size is 8.
// Must clear type bit (checkmark bit) of every word.
// The type bit is the lower of every two-bit pair.
for i := uintptr(0); i < n; i += wordsPerBitmapByte {
*h.bitp &^= bitPointerAll
h = h.forward(wordsPerBitmapByte)
}
return
}
for i := uintptr(0); i < n; i++ {
*h.bitp &^= bitScan << (heapBitsShift + h.shift)
h = h.forward(size / sys.PtrSize)
}
}

// clearCheckmarkSpan undoes all the checkmarking in a span.
// The actual checkmark bits are ignored, so the only work to do
// is to fix the pointer bits. (Pointer bits are ignored by scanobject
// but consulted by typedmemmove.)
func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
// The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely.
if sys.PtrSize == 8 && size == sys.PtrSize {
// Checkmark bit is type bit, bottom bit of every 2-bit entry.
// Only possible on 64-bit system, since minimum size is 8.
// Must clear type bit (checkmark bit) of every word.
// The type bit is the lower of every two-bit pair.
for i := uintptr(0); i < n; i += wordsPerBitmapByte {
*h.bitp |= bitPointerAll
h = h.forward(wordsPerBitmapByte)
}
}
}

// countAlloc returns the number of objects allocated in span s by
// scanning the allocation bitmap.
func (s *mspan) countAlloc() int {
Expand Down Expand Up @@ -957,11 +862,11 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
if sys.PtrSize == 4 && dataSize == sys.PtrSize {
// 1 pointer object. On 32-bit machines clear the bit for the
// unused second word.
*h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift
*h.bitp &^= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << h.shift
*h.bitp |= (bitPointer | bitScan) << h.shift
} else {
// 2-element slice of pointer.
*h.bitp |= (bitPointer | bitScan | bitPointer<<heapBitsShift) << h.shift
*h.bitp |= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << h.shift
}
return
}
Expand All @@ -974,11 +879,10 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
}
}
b := uint32(*ptrmask)
hb := (b & 3) | bitScan
// bitPointer == 1, bitScan is 1 << 4, heapBitsShift is 1.
// 110011 is shifted h.shift and complemented.
// This clears out the bits that are about to be
// ored into *h.hbitp in the next instructions.
hb := b & 3
hb |= bitScanAll & ((bitScan << (typ.ptrdata / sys.PtrSize)) - 1)
// Clear the bits for this object so we can set the
// appropriate ones.
*h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift
*h.bitp |= uint8(hb << h.shift)
return
Expand Down Expand Up @@ -1155,11 +1059,6 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
throw("heapBitsSetType: called with non-pointer type")
return
}
if nw < 2 {
// Must write at least 2 words, because the "no scan"
// encoding doesn't take effect until the third word.
nw = 2
}

// Phase 1: Special case for leading byte (shift==0) or half-byte (shift==2).
// The leading byte is special because it contains the bits for word 1,
Expand All @@ -1172,21 +1071,22 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {

case h.shift == 0:
// Ptrmask and heap bitmap are aligned.
// Handle first byte of bitmap specially.
//
// This is a fast path for small objects.
//
// The first byte we write out covers the first four
// words of the object. The scan/dead bit on the first
// word must be set to scan since there are pointers
// somewhere in the object. The scan/dead bit on the
// second word is the checkmark, so we don't set it.
// somewhere in the object.
// In all following words, we set the scan/dead
// appropriately to indicate that the object contains
// to the next 2-bit entry in the bitmap.
//
// TODO: It doesn't matter if we set the checkmark, so
// maybe this case isn't needed any more.
// We set four bits at a time here, but if the object
// is fewer than four words, phase 3 will clear
// unnecessary bits.
hb = b & bitPointerAll
hb |= bitScan | bitScan<<(2*heapBitsShift) | bitScan<<(3*heapBitsShift)
hb |= bitScanAll
if w += 4; w >= nw {
goto Phase3
}
Expand All @@ -1203,14 +1103,13 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// We took care of 1-word and 2-word objects above,
// so this is at least a 6-word object.
hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift)
// This is not noscan, so set the scan bit in the
// first word.
hb |= bitScan << (2 * heapBitsShift)
if nw > 1 {
hb |= bitScan << (3 * heapBitsShift)
}
b >>= 2
nb -= 2
// Note: no bitScan for second word because that's
// the checkmark.
*hbitp &^= uint8((bitPointer | bitScan | (bitPointer << heapBitsShift)) << (2 * heapBitsShift))
*hbitp &^= uint8((bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << (2 * heapBitsShift))
*hbitp |= uint8(hb)
hbitp = add1(hbitp)
if w += 2; w >= nw {
Expand Down Expand Up @@ -1449,11 +1348,7 @@ Phase4:
if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 {
want |= bitPointer
}
if i != 1 {
want |= bitScan
} else {
have &^= bitScan
}
want |= bitScan
}
if have != want {
println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size)
Expand Down Expand Up @@ -2013,7 +1908,7 @@ func getgcmask(ep interface{}) (mask []byte) {
if hbits.isPointer() {
mask[i/sys.PtrSize] = 1
}
if i != 1*sys.PtrSize && !hbits.morePointers() {
if !hbits.morePointers() {
mask = mask[:i/sys.PtrSize]
break
}
Expand Down
Loading

0 comments on commit d19fedd

Please sign in to comment.