From d19fedd180fceb6a60961e19387893ddb047e4e6 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 5 Jun 2020 16:48:03 -0400 Subject: [PATCH] runtime: move checkmarks to a separate bitmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the GC stores the object marks for checkmarks mode in the heap bitmap using a rather complex encoding: for one word objects, the checkmark is stored in the pointer/scalar bit since one word objects must be pointers; for larger objects, the checkmark is stored in what would be the scan/dead bit for the second word of the object. This encoding made more sense when the runtime used the first scan/dead bit as the regular mark bit, but we moved away from that long ago. This encoding and overloading of the heap bitmap bits causes a great deal of complexity in many parts of the allocator and garbage collector and leads to some subtle bugs like #15903. This CL moves the checkmarks mark bits into their own per-arena bitmap and reclaims the second scan/dead bit as a regular scan/dead bit. I tested this by enabling doubleCheck mode in heapBitsSetType and running in both regular and GODEBUG=gccheckmark=1 mode. Fixes #15903. No performance degradation. (Very slight improvement on a few benchmarks, but it's probably just noise.) name old time/op new time/op delta BiogoIgor 16.6s ± 1% 16.4s ± 1% -0.94% (p=0.000 n=25+24) BiogoKrishna 19.2s ± 3% 19.2s ± 3% ~ (p=0.638 n=23+25) BleveIndexBatch100 6.12s ± 5% 6.17s ± 4% ~ (p=0.170 n=25+25) CompileTemplate 206ms ± 1% 205ms ± 1% -0.43% (p=0.005 n=24+24) CompileUnicode 82.2ms ± 2% 81.5ms ± 2% -0.95% (p=0.001 n=22+22) CompileGoTypes 755ms ± 3% 754ms ± 4% ~ (p=0.715 n=25+25) CompileCompiler 3.73s ± 1% 3.73s ± 1% ~ (p=0.445 n=25+24) CompileSSA 8.67s ± 1% 8.66s ± 1% ~ (p=0.836 n=24+22) CompileFlate 134ms ± 2% 133ms ± 1% -0.66% (p=0.001 n=24+23) CompileGoParser 164ms ± 1% 163ms ± 1% -0.85% (p=0.000 n=24+24) CompileReflect 466ms ± 5% 466ms ± 3% ~ (p=0.863 n=25+25) CompileTar 182ms ± 1% 182ms ± 1% -0.31% (p=0.048 n=24+24) CompileXML 249ms ± 1% 248ms ± 1% -0.32% (p=0.031 n=21+25) CompileStdCmd 10.3s ± 1% 10.3s ± 1% ~ (p=0.459 n=23+23) FoglemanFauxGLRenderRotateBoat 8.66s ± 1% 8.62s ± 1% -0.47% (p=0.000 n=23+24) FoglemanPathTraceRenderGopherIter1 20.3s ± 3% 20.2s ± 2% ~ (p=0.893 n=25+25) GopherLuaKNucleotide 29.7s ± 1% 29.8s ± 2% ~ (p=0.421 n=24+25) MarkdownRenderXHTML 246ms ± 1% 247ms ± 1% ~ (p=0.558 n=25+24) Tile38WithinCircle100kmRequest 779µs ± 4% 779µs ± 3% ~ (p=0.954 n=25+25) Tile38IntersectsCircle100kmRequest 1.02ms ± 3% 1.01ms ± 4% ~ (p=0.658 n=25+25) Tile38KNearestLimit100Request 984µs ± 4% 986µs ± 4% ~ (p=0.627 n=24+25) [Geo mean] 552ms 551ms -0.19% https://perf.golang.org/search?q=upload:20200723.6 Change-Id: Ic703f26a83fb034941dc6f4788fc997d56890dec Reviewed-on: https://go-review.googlesource.com/c/go/+/244539 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Michael Knyszek Reviewed-by: Martin Möhrmann --- src/reflect/all_test.go | 5 +- src/runtime/cgocall.go | 2 +- src/runtime/gcinfo_test.go | 19 +--- src/runtime/heapdump.go | 2 +- src/runtime/mbitmap.go | 173 ++++++++----------------------------- src/runtime/mcheckmark.go | 100 +++++++++++++++++++++ src/runtime/mgc.go | 4 +- src/runtime/mgcmark.go | 70 +-------------- src/runtime/mheap.go | 4 + 9 files changed, 148 insertions(+), 231 deletions(-) create mode 100644 src/runtime/mcheckmark.go diff --git a/src/reflect/all_test.go b/src/reflect/all_test.go index 6b31568bb9e9cd..ed2f2250779134 100644 --- a/src/reflect/all_test.go +++ b/src/reflect/all_test.go @@ -6467,12 +6467,9 @@ func verifyGCBitsSlice(t *testing.T, typ Type, cap int, bits []byte) { // Repeat the bitmap for the slice size, trimming scalars in // the last element. bits = rep(cap, bits) - for len(bits) > 2 && bits[len(bits)-1] == 0 { + for len(bits) > 0 && bits[len(bits)-1] == 0 { bits = bits[:len(bits)-1] } - if len(bits) == 2 && bits[0] == 0 && bits[1] == 0 { - bits = bits[:0] - } if !bytes.Equal(heapBits, bits) { t.Errorf("heapBits incorrect for make(%v, 0, %v)\nhave %v\nwant %v", typ, cap, heapBits, bits) } diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go index a4e64b00ccd22b..099aa540e0fd25 100644 --- a/src/runtime/cgocall.go +++ b/src/runtime/cgocall.go @@ -605,7 +605,7 @@ func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) { hbits := heapBitsForAddr(base) n := span.elemsize for i = uintptr(0); i < n; i += sys.PtrSize { - if i != 1*sys.PtrSize && !hbits.morePointers() { + if !hbits.morePointers() { // No more possible pointers. break } diff --git a/src/runtime/gcinfo_test.go b/src/runtime/gcinfo_test.go index ec1ba90c2e614a..0808b416f0a104 100644 --- a/src/runtime/gcinfo_test.go +++ b/src/runtime/gcinfo_test.go @@ -77,7 +77,7 @@ func TestGCInfo(t *testing.T) { } for i := 0; i < 10; i++ { - verifyGCInfo(t, "heap Ptr", escape(new(Ptr)), trimDead(padDead(infoPtr))) + verifyGCInfo(t, "heap Ptr", escape(new(Ptr)), trimDead(infoPtr)) verifyGCInfo(t, "heap PtrSlice", escape(&make([]*byte, 10)[0]), trimDead(infoPtr10)) verifyGCInfo(t, "heap ScalarPtr", escape(new(ScalarPtr)), trimDead(infoScalarPtr)) verifyGCInfo(t, "heap ScalarPtrSlice", escape(&make([]ScalarPtr, 4)[0]), trimDead(infoScalarPtr4)) @@ -97,25 +97,10 @@ func verifyGCInfo(t *testing.T, name string, p interface{}, mask0 []byte) { } } -func padDead(mask []byte) []byte { - // Because the dead bit isn't encoded in the second word, - // and because on 32-bit systems a one-word allocation - // uses a two-word block, the pointer info for a one-word - // object needs to be expanded to include an extra scalar - // on 32-bit systems to match the heap bitmap. - if runtime.PtrSize == 4 && len(mask) == 1 { - return []byte{mask[0], 0} - } - return mask -} - func trimDead(mask []byte) []byte { - for len(mask) > 2 && mask[len(mask)-1] == typeScalar { + for len(mask) > 0 && mask[len(mask)-1] == typeScalar { mask = mask[:len(mask)-1] } - if len(mask) == 2 && mask[0] == typeScalar && mask[1] == typeScalar { - mask = mask[:0] - } return mask } diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index cfd5c251b40a3b..4c3530921139e0 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -713,7 +713,7 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector { i := uintptr(0) hbits := heapBitsForAddr(p) for ; i < nptr; i++ { - if i != 1 && !hbits.morePointers() { + if !hbits.morePointers() { break // end of object } if hbits.isPointer() { diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index cad6f564045d0f..8de44c14b9968a 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -6,10 +6,11 @@ // // Stack, data, and bss bitmaps // -// Stack frames and global variables in the data and bss sections are described -// by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer -// to be visited during GC. The bits in each byte are consumed starting with -// the low bit: 1<<0, 1<<1, and so on. +// Stack frames and global variables in the data and bss sections are +// described by bitmaps with 1 bit per pointer-sized word. A "1" bit +// means the word is a live pointer to be visited by the GC (referred to +// as "pointer"). A "0" bit means the word should be ignored by GC +// (referred to as "scalar", though it could be a dead pointer value). // // Heap bitmap // @@ -20,18 +21,13 @@ // through start+3*ptrSize, ha.bitmap[1] holds the entries for // start+4*ptrSize through start+7*ptrSize, and so on. // -// In each 2-bit entry, the lower bit holds the same information as in the 1-bit -// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC. -// The meaning of the high bit depends on the position of the word being described -// in its allocated object. In all words *except* the second word, the -// high bit indicates that the object is still being described. In -// these words, if a bit pair with a high bit 0 is encountered, the -// low bit can also be assumed to be 0, and the object description is -// over. This 00 is called the ``dead'' encoding: it signals that the -// rest of the words in the object are uninteresting to the garbage -// collector. -// -// In the second word, the high bit is the GC ``checkmarked'' bit (see below). +// In each 2-bit entry, the lower bit is a pointer/scalar bit, just +// like in the stack/data bitmaps described above. The upper bit +// indicates scan/dead: a "1" value ("scan") indicates that there may +// be pointers in later words of the allocation, and a "0" value +// ("dead") indicates there are no more pointers in the allocation. If +// the upper bit is 0, the lower bit must also be 0, and this +// indicates scanning can ignore the rest of the allocation. // // The 2-bit entries are split when written into the byte, so that the top half // of the byte contains 4 high bits and the bottom half contains 4 low (pointer) @@ -39,38 +35,14 @@ // This form allows a copy from the 1-bit to the 4-bit form to keep the // pointer bits contiguous, instead of having to space them out. // -// The code makes use of the fact that the zero value for a heap bitmap -// has no live pointer bit set and is (depending on position), not used, -// not checkmarked, and is the dead encoding. -// These properties must be preserved when modifying the encoding. +// The code makes use of the fact that the zero value for a heap +// bitmap means scalar/dead. This property must be preserved when +// modifying the encoding. // // The bitmap for noscan spans is not maintained. Code must ensure // that an object is scannable before consulting its bitmap by // checking either the noscan bit in the span or by consulting its // type's information. -// -// Checkmarks -// -// In a concurrent garbage collector, one worries about failing to mark -// a live object due to mutations without write barriers or bugs in the -// collector implementation. As a sanity check, the GC has a 'checkmark' -// mode that retraverses the object graph with the world stopped, to make -// sure that everything that should be marked is marked. -// In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry -// for the second word of the object holds the checkmark bit. -// When not in checkmark mode, this bit is set to 1. -// -// The smallest possible allocation is 8 bytes. On a 32-bit machine, that -// means every allocated object has two words, so there is room for the -// checkmark bit. On a 64-bit machine, however, the 8-byte allocation is -// just one word, so the second bit pair is not available for encoding the -// checkmark. However, because non-pointer allocations are combined -// into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation -// must be a pointer, so the type bit in the first word is not actually needed. -// It is still used in general, except in checkmark the type bit is repurposed -// as the checkmark bit and then reinitialized (to 1) as the type bit when -// finished. -// package runtime @@ -551,33 +523,6 @@ func (h heapBits) isPointer() bool { return h.bits()&bitPointer != 0 } -// isCheckmarked reports whether the heap bits have the checkmarked bit set. -// It must be told how large the object at h is, because the encoding of the -// checkmark bit varies by size. -// h must describe the initial word of the object. -func (h heapBits) isCheckmarked(size uintptr) bool { - if size == sys.PtrSize { - return (*h.bitp>>h.shift)&bitPointer != 0 - } - // All multiword objects are 2-word aligned, - // so we know that the initial word's 2-bit pair - // and the second word's 2-bit pair are in the - // same heap bitmap byte, *h.bitp. - return (*h.bitp>>(heapBitsShift+h.shift))&bitScan != 0 -} - -// setCheckmarked sets the checkmarked bit. -// It must be told how large the object at h is, because the encoding of the -// checkmark bit varies by size. -// h must describe the initial word of the object. -func (h heapBits) setCheckmarked(size uintptr) { - if size == sys.PtrSize { - atomic.Or8(h.bitp, bitPointer<= nw { goto Phase3 } @@ -1203,14 +1103,13 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // We took care of 1-word and 2-word objects above, // so this is at least a 6-word object. hb = (b & (bitPointer | bitPointer< 1 { + hb |= bitScan << (3 * heapBitsShift) + } b >>= 2 nb -= 2 - // Note: no bitScan for second word because that's - // the checkmark. - *hbitp &^= uint8((bitPointer | bitScan | (bitPointer << heapBitsShift)) << (2 * heapBitsShift)) + *hbitp &^= uint8((bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << (2 * heapBitsShift)) *hbitp |= uint8(hb) hbitp = add1(hbitp) if w += 2; w >= nw { @@ -1449,11 +1348,7 @@ Phase4: if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 { want |= bitPointer } - if i != 1 { - want |= bitScan - } else { - have &^= bitScan - } + want |= bitScan } if have != want { println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size) @@ -2013,7 +1908,7 @@ func getgcmask(ep interface{}) (mask []byte) { if hbits.isPointer() { mask[i/sys.PtrSize] = 1 } - if i != 1*sys.PtrSize && !hbits.morePointers() { + if !hbits.morePointers() { mask = mask[:i/sys.PtrSize] break } diff --git a/src/runtime/mcheckmark.go b/src/runtime/mcheckmark.go new file mode 100644 index 00000000000000..1fd8e4e78f5dba --- /dev/null +++ b/src/runtime/mcheckmark.go @@ -0,0 +1,100 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// GC checkmarks +// +// In a concurrent garbage collector, one worries about failing to mark +// a live object due to mutations without write barriers or bugs in the +// collector implementation. As a sanity check, the GC has a 'checkmark' +// mode that retraverses the object graph with the world stopped, to make +// sure that everything that should be marked is marked. + +package runtime + +import ( + "runtime/internal/atomic" + "runtime/internal/sys" + "unsafe" +) + +// A checkmarksMap stores the GC marks in "checkmarks" mode. It is a +// per-arena bitmap with a bit for every word in the arena. The mark +// is stored on the bit corresponding to the first word of the marked +// allocation. +// +//go:notinheap +type checkmarksMap [heapArenaBytes / sys.PtrSize / 8]uint8 + +// If useCheckmark is true, marking of an object uses the checkmark +// bits instead of the standard mark bits. +var useCheckmark = false + +// startCheckmarks prepares for the checkmarks phase. +// +// The world must be stopped. +func startCheckmarks() { + // Clear all checkmarks. + for _, ai := range mheap_.allArenas { + arena := mheap_.arenas[ai.l1()][ai.l2()] + bitmap := arena.checkmarks + + if bitmap == nil { + // Allocate bitmap on first use. + bitmap = (*checkmarksMap)(persistentalloc(unsafe.Sizeof(*bitmap), 0, &memstats.gc_sys)) + if bitmap == nil { + throw("out of memory allocating checkmarks bitmap") + } + arena.checkmarks = bitmap + } else { + // Otherwise clear the existing bitmap. + for i := range bitmap { + bitmap[i] = 0 + } + } + } + // Enable checkmarking. + useCheckmark = true +} + +// endCheckmarks ends the checkmarks phase. +func endCheckmarks() { + if gcMarkWorkAvailable(nil) { + throw("GC work not flushed") + } + useCheckmark = false +} + +// setCheckmark throws if marking object is a checkmarks violation, +// and otherwise sets obj's checkmark. It returns true if obj was +// already checkmarked. +func setCheckmark(obj, base, off uintptr, mbits markBits) bool { + if !mbits.isMarked() { + printlock() + print("runtime: checkmarks found unexpected unmarked object obj=", hex(obj), "\n") + print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n") + + // Dump the source (base) object + gcDumpObject("base", base, off) + + // Dump the object + gcDumpObject("obj", obj, ^uintptr(0)) + + getg().m.traceback = 2 + throw("checkmark found unmarked object") + } + + ai := arenaIndex(obj) + arena := mheap_.arenas[ai.l1()][ai.l2()] + arenaWord := (obj / heapArenaBytes / 8) % uintptr(len(arena.checkmarks)) + mask := byte(1 << ((obj / heapArenaBytes) % 8)) + bytep := &arena.checkmarks[arenaWord] + + if atomic.Load8(bytep)&mask != 0 { + // Already checkmarked. + return true + } + + atomic.Or8(bytep, mask) + return false +} diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index b3499516f65a40..c8c4a4c7580d39 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1670,13 +1670,13 @@ func gcMarkTermination(nextTriggerRatio float64) { // mark using checkmark bits, to check that we // didn't forget to mark anything during the // concurrent mark process. + startCheckmarks() gcResetMarkState() - initCheckmarks() gcw := &getg().m.p.ptr().gcw gcDrain(gcw, 0) wbBufFlush1(getg().m.p.ptr()) gcw.dispose() - clearCheckmarks() + endCheckmarks() } // marking is complete so we can turn the write barrier off diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index fe988c46d9c46a..96910ff72992ee 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -1354,11 +1354,7 @@ func scanobject(b uintptr, gcw *gcWork) { } // Load bits once. See CL 22712 and issue 16973 for discussion. bits := hbits.bits() - // During checkmarking, 1-word objects store the checkmark - // in the type bit for the one word. The only one-word objects - // are pointers, or else they'd be merged with other non-pointer - // data into larger allocations. - if i != 1*sys.PtrSize && bits&bitScan == 0 { + if bits&bitScan == 0 { break // no more pointers in this object } if bits&bitPointer == 0 { @@ -1511,28 +1507,10 @@ func greyobject(obj, base, off uintptr, span *mspan, gcw *gcWork, objIndex uintp mbits := span.markBitsForIndex(objIndex) if useCheckmark { - if !mbits.isMarked() { - printlock() - print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n") - print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n") - - // Dump the source (base) object - gcDumpObject("base", base, off) - - // Dump the object - gcDumpObject("obj", obj, ^uintptr(0)) - - getg().m.traceback = 2 - throw("checkmark found unmarked object") - } - hbits := heapBitsForAddr(obj) - if hbits.isCheckmarked(span.elemsize) { + if setCheckmark(obj, base, off, mbits) { + // Already marked. return } - hbits.setCheckmarked(span.elemsize) - if !hbits.isCheckmarked(span.elemsize) { - throw("setCheckmarked and isCheckmarked disagree") - } } else { if debug.gccheckmark > 0 && span.isFree(objIndex) { print("runtime: marking free object ", hex(obj), " found at *(", hex(base), "+", hex(off), ")\n") @@ -1661,45 +1639,3 @@ func gcMarkTinyAllocs() { greyobject(c.tiny, 0, 0, span, gcw, objIndex) } } - -// Checkmarking - -// To help debug the concurrent GC we remark with the world -// stopped ensuring that any object encountered has their normal -// mark bit set. To do this we use an orthogonal bit -// pattern to indicate the object is marked. The following pattern -// uses the upper two bits in the object's boundary nibble. -// 01: scalar not marked -// 10: pointer not marked -// 11: pointer marked -// 00: scalar marked -// Xoring with 01 will flip the pattern from marked to unmarked and vica versa. -// The higher bit is 1 for pointers and 0 for scalars, whether the object -// is marked or not. -// The first nibble no longer holds the typeDead pattern indicating that the -// there are no more pointers in the object. This information is held -// in the second nibble. - -// If useCheckmark is true, marking of an object uses the -// checkmark bits (encoding above) instead of the standard -// mark bits. -var useCheckmark = false - -//go:nowritebarrier -func initCheckmarks() { - useCheckmark = true - for _, s := range mheap_.allspans { - if s.state.get() == mSpanInUse { - heapBitsForAddr(s.base()).initCheckmarkSpan(s.layout()) - } - } -} - -func clearCheckmarks() { - useCheckmark = false - for _, s := range mheap_.allspans { - if s.state.get() == mSpanInUse { - heapBitsForAddr(s.base()).clearCheckmarkSpan(s.layout()) - } - } -} diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 2c7bfd8a59e879..63413751602907 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -300,6 +300,10 @@ type heapArena struct { // during marking. pageSpecials [pagesPerArena / 8]uint8 + // checkmarks stores the debug.gccheckmark state. It is only + // used if debug.gccheckmark > 0. + checkmarks *checkmarksMap + // zeroedBase marks the first byte of the first page in this // arena which hasn't been used yet and is therefore already // zero. zeroedBase is relative to the arena base.