Skip to content

Commit

Permalink
runtime: process ptr bitmaps one word at a time
Browse files Browse the repository at this point in the history
[This is a retry of CL 407036 + its revert CL 422394. The only
content change is the 1-line change in cmd/internal/obj/objfile.go.]

Read the bitmaps one uintptr at a time instead of one byte at a time.

Performance so far:
 Allocation heavy, no retention: ~30% faster in heapBitsSetType
 Scan heavy, ~no allocation: ~even in scanobject

Change-Id: I04d899e1dbd23e989e9f552cdc1880318779c14c
Reviewed-on: https://go-review.googlesource.com/c/go/+/422635
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
Run-TryBot: Keith Randall <[email protected]>
Reviewed-by: Michael Knyszek <[email protected]>
  • Loading branch information
randall77 committed Aug 16, 2022
1 parent 6a9c674 commit e49e876
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 18 deletions.
6 changes: 5 additions & 1 deletion src/cmd/compile/internal/reflectdata/reflect.go
Original file line number Diff line number Diff line change
Expand Up @@ -1548,7 +1548,11 @@ func dgcsym(t *types.Type, write bool) (lsym *obj.LSym, useGCProg bool, ptrdata

// dgcptrmask emits and returns the symbol containing a pointer mask for type t.
func dgcptrmask(t *types.Type, write bool) *obj.LSym {
ptrmask := make([]byte, (types.PtrDataSize(t)/int64(types.PtrSize)+7)/8)
// Bytes we need for the ptrmask.
n := (types.PtrDataSize(t)/int64(types.PtrSize) + 7) / 8
// Runtime wants ptrmasks padded to a multiple of uintptr in size.
n = (n + int64(types.PtrSize) - 1) &^ (int64(types.PtrSize) - 1)
ptrmask := make([]byte, n)
fillptrmask(t, ptrmask)
p := fmt.Sprintf("runtime.gcbits.%x", ptrmask)

Expand Down
1 change: 0 additions & 1 deletion src/cmd/internal/obj/objfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,6 @@ func (w *writer) Sym(s *LSym) {
case strings.HasPrefix(s.Name, "go:string."),
strings.HasPrefix(name, "type:.namedata."),
strings.HasPrefix(name, "type:.importpath."),
strings.HasPrefix(name, "runtime.gcbits."),
strings.HasSuffix(name, ".opendefer"),
strings.HasSuffix(name, ".arginfo0"),
strings.HasSuffix(name, ".arginfo1"),
Expand Down
19 changes: 15 additions & 4 deletions src/reflect/type.go
Original file line number Diff line number Diff line change
Expand Up @@ -2271,7 +2271,10 @@ func bucketOf(ktyp, etyp *rtype) *rtype {

if ktyp.ptrdata != 0 || etyp.ptrdata != 0 {
nptr := (bucketSize*(1+ktyp.size+etyp.size) + goarch.PtrSize) / goarch.PtrSize
mask := make([]byte, (nptr+7)/8)
n := (nptr + 7) / 8
// Runtime needs pointer masks to be a multiple of uintptr in size.
n = (n + goarch.PtrSize - 1) &^ (goarch.PtrSize - 1)
mask := make([]byte, n)
base := bucketSize / goarch.PtrSize

if ktyp.ptrdata != 0 {
Expand Down Expand Up @@ -2977,7 +2980,10 @@ func ArrayOf(length int, elem Type) Type {
// Element is small with pointer mask; array is still small.
// Create direct pointer mask by turning each 1 bit in elem
// into length 1 bits in larger mask.
mask := make([]byte, (array.ptrdata/goarch.PtrSize+7)/8)
n := (array.ptrdata/goarch.PtrSize + 7) / 8
// Runtime needs pointer masks to be a multiple of uintptr in size.
n = (n + goarch.PtrSize - 1) &^ (goarch.PtrSize - 1)
mask := make([]byte, n)
emitGCMask(mask, 0, typ, array.len)
array.gcdata = &mask[0]

Expand Down Expand Up @@ -3146,8 +3152,13 @@ type bitVector struct {

// append a bit to the bitmap.
func (bv *bitVector) append(bit uint8) {
if bv.n%8 == 0 {
bv.data = append(bv.data, 0)
if bv.n%(8*goarch.PtrSize) == 0 {
// Runtime needs pointer masks to be a multiple of uintptr in size.
// Since reflect passes bv.data directly to the runtime as a pointer mask,
// we append a full uintptr of zeros at a time.
for i := 0; i < goarch.PtrSize; i++ {
bv.data = append(bv.data, 0)
}
}
bv.data[bv.n/8] |= bit << (bv.n % 8)
bv.n++
Expand Down
83 changes: 71 additions & 12 deletions src/runtime/mbitmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -893,6 +893,19 @@ func (h writeHeapBits) flush(addr, size uintptr) {
}
}

// readUintptr loads one pointer-sized word from the aligned address p and
// returns it with little-endian byte significance: the byte at p ends up in
// the least significant bits of the result.
//
// On little-endian targets the native load already has that layout. On
// big-endian targets the loaded word is byte-swapped at the target's
// pointer width to produce the same value.
func readUintptr(p *byte) uintptr {
	word := *(*uintptr)(unsafe.Pointer(p))
	if !goarch.BigEndian {
		// Native order is already little-endian; nothing to fix up.
		return word
	}
	// Big-endian target: reverse the bytes, using the swap that matches
	// the width of a uintptr here.
	if goarch.PtrSize == 8 {
		return uintptr(sys.Bswap64(uint64(word)))
	}
	return uintptr(sys.Bswap32(uint32(word)))
}

// heapBitsSetType records that the new allocation [x, x+size)
// holds in [x, x+dataSize) one or more values of type typ.
// (The number of values is given by dataSize / typ.size.)
Expand All @@ -917,7 +930,7 @@ func (h writeHeapBits) flush(addr, size uintptr) {
// machines, callers must execute a store/store (publication) barrier
// between calling this function and making the object reachable.
func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
const doubleCheck = true // slow but helpful; enable to test modifications to this code
const doubleCheck = false // slow but helpful; enable to test modifications to this code

if doubleCheck && dataSize%typ.size != 0 {
throw("heapBitsSetType: dataSize not a multiple of typ.size")
Expand Down Expand Up @@ -995,19 +1008,65 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// objects with scalar tails, all but the last tail does have to
// be initialized, because there is no way to say "skip forward".

for i := uintptr(0); true; i += typ.size {
p := typ.gcdata
var j uintptr
for j = 0; j+8*goarch.PtrSize < typ.ptrdata; j += 8 * goarch.PtrSize {
h = h.write(uintptr(*p), 8)
p = add1(p)
ptrs := typ.ptrdata / goarch.PtrSize
if typ.size == dataSize { // Single element
if ptrs <= ptrBits { // Single small element
m := readUintptr(typ.gcdata)
h = h.write(m, ptrs)
} else { // Single large element
p := typ.gcdata
for {
h = h.write(readUintptr(p), ptrBits)
p = addb(p, ptrBits/8)
ptrs -= ptrBits
if ptrs <= ptrBits {
break
}
}
m := readUintptr(p)
h = h.write(m, ptrs)
}
h = h.write(uintptr(*p), (typ.ptrdata-j)/goarch.PtrSize)
if i+typ.size == dataSize {
break // don't need the trailing nonptr bits on the last element.
} else { // Repeated element
words := typ.size / goarch.PtrSize // total words, including scalar tail
if words <= ptrBits { // Repeated small element
n := dataSize / typ.size
m := readUintptr(typ.gcdata)
// Make larger unit to repeat
for words <= ptrBits/2 {
if n&1 != 0 {
h = h.write(m, words)
}
n /= 2
m |= m << words
ptrs += words
words *= 2
if n == 1 {
break
}
}
for n > 1 {
h = h.write(m, words)
n--
}
h = h.write(m, ptrs)
} else { // Repeated large element
for i := uintptr(0); true; i += typ.size {
p := typ.gcdata
j := ptrs
for j > ptrBits {
h = h.write(readUintptr(p), ptrBits)
p = addb(p, ptrBits/8)
j -= ptrBits
}
m := readUintptr(p)
h = h.write(m, j)
if i+typ.size == dataSize {
break // don't need the trailing nonptr bits on the last element.
}
// Pad with zeros to the start of the next element.
h = h.pad(typ.size - typ.ptrdata)
}
}
// Pad with zeros to the start of the next element.
h = h.pad(typ.size - typ.ptrdata)
}
h.flush(x, size)

Expand Down

0 comments on commit e49e876

Please sign in to comment.