From a7909af24e080cb18e105af79be6915531147a86 Mon Sep 17 00:00:00 2001 From: Caleb Spare Date: Thu, 19 Nov 2020 20:05:45 -0800 Subject: [PATCH] Allow Sum64String and (*Digest).WriteString to be inlined MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Benchmarks: name old time/op new time/op delta Sum64String/4B-12 4.78ns ± 1% 3.57ns ± 4% -25.27% (p=0.000 n=8+10) Sum64String/100B-12 14.5ns ± 1% 12.9ns ± 0% -10.76% (p=0.000 n=9+10) Sum64String/4KB-12 229ns ± 0% 229ns ± 1% ~ (p=0.395 n=7+10) Sum64String/10MB-12 628µs ± 1% 630µs ± 2% ~ (p=1.000 n=9+10) DigestString/4B-12 11.4ns ± 1% 9.7ns ± 1% -14.95% (p=0.000 n=10+10) DigestString/100B-12 23.6ns ± 1% 21.3ns ± 2% -9.65% (p=0.000 n=10+10) DigestString/4KB-12 241ns ± 1% 239ns ± 0% -0.67% (p=0.001 n=10+7) DigestString/10MB-12 627µs ± 1% 628µs ± 1% ~ (p=0.631 n=10+10) name old speed new speed delta Sum64String/4B-12 837MB/s ± 1% 1124MB/s ± 2% +34.42% (p=0.000 n=10+9) Sum64String/100B-12 6.88GB/s ± 2% 7.72GB/s ± 1% +12.16% (p=0.000 n=10+10) Sum64String/4KB-12 17.5GB/s ± 0% 17.5GB/s ± 1% ~ (p=0.408 n=8+10) Sum64String/10MB-12 15.9GB/s ± 1% 15.9GB/s ± 2% ~ (p=1.000 n=9+10) DigestString/4B-12 350MB/s ± 1% 411MB/s ± 1% +17.55% (p=0.000 n=10+10) DigestString/100B-12 4.23GB/s ± 1% 4.69GB/s ± 1% +10.84% (p=0.000 n=10+9) DigestString/4KB-12 16.6GB/s ± 1% 16.7GB/s ± 0% +0.67% (p=0.001 n=10+8) DigestString/10MB-12 16.0GB/s ± 1% 15.9GB/s ± 1% ~ (p=0.631 n=10+10) And with -tags purego: name old time/op new time/op delta Sum64String/4B-12 5.57ns ± 1% 4.22ns ± 1% -24.14% (p=0.000 n=10+9) Sum64String/100B-12 16.0ns ± 1% 14.8ns ± 0% -7.27% (p=0.000 n=10+6) Sum64String/4KB-12 327ns ± 2% 325ns ± 1% ~ (p=0.050 n=10+10) Sum64String/10MB-12 866µs ± 3% 856µs ± 0% -1.05% (p=0.002 n=9+8) DigestString/4B-12 11.2ns ± 1% 10.0ns ± 1% -10.90% (p=0.000 n=10+9) DigestString/100B-12 25.5ns ± 1% 22.8ns ± 0% -10.62% (p=0.000 n=10+9) DigestString/4KB-12 342ns ± 1% 340ns ± 1% -0.56% (p=0.018 n=9+10) DigestString/10MB-12 877µs ± 1% 878µs ± 2% ~ (p=0.400 n=10+9) name old speed new speed delta Sum64String/4B-12 718MB/s ± 1% 947MB/s ± 1% +31.82% (p=0.000 n=10+9) Sum64String/100B-12 6.26GB/s ± 1% 6.75GB/s ± 1% +7.81% (p=0.000 n=10+10) Sum64String/4KB-12 12.2GB/s ± 2% 12.3GB/s ± 1% +0.70% (p=0.022 n=10+9) Sum64String/10MB-12 11.6GB/s ± 3% 11.7GB/s ± 0% +1.05% (p=0.002 n=9+8) DigestString/4B-12 357MB/s ± 1% 401MB/s ± 1% +12.32% (p=0.000 n=10+9) DigestString/100B-12 3.93GB/s ± 1% 4.40GB/s ± 0% +11.95% (p=0.000 n=10+9) DigestString/4KB-12 11.7GB/s ± 1% 11.8GB/s ± 1% +0.68% (p=0.011 n=10+10) DigestString/10MB-12 11.4GB/s ± 1% 11.4GB/s ± 2% ~ (p=0.400 n=10+9) --- xxhash_unsafe.go | 53 ++++++++++++++++++++++++++----------------- xxhash_unsafe_test.go | 37 ++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 21 deletions(-) diff --git a/xxhash_unsafe.go b/xxhash_unsafe.go index 53bf76e..376e0ca 100644 --- a/xxhash_unsafe.go +++ b/xxhash_unsafe.go @@ -6,41 +6,52 @@ package xxhash import ( - "reflect" "unsafe" ) -// Notes: +// In the future it's possible that compiler optimizations will make these +// XxxString functions unnecessary by realizing that calls such as +// Sum64([]byte(s)) don't need to copy s. See https://golang.org/issue/2205. +// If that happens, even if we keep these functions they can be replaced with +// the trivial safe code. + +// NOTE: The usual way of doing an unsafe string-to-[]byte conversion is: // -// See https://groups.google.com/d/msg/golang-nuts/dcjzJy-bSpw/tcZYBzQqAQAJ -// for some discussion about these unsafe conversions. +// var b []byte +// bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) +// bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data +// bh.Len = len(s) +// bh.Cap = len(s) // -// In the future it's possible that compiler optimizations will make these -// unsafe operations unnecessary: https://golang.org/issue/2205. +// Unfortunately, as of Go 1.15.3 the inliner's cost model assigns a high enough +// weight to this sequence of expressions that any function that uses it will +// not be inlined. Instead, the functions below use a different unsafe +// conversion designed to minimize the inliner weight and allow both to be +// inlined. There is also a test (TestInlining) which verifies that these are +// inlined. // -// Both of these wrapper functions still incur function call overhead since they -// will not be inlined. We could write Go/asm copies of Sum64 and Digest.Write -// for strings to squeeze out a bit more speed. Mid-stack inlining should -// eventually fix this. +// See https://github.com/golang/go/issues/42739 for discussion. // Sum64String computes the 64-bit xxHash digest of s. // It may be faster than Sum64([]byte(s)) by avoiding a copy. func Sum64String(s string) uint64 { - var b []byte - bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data - bh.Len = len(s) - bh.Cap = len(s) + b := *(*[]byte)(unsafe.Pointer(&sliceHeader{s, len(s)})) return Sum64(b) } // WriteString adds more data to d. It always returns len(s), nil. // It may be faster than Write([]byte(s)) by avoiding a copy. func (d *Digest) WriteString(s string) (n int, err error) { - var b []byte - bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data - bh.Len = len(s) - bh.Cap = len(s) - return d.Write(b) + d.Write(*(*[]byte)(unsafe.Pointer(&sliceHeader{s, len(s)}))) + // d.Write always returns len(s), nil. + // Ignoring the return output and returning these fixed values buys a + // savings of 6 in the inliner's cost model. + return len(s), nil +} + +// sliceHeader is similar to reflect.SliceHeader, but it assumes that the layout +// of the first two words is the same as the layout of a string. +type sliceHeader struct { + s string + cap int } diff --git a/xxhash_unsafe_test.go b/xxhash_unsafe_test.go index 4d3f964..6598267 100644 --- a/xxhash_unsafe_test.go +++ b/xxhash_unsafe_test.go @@ -3,6 +3,8 @@ package xxhash import ( + "os/exec" + "sort" "strings" "testing" ) @@ -22,3 +24,38 @@ func TestStringAllocs(t *testing.T) { }) }) } + +// This test is inspired by the Go runtime tests in https://golang.org/cl/57410. +// It asserts that certain important functions may be inlined. +func TestInlining(t *testing.T) { + funcs := map[string]struct{}{ + "Sum64String": {}, + "(*Digest).WriteString": {}, + } + + // TODO: it would be better to use the go binary that is running + // 'go test' (if we are running under 'go test'). + cmd := exec.Command("go", "test", "-gcflags=-m", "-run", "xxxx") + out, err := cmd.CombinedOutput() + if err != nil { + t.Log(string(out)) + t.Fatal(err) + } + + for _, line := range strings.Split(string(out), "\n") { + parts := strings.Split(line, ": can inline") + if len(parts) < 2 { + continue + } + delete(funcs, strings.TrimSpace(parts[1])) + } + + var failed []string + for fn := range funcs { + failed = append(failed, fn) + } + sort.Strings(failed) + for _, fn := range failed { + t.Errorf("function %s not inlined", fn) + } +}