Skip to content

Commit

Permalink
Trick the inliner into accepting Sum64String
Browse files Browse the repository at this point in the history
Benchmarks:

name                 old time/op    new time/op    delta
Sum64String/4B-12      4.94ns ± 3%    3.03ns ± 1%  -38.69%  (p=0.000 n=10+10)
Sum64String/100B-12    14.5ns ± 1%    12.0ns ± 1%  -17.22%  (p=0.000 n=10+10)
Sum64String/4KB-12      229ns ± 1%     227ns ± 0%   -1.19%  (p=0.000 n=10+9)
Sum64String/10MB-12     629µs ± 1%     629µs ± 0%     ~     (p=0.408 n=10+8)

name                 old speed      new speed      delta
Sum64String/4B-12     810MB/s ± 3%  1320MB/s ± 1%  +63.08%  (p=0.000 n=10+10)
Sum64String/100B-12  6.92GB/s ± 1%  8.34GB/s ± 1%  +20.58%  (p=0.000 n=10+10)
Sum64String/4KB-12   17.4GB/s ± 1%  17.6GB/s ± 0%   +1.11%  (p=0.000 n=10+9)
Sum64String/10MB-12  15.9GB/s ± 1%  15.9GB/s ± 0%     ~     (p=0.408 n=10+8)

And with -tags purego:

name                 old time/op    new time/op    delta
Sum64String/4B-12      5.57ns ± 1%    4.22ns ± 1%  -24.26%  (p=0.000 n=8+10)
Sum64String/100B-12    15.9ns ± 0%    14.4ns ± 0%   -9.12%  (p=0.000 n=8+10)
Sum64String/4KB-12      326ns ± 0%     325ns ± 0%   -0.33%  (p=0.008 n=9+10)
Sum64String/10MB-12     858µs ± 0%     860µs ± 1%     ~     (p=1.000 n=10+10)

name                 old speed      new speed      delta
Sum64String/4B-12     718MB/s ± 1%   949MB/s ± 1%  +32.08%  (p=0.000 n=8+10)
Sum64String/100B-12  6.29GB/s ± 0%  6.92GB/s ± 0%  +10.01%  (p=0.000 n=9+10)
Sum64String/4KB-12   12.3GB/s ± 0%  12.3GB/s ± 0%   +0.37%  (p=0.002 n=9+10)
Sum64String/10MB-12  11.7GB/s ± 0%  11.6GB/s ± 1%     ~     (p=1.000 n=10+10)
  • Loading branch information
cespare committed Nov 20, 2020
1 parent e0ea1e3 commit 2e6c845
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 15 deletions.
53 changes: 38 additions & 15 deletions xxhash_unsafe.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,37 +10,60 @@ import (
"unsafe"
)

// Notes:
//
// See https://groups.google.com/d/msg/golang-nuts/dcjzJy-bSpw/tcZYBzQqAQAJ
// for some discussion about these unsafe conversions.
//
// In the future it's possible that compiler optimizations will make these
// unsafe operations unnecessary: https://golang.org/issue/2205.
//
// Both of these wrapper functions still incur function call overhead since they
// will not be inlined. We could write Go/asm copies of Sum64 and Digest.Write
// for strings to squeeze out a bit more speed. Mid-stack inlining should
// eventually fix this.
// XxxString functions unnecessary by realizing that calls such as
// Sum64([]byte(s)) don't need to copy s. See https://golang.org/issue/2205.
// If that happens, even if we keep these functions they can be replaced with
// the trivial safe code.

// Sum64String computes the 64-bit xxHash digest of s.
// It may be faster than Sum64([]byte(s)) by avoiding a copy.
func Sum64String(s string) uint64 {
var b []byte
bh := (*reflect.SliceHeader)(unsafe.Pointer(&b))
bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data
bh.Len = len(s)
bh.Cap = len(s)

// NOTE: The usual way of doing the conversion is as follows:
//
// bh := (*reflect.SliceHeader)(unsafe.Pointer(&b))
// bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data
// bh.Len = len(s)
// bh.Cap = len(s)
//
// Unfortunately, as of Go 1.15.3 the inliner considers this slightly
// too expensive to be an inlining candidate. Writing the conversion
// this way gets us down to the max cost (barely).

*(*string)(unsafe.Pointer(&b)) = s
(*sliceHeader)(unsafe.Pointer(&b)).cap = len(s)
return Sum64(b)
}

// WriteString adds more data to d. It always returns len(s), nil.
// It may be faster than Write([]byte(s)) by avoiding a copy.
func (d *Digest) WriteString(s string) (n int, err error) {
// NOTE: Unfortunately, the tricks that work in Sum64String to make the
// function inlineable don't quite work here. The lowest weight I can
// obtain is with the code
//
// var b []byte
// (*sliceHeader)(unsafe.Pointer(&b)).s = s
// (*sliceHeader)(unsafe.Pointer(&b)).cap = len(s)
// d.Write(b)
// return len(s), nil
//
// which has a weight of 84, which exceeds the maximum of 80 for
// inlining.

var b []byte
bh := (*reflect.SliceHeader)(unsafe.Pointer(&b))
bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data
bh.Len = len(s)
bh.Cap = len(s)
return d.Write(b)
}

// sliceHeader is similar to reflect.SliceHeader, but it assumes that the layout
// of the first two words is the same as the layout of a string.
type sliceHeader struct {
s string
cap int
}
35 changes: 35 additions & 0 deletions xxhash_unsafe_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
package xxhash

import (
"os/exec"
"sort"
"strings"
"testing"
)
Expand All @@ -22,3 +24,36 @@ func TestStringAllocs(t *testing.T) {
})
})
}

// This test is inspired by the Go runtime tests in https://golang.org/cl/57410.
// It asserts that certain important functions may be inlined.
func TestInlining(t *testing.T) {
funcs := map[string]struct{}{
"Sum64String": {},
// "(*Digest).WriteString": {},
}

cmd := exec.Command("go", "test", "-gcflags=-m", "-run", "xxxx")
out, err := cmd.CombinedOutput()
if err != nil {
t.Log(string(out))
t.Fatal(err)
}

for _, line := range strings.Split(string(out), "\n") {
parts := strings.Split(line, ": can inline")
if len(parts) < 2 {
continue
}
delete(funcs, strings.TrimSpace(parts[1]))
}

var failed []string
for fn := range funcs {
failed = append(failed, fn)
}
sort.Strings(failed)
for _, fn := range failed {
t.Errorf("function %s not inlined", fn)
}
}

0 comments on commit 2e6c845

Please sign in to comment.