Skip to content

Commit

Permalink
Assembler version of Sum64String
Browse files Browse the repository at this point in the history
This makes Sum64String noticeably faster for short strings (about 32% less
time per call at 5 B and about 13% at 100 B); large inputs are unchanged:

name                                  old time/op    new time/op    delta
Hashes/xxhash,direct,bytes,n=5B-8       5.96ns ± 1%    5.92ns ± 1%   -0.56%  (p=0.000 n=17+18)
Hashes/xxhash,direct,string,n=5B-8      8.71ns ± 1%    5.91ns ± 0%  -32.15%  (p=0.000 n=19+19)
Hashes/xxhash,direct,bytes,n=100B-8     19.2ns ± 2%    18.9ns ± 1%   -1.25%  (p=0.000 n=18+18)
Hashes/xxhash,direct,string,n=100B-8    20.3ns ± 1%    17.7ns ± 1%  -12.74%  (p=0.000 n=17+20)
Hashes/xxhash,direct,bytes,n=4KB-8       278ns ± 1%     276ns ± 1%   -0.54%  (p=0.000 n=19+16)
Hashes/xxhash,direct,string,n=4KB-8      280ns ± 2%     276ns ± 1%   -1.35%  (p=0.000 n=18+20)
Hashes/xxhash,direct,bytes,n=10MB-8      767µs ± 2%     766µs ± 1%     ~     (p=0.775 n=18+19)
Hashes/xxhash,direct,string,n=10MB-8     773µs ± 1%     775µs ± 2%     ~     (p=0.496 n=20+19)

name                                  old speed      new speed      delta
Hashes/xxhash,direct,bytes,n=5B-8      839MB/s ± 1%   844MB/s ± 1%   +0.55%  (p=0.000 n=17+18)
Hashes/xxhash,direct,string,n=5B-8     574MB/s ± 1%   846MB/s ± 0%  +47.38%  (p=0.000 n=19+19)
Hashes/xxhash,direct,bytes,n=100B-8   5.21GB/s ± 2%  5.28GB/s ± 2%   +1.21%  (p=0.000 n=18+19)
Hashes/xxhash,direct,string,n=100B-8  4.92GB/s ± 1%  5.64GB/s ± 2%  +14.57%  (p=0.000 n=17+20)
Hashes/xxhash,direct,bytes,n=4KB-8    14.4GB/s ± 1%  14.5GB/s ± 1%   +0.44%  (p=0.000 n=19+17)
Hashes/xxhash,direct,string,n=4KB-8   14.3GB/s ± 2%  14.5GB/s ± 0%   +1.58%  (p=0.000 n=18+17)
Hashes/xxhash,direct,bytes,n=10MB-8   13.0GB/s ± 2%  13.1GB/s ± 1%     ~     (p=0.775 n=18+19)
Hashes/xxhash,direct,string,n=10MB-8  12.9GB/s ± 1%  12.9GB/s ± 2%     ~     (p=0.496 n=20+19)
  • Loading branch information
greatroar committed Nov 3, 2020
1 parent a50fa75 commit a16de4a
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 7 deletions.
11 changes: 11 additions & 0 deletions sum64string_unsafe.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// +build !amd64 !appengine,purego

package xxhash

// Forward to the version in xxhash_unsafe.go. This should be inlineable.

// Sum64String computes the 64-bit xxHash digest of s.
// It may be faster than Sum64([]byte(s)) by avoiding a copy.
//
// In this build configuration it is a thin forwarder to the unexported
// sum64String; it is kept to a single call so that it should be inlineable.
func Sum64String(s string) uint64 {
return sum64String(s)
}
6 changes: 6 additions & 0 deletions xxhash_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,11 @@ package xxhash
//go:noescape
func Sum64(b []byte) uint64

// Sum64String computes the 64-bit xxHash digest of s.
// It may be faster than Sum64([]byte(s)) by avoiding a copy.
//
// This is a body-less declaration: the implementation is the Sum64String
// TEXT routine in xxhash_amd64.s.
//
//go:noescape
func Sum64String(s string) uint64

//go:noescape
func writeBlocks(d *Digest, b []byte) int
20 changes: 16 additions & 4 deletions xxhash_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
// CX pointer to advance through b
// DX n
// BX loop end
// DI pointer to the caller's return value slot (Sum64 and Sum64String)
// R8 v1, k1
// R9 v2
// R10 v3
Expand Down Expand Up @@ -40,14 +41,25 @@

// func Sum64(b []byte) uint64
//
// Entry stub for byte slices. It loads the inputs into the registers that
// the shared sum64<> routine expects (CX, DX, DI) and jumps to it; sum64<>
// stores the digest through DI.
TEXT ·Sum64(SB), NOSPLIT, $0-32
// CX = pointer to the first byte of b.
MOVQ b_base+0(FP), CX
// DX = len(b).
MOVQ b_len+8(FP), DX
// DI = address of the result slot, which follows the three-word slice
// header (base, len, cap) in the argument frame.
LEAQ ret+24(FP), DI
// Jump rather than call: sum64<> shares this frame.
JMP sum64<>(SB)

// func Sum64String(s string) uint64
//
// Entry stub for strings. It is identical to Sum64 except for the offset of
// the result slot: it loads CX, DX and DI and jumps to the shared sum64<>
// routine, which stores the digest through DI.
TEXT ·Sum64String(SB), NOSPLIT, $0-24
// CX = pointer to the first byte of s.
MOVQ s_base+0(FP), CX
// DX = len(s).
MOVQ s_len+8(FP), DX
// DI = address of the result slot, which follows the two-word string
// header (base, len) in the argument frame.
LEAQ ret+16(FP), DI
// Jump rather than call: sum64<> shares this frame.
JMP sum64<>(SB)

// Takes arguments in CX, DX. Stores its return value through DI.
TEXT sum64<>(SB), NOFRAME+NOSPLIT, $0
// Load fixed primes.
MOVQ ·prime1v(SB), R13
MOVQ ·prime2v(SB), R14
MOVQ ·prime4v(SB), R15

// Load slice.
MOVQ b_base+0(FP), CX
MOVQ b_len+8(FP), DX
LEAQ (CX)(DX*1), BX

// The first loop limit will be len(b)-32.
Expand Down Expand Up @@ -166,7 +178,7 @@ finalize:
SHRQ $32, R12
XORQ R12, AX

MOVQ AX, ret+24(FP)
MOVQ AX, (DI)
RET

// writeBlocks uses the same registers as above except that it uses AX to store
Expand Down
4 changes: 1 addition & 3 deletions xxhash_unsafe.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@ import (
// for strings to squeeze out a bit more speed. Mid-stack inlining should
// eventually fix this.

// Sum64String computes the 64-bit xxHash digest of s.
// It may be faster than Sum64([]byte(s)) by avoiding a copy.
func Sum64String(s string) uint64 {
func sum64String(s string) uint64 {
var b []byte
bh := (*reflect.SliceHeader)(unsafe.Pointer(&b))
bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data
Expand Down

0 comments on commit a16de4a

Please sign in to comment.