From 41a269632a5df4edc3f6db683ba9a339d19c6ee7 Mon Sep 17 00:00:00 2001 From: greatroar <@> Date: Tue, 3 Nov 2020 14:27:53 +0100 Subject: [PATCH] Assembler version of Sum64String MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes Sum64String somewhere between a bit and a lot faster for short strings: name old time/op new time/op delta Hashes/xxhash,direct,bytes,n=5B-8 5.96ns ± 1% 5.92ns ± 1% -0.56% (p=0.000 n=17+18) Hashes/xxhash,direct,string,n=5B-8 8.71ns ± 1% 5.91ns ± 0% -32.15% (p=0.000 n=19+19) Hashes/xxhash,direct,bytes,n=100B-8 19.2ns ± 2% 18.9ns ± 1% -1.25% (p=0.000 n=18+18) Hashes/xxhash,direct,string,n=100B-8 20.3ns ± 1% 17.7ns ± 1% -12.74% (p=0.000 n=17+20) Hashes/xxhash,direct,bytes,n=4KB-8 278ns ± 1% 276ns ± 1% -0.54% (p=0.000 n=19+16) Hashes/xxhash,direct,string,n=4KB-8 280ns ± 2% 276ns ± 1% -1.35% (p=0.000 n=18+20) Hashes/xxhash,direct,bytes,n=10MB-8 767µs ± 2% 766µs ± 1% ~ (p=0.775 n=18+19) Hashes/xxhash,direct,string,n=10MB-8 773µs ± 1% 775µs ± 2% ~ (p=0.496 n=20+19) name old speed new speed delta Hashes/xxhash,direct,bytes,n=5B-8 839MB/s ± 1% 844MB/s ± 1% +0.55% (p=0.000 n=17+18) Hashes/xxhash,direct,string,n=5B-8 574MB/s ± 1% 846MB/s ± 0% +47.38% (p=0.000 n=19+19) Hashes/xxhash,direct,bytes,n=100B-8 5.21GB/s ± 2% 5.28GB/s ± 2% +1.21% (p=0.000 n=18+19) Hashes/xxhash,direct,string,n=100B-8 4.92GB/s ± 1% 5.64GB/s ± 2% +14.57% (p=0.000 n=17+20) Hashes/xxhash,direct,bytes,n=4KB-8 14.4GB/s ± 1% 14.5GB/s ± 1% +0.44% (p=0.000 n=19+17) Hashes/xxhash,direct,string,n=4KB-8 14.3GB/s ± 2% 14.5GB/s ± 0% +1.58% (p=0.000 n=18+17) Hashes/xxhash,direct,bytes,n=10MB-8 13.0GB/s ± 2% 13.1GB/s ± 1% ~ (p=0.775 n=18+19) Hashes/xxhash,direct,string,n=10MB-8 12.9GB/s ± 1% 12.9GB/s ± 2% ~ (p=0.496 n=20+19) --- sum64string_unsafe.go | 11 +++++++++++ xxhash_amd64.go | 6 ++++++ xxhash_amd64.s | 21 +++++++++++++++++---- xxhash_unsafe.go | 4 +--- 4 files changed, 35 insertions(+), 7 deletions(-) create mode 100644 
sum64string_unsafe.go diff --git a/sum64string_unsafe.go b/sum64string_unsafe.go new file mode 100644 index 0000000..ab683a6 --- /dev/null +++ b/sum64string_unsafe.go @@ -0,0 +1,11 @@ +// +build !amd64 !appengine,purego + +package xxhash + +// Forward to the version in xxhash_unsafe.go. This should be inlineable. + +// Sum64String computes the 64-bit xxHash digest of s. +// It may be faster than Sum64([]byte(s)) by avoiding a copy. +func Sum64String(s string) uint64 { + return sum64String(s) +} diff --git a/xxhash_amd64.go b/xxhash_amd64.go index ad14b80..4aa371b 100644 --- a/xxhash_amd64.go +++ b/xxhash_amd64.go @@ -9,5 +9,11 @@ package xxhash //go:noescape func Sum64(b []byte) uint64 +// Sum64String computes the 64-bit xxHash digest of s. +// It may be faster than Sum64([]byte(s)) by avoiding a copy. +// +//go:noescape +func Sum64String(s string) uint64 + //go:noescape func writeBlocks(d *Digest, b []byte) int diff --git a/xxhash_amd64.s b/xxhash_amd64.s index d580e32..15c6346 100644 --- a/xxhash_amd64.s +++ b/xxhash_amd64.s @@ -9,6 +9,7 @@ // CX pointer to advance through b // DX n // BX loop end +// DI pointer for storing the return value // R8 v1, k1 // R9 v2 // R10 v3 @@ -40,14 +41,26 @@ // func Sum64(b []byte) uint64 TEXT ·Sum64(SB), NOSPLIT, $0-32 + MOVQ b_base+0(FP), CX + MOVQ b_len+8(FP), DX + LEAQ ret+24(FP), DI + JMP sum64<>(SB) + +// func Sum64String(s string) uint64 +TEXT ·Sum64String(SB), NOSPLIT, $0-24 + MOVQ s_base+0(FP), CX + MOVQ s_len+8(FP), DX + LEAQ ret+16(FP), DI + JMP sum64<>(SB) + +// Takes arguments in CX, DX. Stores its return value through DI. +// All three must be set by the caller. +TEXT sum64<>(SB), NOFRAME+NOSPLIT, $0 // Load fixed primes. MOVQ ·prime1v(SB), R13 MOVQ ·prime2v(SB), R14 MOVQ ·prime4v(SB), R15 - // Load slice. - MOVQ b_base+0(FP), CX - MOVQ b_len+8(FP), DX LEAQ (CX)(DX*1), BX // The first loop limit will be len(b)-32. 
@@ -166,7 +179,7 @@ finalize: SHRQ $32, R12 XORQ R12, AX - MOVQ AX, ret+24(FP) + MOVQ AX, (DI) RET // writeBlocks uses the same registers as above except that it uses AX to store diff --git a/xxhash_unsafe.go b/xxhash_unsafe.go index 53bf76e..8a59c14 100644 --- a/xxhash_unsafe.go +++ b/xxhash_unsafe.go @@ -23,9 +23,7 @@ import ( // for strings to squeeze out a bit more speed. Mid-stack inlining should // eventually fix this. -// Sum64String computes the 64-bit xxHash digest of s. -// It may be faster than Sum64([]byte(s)) by avoiding a copy. -func Sum64String(s string) uint64 { +func sum64String(s string) uint64 { var b []byte bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data