-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
exec: use go runtime hash functions in hashjoiner
The previous hash functions derived from Java's hashCode implementations were good when it came to deriving a hash that could be used for equality (my understanding is that hashCode is primarily used for this) but the distribution property of these functions was unclear. Additionally hashing was not supported for some types. This commit copies go's non-cryptographic hash functions used for maps to support hashing for all types and get good distribution properties with minimal performance impact. The hash algorithm used is derived from: https://github.com/Cyan4973/xxHash https://github.com/google/cityhash And was tested using https://github.com/aappleby/smhasher. Benchmark results shared in PR. Release note: None
- Loading branch information
Showing
5 changed files
with
219 additions
and
29 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
// Copyright 2014 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the golang.org/LICENSE file. | ||
|
||
// Hashing algorithm inspired by | ||
// xxhash: https://code.google.com/p/xxhash/ | ||
// cityhash: https://code.google.com/p/cityhash/ | ||
// Most of the code in this file is copied from the go runtime package. These | ||
// are the hash functions used for go maps. | ||
|
||
package exec | ||
|
||
import ( | ||
"math/rand" | ||
"unsafe" | ||
) | ||
|
||
const ( | ||
ptrSize = 4 << (^uintptr(0) >> 63) // unsafe.Sizeof(uintptr(0)) but an ideal const | ||
c0 = uintptr((8-ptrSize)/4*2860486313 + (ptrSize-4)/4*33054211828000289) | ||
c1 = uintptr((8-ptrSize)/4*3267000013 + (ptrSize-4)/4*23344194077549503) | ||
// Constants for multiplication: four random odd 64-bit numbers. | ||
m1 = 16877499708836156737 | ||
m2 = 2820277070424839065 | ||
m3 = 9497967016996688599 | ||
m4 = 15839092249703872147 | ||
) | ||
|
||
// hashKey is used to seed the hash function. | ||
var hashKey [4]uintptr | ||
|
||
func init() { | ||
rand.Read((*[len(hashKey) * ptrSize]byte)(unsafe.Pointer(&hashKey))[:]) | ||
hashKey[0] |= 1 // make sure these numbers are odd | ||
hashKey[1] |= 1 | ||
hashKey[2] |= 1 | ||
hashKey[3] |= 1 | ||
} | ||
|
||
func readUnaligned32(p unsafe.Pointer) uint32 { | ||
return *(*uint32)(p) | ||
} | ||
|
||
func readUnaligned64(p unsafe.Pointer) uint64 { | ||
return *(*uint64)(p) | ||
} | ||
|
||
// Should be a built-in for unsafe.Pointer? | ||
//go:nosplit | ||
func add(p unsafe.Pointer, x uintptr) unsafe.Pointer { | ||
return unsafe.Pointer(uintptr(p) + x) | ||
} | ||
|
||
//go:linkname noescape runtime.noescape | ||
func noescape(p unsafe.Pointer) unsafe.Pointer | ||
|
||
func memhash(p unsafe.Pointer, seed, s uintptr) uintptr { | ||
h := uint64(seed + s*hashKey[0]) | ||
tail: | ||
switch { | ||
case s == 0: | ||
case s < 4: | ||
h ^= uint64(*(*byte)(p)) | ||
h ^= uint64(*(*byte)(add(p, s>>1))) << 8 | ||
h ^= uint64(*(*byte)(add(p, s-1))) << 16 | ||
h = rotl31(h*m1) * m2 | ||
case s <= 8: | ||
h ^= uint64(readUnaligned32(p)) | ||
h ^= uint64(readUnaligned32(add(p, s-4))) << 32 | ||
h = rotl31(h*m1) * m2 | ||
case s <= 16: | ||
h ^= readUnaligned64(p) | ||
h = rotl31(h*m1) * m2 | ||
h ^= readUnaligned64(add(p, s-8)) | ||
h = rotl31(h*m1) * m2 | ||
case s <= 32: | ||
h ^= readUnaligned64(p) | ||
h = rotl31(h*m1) * m2 | ||
h ^= readUnaligned64(add(p, 8)) | ||
h = rotl31(h*m1) * m2 | ||
h ^= readUnaligned64(add(p, s-16)) | ||
h = rotl31(h*m1) * m2 | ||
h ^= readUnaligned64(add(p, s-8)) | ||
h = rotl31(h*m1) * m2 | ||
default: | ||
v1 := h | ||
v2 := uint64(seed * hashKey[1]) | ||
v3 := uint64(seed * hashKey[2]) | ||
v4 := uint64(seed * hashKey[3]) | ||
for s >= 32 { | ||
v1 ^= readUnaligned64(p) | ||
v1 = rotl31(v1*m1) * m2 | ||
p = add(p, 8) | ||
v2 ^= readUnaligned64(p) | ||
v2 = rotl31(v2*m2) * m3 | ||
p = add(p, 8) | ||
v3 ^= readUnaligned64(p) | ||
v3 = rotl31(v3*m3) * m4 | ||
p = add(p, 8) | ||
v4 ^= readUnaligned64(p) | ||
v4 = rotl31(v4*m4) * m1 | ||
p = add(p, 8) | ||
s -= 32 | ||
} | ||
h = v1 ^ v2 ^ v3 ^ v4 | ||
goto tail | ||
} | ||
|
||
h ^= h >> 29 | ||
h *= m3 | ||
h ^= h >> 32 | ||
return uintptr(h) | ||
} | ||
|
||
func memhash8(p unsafe.Pointer, h uintptr) uintptr { | ||
return memhash(p, h, 1) | ||
} | ||
|
||
func memhash16(p unsafe.Pointer, h uintptr) uintptr { | ||
return memhash(p, h, 2) | ||
} | ||
|
||
func memhash32(p unsafe.Pointer, seed uintptr) uintptr { | ||
h := uint64(seed + 4*hashKey[0]) | ||
v := uint64(readUnaligned32(p)) | ||
h ^= v | ||
h ^= v << 32 | ||
h = rotl31(h*m1) * m2 | ||
h ^= h >> 29 | ||
h *= m3 | ||
h ^= h >> 32 | ||
return uintptr(h) | ||
} | ||
|
||
func memhash64(p unsafe.Pointer, seed uintptr) uintptr { | ||
h := uint64(seed + 8*hashKey[0]) | ||
h ^= uint64(readUnaligned32(p)) | uint64(readUnaligned32(add(p, 4)))<<32 | ||
h = rotl31(h*m1) * m2 | ||
h ^= h >> 29 | ||
h *= m3 | ||
h ^= h >> 32 | ||
return uintptr(h) | ||
} | ||
|
||
// Note: in order to get the compiler to issue rotl instructions, we | ||
// need to constant fold the shift amount by hand. | ||
// TODO: convince the compiler to issue rotl instructions after inlining. | ||
func rotl31(x uint64) uint64 { | ||
return (x << 31) | (x >> (64 - 31)) | ||
} | ||
|
||
// NOTE: Because NaN != NaN, a map can contain any | ||
// number of (mostly useless) entries keyed with NaNs. | ||
// To avoid long hash chains, we assign a random number | ||
// as the hash value for a NaN. | ||
|
||
func f32hash(p unsafe.Pointer, h uintptr) uintptr { | ||
f := *(*float32)(p) | ||
switch { | ||
case f == 0: | ||
return c1 * (c0 ^ h) // +0, -0 | ||
case f != f: | ||
// TODO(asubiotto): fastrand relies on some stack internals. | ||
//return c1 * (c0 ^ h ^ uintptr(fastrand())) // any kind of NaN | ||
return c1 * (c0 ^ h ^ uintptr(rand.Uint32())) | ||
default: | ||
return memhash(p, h, 4) | ||
} | ||
} | ||
|
||
func f64hash(p unsafe.Pointer, h uintptr) uintptr { | ||
f := *(*float64)(p) | ||
switch { | ||
case f == 0: | ||
return c1 * (c0 ^ h) // +0, -0 | ||
case f != f: | ||
// TODO(asubiotto): fastrand relies on some stack internals. | ||
//return c1 * (c0 ^ h ^ uintptr(fastrand())) // any kind of NaN | ||
return c1 * (c0 ^ h ^ uintptr(rand.Uint32())) // any kind of NaN | ||
default: | ||
return memhash(p, h, 8) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
// Empty assembly file to allow go:linkname to work. |