From 0f8d8d1df3c6671ef69a02073aed7e92a1ad9de1 Mon Sep 17 00:00:00 2001 From: Oleg Grenrus Date: Mon, 23 Aug 2021 11:20:22 +0300 Subject: [PATCH 1/3] Add 'golden' test that Text hash doesn't change --- tests/Regress.hs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/Regress.hs b/tests/Regress.hs index 94dd685..8a591fe 100644 --- a/tests/Regress.hs +++ b/tests/Regress.hs @@ -1,5 +1,6 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE OverloadedStrings #-} module Regress (regressions) where @@ -9,6 +10,7 @@ import Test.HUnit ((@=?)) import GHC.Generics (Generic) import Data.List (nub) import Data.Fixed (Pico) +import Data.Text (Text) #ifdef HAVE_MMAP import qualified Regress.Mmap as Mmap @@ -16,6 +18,8 @@ import qualified Regress.Mmap as Mmap import Data.Hashable +#include "MachDeps.h" + regressions :: [F.Test] regressions = [] ++ #ifdef HAVE_MMAP @@ -35,6 +39,10 @@ regressions = [] ++ let ns = take 20 $ iterate S Z let hs = map hash ns hs @=? nub hs +#if WORD_SIZE_IN_BITS == 64 + , testCase "64 bit Text" $ do + hash ("hello world" :: Text) @=? 6567282331143050109 +#endif ] where nullaryCase :: Int -> SumOfNullary -> IO () From 585240ce0691c0ce783e98163f0231f2dcfc03b5 Mon Sep 17 00:00:00 2001 From: Michael Tolly Date: Thu, 5 Aug 2021 00:32:10 -0500 Subject: [PATCH 2/3] Fix FNV on 64-bit Windows --- cbits/fnv.c | 13 +++++++++---- src/Data/Hashable/Class.hs | 16 ++++++++++++---- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/cbits/fnv.c b/cbits/fnv.c index e561905..e8cba05 100644 --- a/cbits/fnv.c +++ b/cbits/fnv.c @@ -32,11 +32,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "MachDeps.h" +#include #if WORD_SIZE_IN_BITS == 64 #define FNV_PRIME 1099511628211 +#define FNV_SIGNED int64_t +#define FNV_UNSIGNED uint64_t #else #define FNV_PRIME 16777619 +#define FNV_SIGNED int32_t +#define FNV_UNSIGNED uint32_t #endif /* FNV-1 hash @@ -44,11 +49,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * The FNV-1 hash description: http://isthe.com/chongo/tech/comp/fnv/ * The FNV-1 hash is public domain: http://isthe.com/chongo/tech/comp/fnv/#public_domain */ -long hashable_fnv_hash(const unsigned char* str, long len, long salt) { +FNV_SIGNED hashable_fnv_hash(const unsigned char* str, FNV_SIGNED len, FNV_SIGNED salt) { - unsigned long hash = salt; + FNV_UNSIGNED hash = salt; while (len--) { - hash = (hash * 16777619) ^ *str++; + hash = (hash * FNV_PRIME) ^ *str++; } return hash; @@ -57,6 +62,6 @@ long hashable_fnv_hash(const unsigned char* str, long len, long salt) { /* Used for ByteArray#s. We can't treat them like pointers in native Haskell, but we can in unsafe FFI calls. */ -long hashable_fnv_hash_offset(const unsigned char* str, long offset, long len, long salt) { +FNV_SIGNED hashable_fnv_hash_offset(const unsigned char* str, FNV_SIGNED offset, FNV_SIGNED len, FNV_SIGNED salt) { return hashable_fnv_hash(str + offset, len, salt); } diff --git a/src/Data/Hashable/Class.hs b/src/Data/Hashable/Class.hs index ad403a3..eed6ce7 100644 --- a/src/Data/Hashable/Class.hs +++ b/src/Data/Hashable/Class.hs @@ -122,10 +122,8 @@ import GHC.Fingerprint.Type(Fingerprint(..)) #endif #if MIN_VERSION_base(4,5,0) -import Foreign.C (CLong(..)) import Foreign.C.Types (CInt(..)) #else -import Foreign.C (CLong) import Foreign.C.Types (CInt) #endif @@ -816,8 +814,13 @@ hashPtrWithSalt p len salt = fromIntegral `fmap` c_hashCString (castPtr p) (fromIntegral len) (fromIntegral salt) +#if WORD_SIZE_IN_BITS == 64 +foreign import ccall unsafe "hashable_fnv_hash" c_hashCString + :: CString -> Int64 -> Int64 -> IO Int64 +#else foreign import ccall unsafe "hashable_fnv_hash" c_hashCString - :: CString -> CLong -> CLong -> IO CLong + :: CString -> Int32 -> Int32 -> IO Int32 +#endif -- | Compute a hash value for the content of this 'ByteArray#', -- beginning at the specified offset, using specified number of bytes. @@ -844,8 +847,13 @@ hashByteArrayWithSalt ba !off !len !h = fromIntegral $ c_hashByteArray ba (fromIntegral off) (fromIntegral len) (fromIntegral h) +#if WORD_SIZE_IN_BITS == 64 +foreign import ccall unsafe "hashable_fnv_hash_offset" c_hashByteArray + :: ByteArray# -> Int64 -> Int64 -> Int64 -> Int64 +#else foreign import ccall unsafe "hashable_fnv_hash_offset" c_hashByteArray - :: ByteArray# -> CLong -> CLong -> CLong -> CLong + :: ByteArray# -> Int32 -> Int32 -> Int32 -> Int32 +#endif -- | Combine two given hash values. 'combine' has zero as a left -- identity. From 3e3d4c53f2a097cfb38461804aa278509642e673 Mon Sep 17 00:00:00 2001 From: Oleg Grenrus Date: Mon, 23 Aug 2021 11:47:51 +0300 Subject: [PATCH 3/3] Use CApiFFI, small tweaks --- CHANGES.md | 6 ++++++ cbits/fnv.c | 17 +++-------------- hashable-bench/hashable-bench.cabal | 9 ++++++--- hashable-bench/include | 1 + hashable.cabal | 5 +++-- include/HsHashable.h | 22 ++++++++++++++++++++++ src/Data/Hashable/Class.hs | 27 +++++++++++++-------------- tests/Regress.hs | 2 +- 8 files changed, 55 insertions(+), 34 deletions(-) create mode 120000 hashable-bench/include create mode 100644 include/HsHashable.h diff --git a/CHANGES.md b/CHANGES.md index 8a55c8f..370776e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,11 @@ See also https://pvp.haskell.org/faq +## Version 1.3.3.0 + + * `Text` hashing uses 64-bit FNV prime + * Don't truncate Text hashvalues on 64bit Windows: + https://github.com/haskell-unordered-containers/hashable/pull/211 + ## Version 1.3.2.0 * Add `Hashable (Fixed a)` for `base <4.7` versions. diff --git a/cbits/fnv.c b/cbits/fnv.c index e8cba05..324a53f 100644 --- a/cbits/fnv.c +++ b/cbits/fnv.c @@ -31,25 +31,14 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "MachDeps.h" -#include - -#if WORD_SIZE_IN_BITS == 64 -#define FNV_PRIME 1099511628211 -#define FNV_SIGNED int64_t -#define FNV_UNSIGNED uint64_t -#else -#define FNV_PRIME 16777619 -#define FNV_SIGNED int32_t -#define FNV_UNSIGNED uint32_t -#endif +#include "HsHashable.h" /* FNV-1 hash * * The FNV-1 hash description: http://isthe.com/chongo/tech/comp/fnv/ * The FNV-1 hash is public domain: http://isthe.com/chongo/tech/comp/fnv/#public_domain */ -FNV_SIGNED hashable_fnv_hash(const unsigned char* str, FNV_SIGNED len, FNV_SIGNED salt) { +FNV_UNSIGNED hashable_fnv_hash(const unsigned char* str, FNV_SIGNED len, FNV_UNSIGNED salt) { FNV_UNSIGNED hash = salt; while (len--) { @@ -62,6 +51,6 @@ FNV_SIGNED hashable_fnv_hash(const unsigned char* str, FNV_SIGNED len, FNV_SIGNE /* Used for ByteArray#s. We can't treat them like pointers in native Haskell, but we can in unsafe FFI calls. */ -FNV_SIGNED hashable_fnv_hash_offset(const unsigned char* str, FNV_SIGNED offset, FNV_SIGNED len, FNV_SIGNED salt) { +FNV_UNSIGNED hashable_fnv_hash_offset(const unsigned char* str, FNV_SIGNED offset, FNV_SIGNED len, FNV_UNSIGNED salt) { return hashable_fnv_hash(str + offset, len, salt); } diff --git a/hashable-bench/hashable-bench.cabal b/hashable-bench/hashable-bench.cabal index 41b6c1b..6378ada 100644 --- a/hashable-bench/hashable-bench.cabal +++ b/hashable-bench/hashable-bench.cabal @@ -32,7 +32,9 @@ tested-with: || ==8.10.4 || ==9.0.1 -extra-source-files: benchmarks/cbits/*.h +extra-source-files: + benchmarks/cbits/*.h + include/HsHashable.h flag integer-gmp description: @@ -61,6 +63,7 @@ library Data.Hashable.Generic.Instances c-sources: cbits/fnv.c + include-dirs: include hs-source-dirs: src build-depends: base >=4.5 && <4.16 @@ -113,12 +116,12 @@ benchmark hashable-benchmark , siphash , text - if impl(ghc -any) + if impl(ghc) build-depends: ghc-prim , text >=0.11.0.5 - if (impl(ghc -any) && flag(integer-gmp)) + if (impl(ghc) && flag(integer-gmp)) build-depends: integer-gmp >=0.2 if impl(ghc >=7.2.1) diff --git a/hashable-bench/include b/hashable-bench/include new file mode 120000 index 0000000..f5030fe --- /dev/null +++ b/hashable-bench/include @@ -0,0 +1 @@ +../include \ No newline at end of file diff --git a/hashable.cabal b/hashable.cabal index 79520af..c02e6d3 100644 --- a/hashable.cabal +++ b/hashable.cabal @@ -1,7 +1,6 @@ cabal-version: 1.12 name: hashable -version: 1.3.2.0 -x-revision: 1 +version: 1.3.3.0 synopsis: A class for types that can be converted to a hash value description: This package defines a class, 'Hashable', for types that @@ -43,6 +42,7 @@ tested-with: extra-source-files: CHANGES.md README.md + include/HsHashable.h flag integer-gmp description: @@ -71,6 +71,7 @@ library Data.Hashable.Generic.Instances c-sources: cbits/fnv.c + include-dirs: include hs-source-dirs: src build-depends: base >=4.5 && <4.17 diff --git a/include/HsHashable.h b/include/HsHashable.h new file mode 100644 index 0000000..9c62fef --- /dev/null +++ b/include/HsHashable.h @@ -0,0 +1,22 @@ +#ifndef HS_HASHABLE_H +#define HS_HASHABLE_H + +#include "MachDeps.h" +#include + +#if WORD_SIZE_IN_BITS == 64 +#define FNV_PRIME 1099511628211 +#define FNV_SIGNED int64_t +#define FNV_UNSIGNED uint64_t +#else +#define FNV_PRIME 16777619 +#define FNV_SIGNED int32_t +#define FNV_UNSIGNED uint32_t +#endif + +uint64_t hs_hashable_init(); + +FNV_UNSIGNED hashable_fnv_hash(const unsigned char* str, FNV_SIGNED len, FNV_UNSIGNED salt); +FNV_UNSIGNED hashable_fnv_hash_offset(const unsigned char* str, FNV_SIGNED offset, FNV_SIGNED len, FNV_UNSIGNED salt); + +#endif diff --git a/src/Data/Hashable/Class.hs b/src/Data/Hashable/Class.hs index eed6ce7..f164eb2 100644 --- a/src/Data/Hashable/Class.hs +++ b/src/Data/Hashable/Class.hs @@ -1,7 +1,7 @@ {-# LANGUAGE BangPatterns, CPP, MagicHash, ScopedTypeVariables, UnliftedFFITypes, DeriveDataTypeable, DefaultSignatures, FlexibleContexts, TypeFamilies, - MultiParamTypeClasses #-} + MultiParamTypeClasses, CApiFFI #-} {-# LANGUAGE Trustworthy #-} @@ -121,11 +121,7 @@ import Data.Typeable.Internal (Typeable, TypeRep (..)) import GHC.Fingerprint.Type(Fingerprint(..)) #endif -#if MIN_VERSION_base(4,5,0) import Foreign.C.Types (CInt(..)) -#else -import Foreign.C.Types (CInt) -#endif #if !(MIN_VERSION_base(4,8,0)) import Data.Word (Word) @@ -208,7 +204,7 @@ initialSeed :: Word64 initialSeed = unsafePerformIO initialSeedC {-# NOINLINE initialSeed #-} -foreign import ccall "hs_hashable_init" initialSeedC :: IO Word64 +foreign import capi "HsHashable.h hs_hashable_init" initialSeedC :: IO Word64 #endif -- | A default salt used in the implementation of 'hash'. @@ -721,6 +717,7 @@ instance Hashable TL.Text where hashThreadId :: ThreadId -> Int hashThreadId (ThreadId t) = hash (fromIntegral (getThreadId t) :: Int) +-- this cannot be capi, as GHC panics. foreign import ccall unsafe "rts_getThreadId" getThreadId :: ThreadId# -> CInt @@ -814,12 +811,11 @@ hashPtrWithSalt p len salt = fromIntegral `fmap` c_hashCString (castPtr p) (fromIntegral len) (fromIntegral salt) +foreign import capi unsafe "HsHashable.h hashable_fnv_hash" c_hashCString #if WORD_SIZE_IN_BITS == 64 -foreign import ccall unsafe "hashable_fnv_hash" c_hashCString - :: CString -> Int64 -> Int64 -> IO Int64 + :: CString -> Int64 -> Int64 -> IO Word64 #else -foreign import ccall unsafe "hashable_fnv_hash" c_hashCString - :: CString -> Int32 -> Int32 -> IO Int32 + :: CString -> Int32 -> Int32 -> IO Word32 #endif -- | Compute a hash value for the content of this 'ByteArray#', @@ -847,12 +843,15 @@ hashByteArrayWithSalt ba !off !len !h = fromIntegral $ c_hashByteArray ba (fromIntegral off) (fromIntegral len) (fromIntegral h) -#if WORD_SIZE_IN_BITS == 64 -foreign import ccall unsafe "hashable_fnv_hash_offset" c_hashByteArray - :: ByteArray# -> Int64 -> Int64 -> Int64 -> Int64 +#if __GLASGOW_HASKELL__ >= 802 +foreign import capi unsafe "HsHashable.h hashable_fnv_hash_offset" c_hashByteArray #else foreign import ccall unsafe "hashable_fnv_hash_offset" c_hashByteArray - :: ByteArray# -> Int32 -> Int32 -> Int32 -> Int32 +#endif +#if WORD_SIZE_IN_BITS == 64 + :: ByteArray# -> Int64 -> Int64 -> Int64 -> Word64 +#else + :: ByteArray# -> Int32 -> Int32 -> Int32 -> Word32 #endif -- | Combine two given hash values. 'combine' has zero as a left diff --git a/tests/Regress.hs b/tests/Regress.hs index 8a591fe..403c41d 100644 --- a/tests/Regress.hs +++ b/tests/Regress.hs @@ -41,7 +41,7 @@ regressions = [] ++ hs @=? nub hs #if WORD_SIZE_IN_BITS == 64 , testCase "64 bit Text" $ do - hash ("hello world" :: Text) @=? 6567282331143050109 + hash ("hello world" :: Text) @=? 2668910425102664189 #endif ] where