From b5d8a696e4c90d608210c22a4911fb4ec76eb273 Mon Sep 17 00:00:00 2001 From: Tom Kaitchuck Date: Sun, 11 Oct 2020 23:13:28 -0700 Subject: [PATCH 01/14] Enable specialization with aHash --- Cargo.toml | 6 +++--- src/map.rs | 33 ++++++++++++++++++--------------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 21bd5c2ce7..4c59080a75 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ edition = "2018" [dependencies] # For the default hasher -ahash = { version = "0.4.4", optional = true, default-features = false } +ahash = { version = "0.5.1", optional = true } # For external trait impls rayon = { version = "1.0", optional = true } @@ -25,7 +25,7 @@ compiler_builtins = { version = "0.1.2", optional = true } alloc = { version = "1.0.0", optional = true, package = "rustc-std-workspace-alloc" } [dev-dependencies] -lazy_static = "1.2" +lazy_static = "1.4" rand = { version = "0.7.3", features = ["small_rng"] } rayon = "1.0" rustc-hash = "=1.0" @@ -36,7 +36,7 @@ doc-comment = "0.3.1" default = ["ahash", "inline-more"] ahash-compile-time-rng = ["ahash/compile-time-rng"] -nightly = [] +nightly = ["ahash/specialize"] rustc-internal-api = [] rustc-dep-of-std = [ "nightly", diff --git a/src/map.rs b/src/map.rs index d83676dcda..b6e1fb63cc 100644 --- a/src/map.rs +++ b/src/map.rs @@ -240,8 +240,16 @@ where #[cfg_attr(feature = "inline-more", inline)] pub(crate) fn make_hash(hash_builder: &impl BuildHasher, val: &K) -> u64 { let mut state = hash_builder.build_hasher(); - val.hash(&mut state); - state.finish() + #[cfg(feature = "ahash")] + { + use ahash::CallHasher; + val.get_hash(state) + } + #[cfg(not(feature = "ahash"))] + { + val.hash(&mut state); + state.finish() + } } #[cfg(feature = "ahash")] @@ -1541,9 +1549,8 @@ impl<'a, K, V, S> RawEntryBuilderMut<'a, K, V, S> { K: Borrow, Q: Hash + Eq, { - let mut hasher = self.map.hash_builder.build_hasher(); - k.hash(&mut hasher); - self.from_key_hashed_nocheck(hasher.finish(), k) + 
let hash = make_hash(&self.map.hash_builder, k); + self.from_key_hashed_nocheck(hash, k) } /// Creates a `RawEntryMut` from the given key and its hash. @@ -1598,9 +1605,8 @@ impl<'a, K, V, S> RawEntryBuilder<'a, K, V, S> { K: Borrow, Q: Hash + Eq, { - let mut hasher = self.map.hash_builder.build_hasher(); - k.hash(&mut hasher); - self.from_key_hashed_nocheck(hasher.finish(), k) + let hash = make_hash(&self.map.hash_builder, k); + self.from_key_hashed_nocheck(hash, k) } /// Access an entry by a key and its hash. @@ -1957,9 +1963,8 @@ impl<'a, K, V, S> RawVacantEntryMut<'a, K, V, S> { K: Hash, S: BuildHasher, { - let mut hasher = self.hash_builder.build_hasher(); - key.hash(&mut hasher); - self.insert_hashed_nocheck(hasher.finish(), key, value) + let hash = make_hash(self.hash_builder, &key); + self.insert_hashed_nocheck(hash, key, value) } /// Sets the value of the entry with the VacantEntry's key, @@ -2001,11 +2006,9 @@ impl<'a, K, V, S> RawVacantEntryMut<'a, K, V, S> { K: Hash, S: BuildHasher, { - let mut hasher = self.hash_builder.build_hasher(); - key.hash(&mut hasher); - + let hash = make_hash(self.hash_builder, &key); let elem = self.table.insert( - hasher.finish(), + hash, (key, value), make_hasher(self.hash_builder), ); From 71eaa196e52e37e48f8a6be2f10c240cb307c379 Mon Sep 17 00:00:00 2001 From: Tom Kaitchuck Date: Mon, 12 Oct 2020 09:52:43 -0700 Subject: [PATCH 02/14] Prevent some warnings and fix formatting --- src/map.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/map.rs b/src/map.rs index b6e1fb63cc..833f07c74b 100644 --- a/src/map.rs +++ b/src/map.rs @@ -2,7 +2,7 @@ use crate::raw::{Bucket, RawDrain, RawIntoIter, RawIter, RawTable}; use crate::TryReserveError; use core::borrow::Borrow; use core::fmt::{self, Debug}; -use core::hash::{BuildHasher, Hash, Hasher}; +use core::hash::{BuildHasher, Hash}; use core::iter::{FromIterator, FusedIterator}; use core::marker::PhantomData; use core::mem; @@ -239,14 +239,16 @@ 
where #[cfg_attr(feature = "inline-more", inline)] pub(crate) fn make_hash(hash_builder: &impl BuildHasher, val: &K) -> u64 { - let mut state = hash_builder.build_hasher(); #[cfg(feature = "ahash")] { use ahash::CallHasher; + let state = hash_builder.build_hasher(); val.get_hash(state) } #[cfg(not(feature = "ahash"))] { + use core::hash::Hasher; + let mut state = hash_builder.build_hasher(); val.hash(&mut state); state.finish() } @@ -2007,11 +2009,9 @@ impl<'a, K, V, S> RawVacantEntryMut<'a, K, V, S> { S: BuildHasher, { let hash = make_hash(self.hash_builder, &key); - let elem = self.table.insert( - hash, - (key, value), - make_hasher(self.hash_builder), - ); + let elem = self + .table + .insert(hash, (key, value), make_hasher(self.hash_builder)); RawOccupiedEntryMut { elem, table: self.table, From 67bf6b3f536a3a3d7896aa5015809e7d08d4940e Mon Sep 17 00:00:00 2001 From: Tom Kaitchuck Date: Mon, 12 Oct 2020 12:00:26 -0700 Subject: [PATCH 03/14] Use no-std by default. --- Cargo.toml | 4 ++-- README.md | 21 ++++++++++++--------- src/map.rs | 12 +++++++++++- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4c59080a75..e1a2973f7b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ edition = "2018" [dependencies] # For the default hasher -ahash = { version = "0.5.1", optional = true } +ahash = { version = "0.5.1", default-features = false, optional = true } # For external trait impls rayon = { version = "1.0", optional = true } @@ -34,7 +34,7 @@ doc-comment = "0.3.1" [features] default = ["ahash", "inline-more"] - +std = ["ahash/std", "serde/std"] ahash-compile-time-rng = ["ahash/compile-time-rng"] nightly = ["ahash/specialize"] rustc-internal-api = [] diff --git a/README.md b/README.md index 2e431710fc..38ec1a1202 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ in environments without `std`, such as embedded systems and kernels. 
## Features - Drop-in replacement for the standard library `HashMap` and `HashSet` types. -- Uses `AHash` as the default hasher, which is much faster than SipHash. +- Uses [AHash](https://github.com/tkaitchuck/aHash) as the default hasher, which is much faster than SipHash. - Around 2x faster than the previous standard library `HashMap`. - Lower memory usage: only 1 byte of overhead per entry instead of 8. - Compatible with `#[no_std]` (but requires a global allocator with the `alloc` crate). @@ -37,7 +37,7 @@ in environments without `std`, such as embedded systems and kernels. Compared to the previous implementation of `std::collections::HashMap` (Rust 1.35). -With the hashbrown default AHash hasher (not HashDoS-resistant): +With the hashbrown default AHash hasher ([without HashDoS-resistance](#Flags)): ```text name oldstdhash ns/iter hashbrown ns/iter diff ns/iter diff % speedup @@ -96,19 +96,22 @@ use hashbrown::HashMap; let mut map = HashMap::new(); map.insert(1, "one"); ``` - +## Flags This crate has the following Cargo features: +- `inline-more`: Adds inline hints to most functions, improving run-time performance at the cost + of compilation time. (enabled by default) +- `ahash`: Compiles with ahash as default hasher. (enabled by default) +- `std`: Enables use of features that depend on the standard library. + If `ahash` is used this includes using random keys to provide DOS resistance. (disabled by default) +- `ahash-compile-time-rng`: This is an alternative to `std` which still allows for some degree of DOS-resistance. + However, it can result in issues for certain platforms. + See details in [issue#124](https://github.com/rust-lang/hashbrown/issues/124). (disabled by default) - `nightly`: Enables nightly-only features: `#[may_dangle]`. - `serde`: Enables serde serialization support. - `rayon`: Enables rayon parallel iterator support. - `raw`: Enables access to the experimental and unsafe `RawTable` API. 
-- `inline-more`: Adds inline hints to most functions, improving run-time performance at the cost - of compilation time. (enabled by default) -- `ahash`: Compiles with ahash as default hasher. (enabled by default) -- `ahash-compile-time-rng`: Activates the `compile-time-rng` feature of ahash, to increase the - DOS-resistance, but can result in issues for `no_std` builds. More details in - [issue#124](https://github.com/rust-lang/hashbrown/issues/124). (enabled by default) + ## License diff --git a/src/map.rs b/src/map.rs index 833f07c74b..4abcbf7a10 100644 --- a/src/map.rs +++ b/src/map.rs @@ -9,9 +9,19 @@ use core::mem; use core::ops::Index; /// Default hasher for `HashMap`. -#[cfg(feature = "ahash")] +#[cfg(all( + feature = "ahash", + any(feature = "std", feature = "ahash-compile-time-rng") +))] pub type DefaultHashBuilder = ahash::RandomState; +/// Default hasher for `HashMap`. +#[cfg(all( + feature = "ahash", + not(any(feature = "std", feature = "ahash-compile-time-rng")) +))] +pub type DefaultHashBuilder = core::hash::BuildHasherDefault; + /// Dummy default hasher for `HashMap`. 
#[cfg(not(feature = "ahash"))] pub enum DefaultHashBuilder {} From 5563b42aebef8788b07dbf0d93c42ebacf3d9265 Mon Sep 17 00:00:00 2001 From: Tom Kaitchuck Date: Tue, 20 Oct 2020 09:39:01 -0700 Subject: [PATCH 04/14] PR feedback --- Cargo.toml | 4 ++-- README.md | 14 +++++++------- src/map.rs | 13 +------------ 3 files changed, 10 insertions(+), 21 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e1a2973f7b..f541180855 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ edition = "2018" [dependencies] # For the default hasher -ahash = { version = "0.5.1", default-features = false, optional = true } +ahash = { version = "0.5.4", default-features = false, optional = true } # For external trait impls rayon = { version = "1.0", optional = true } @@ -34,7 +34,7 @@ doc-comment = "0.3.1" [features] default = ["ahash", "inline-more"] -std = ["ahash/std", "serde/std"] +ahash-run-time-rng = ["ahash/std"] ahash-compile-time-rng = ["ahash/compile-time-rng"] nightly = ["ahash/specialize"] rustc-internal-api = [] diff --git a/README.md b/README.md index 38ec1a1202..99a930b8e0 100644 --- a/README.md +++ b/README.md @@ -102,17 +102,17 @@ This crate has the following Cargo features: - `inline-more`: Adds inline hints to most functions, improving run-time performance at the cost of compilation time. (enabled by default) - `ahash`: Compiles with ahash as default hasher. (enabled by default) -- `std`: Enables use of features that depend on the standard library. - If `ahash` is used this includes using random keys to provide DOS resistance. (disabled by default) -- `ahash-compile-time-rng`: This is an alternative to `std` which still allows for some degree of DOS-resistance. - However, it can result in issues for certain platforms. - See details in [issue#124](https://github.com/rust-lang/hashbrown/issues/124). (disabled by default) -- `nightly`: Enables nightly-only features: `#[may_dangle]`. 
+- `ahash-run-time-rng`: Uses randomly generated keys for each hashmap to provide DOS resistance. + This requires the standard library. (disabled by default) +- `ahash-compile-time-rng`: This is an alternative to `ahash-run-time-rng` that works by pre-generating keys at + compile time and embedding them as constants. The avoids the dependency on the standard library but means the + binary will be slightly different each time it is compiled. (disabled by default) +- `nightly`: Enables nightly-only features including: `#[may_dangle]` and specialization to improve performance hashing + primitive types in aHash (if `ahash` is enabled). - `serde`: Enables serde serialization support. - `rayon`: Enables rayon parallel iterator support. - `raw`: Enables access to the experimental and unsafe `RawTable` API. - ## License Licensed under either of: diff --git a/src/map.rs b/src/map.rs index 4abcbf7a10..fc6a3d96ae 100644 --- a/src/map.rs +++ b/src/map.rs @@ -9,19 +9,8 @@ use core::mem; use core::ops::Index; /// Default hasher for `HashMap`. -#[cfg(all( - feature = "ahash", - any(feature = "std", feature = "ahash-compile-time-rng") -))] pub type DefaultHashBuilder = ahash::RandomState; -/// Default hasher for `HashMap`. -#[cfg(all( - feature = "ahash", - not(any(feature = "std", feature = "ahash-compile-time-rng")) -))] -pub type DefaultHashBuilder = core::hash::BuildHasherDefault; - /// Dummy default hasher for `HashMap`. 
#[cfg(not(feature = "ahash"))] pub enum DefaultHashBuilder {} @@ -249,7 +238,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub(crate) fn make_hash(hash_builder: &impl BuildHasher, val: &K) -> u64 { - #[cfg(feature = "ahash")] + #[cfg(feature = "ahash")] //This enables specialization to improve performance on primitive types { use ahash::CallHasher; let state = hash_builder.build_hasher(); From 64c31fea3971d03ede51815ca96a69e594089034 Mon Sep 17 00:00:00 2001 From: Tom Kaitchuck Date: Tue, 20 Oct 2020 10:13:46 -0700 Subject: [PATCH 05/14] Fix warnings --- src/map.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/map.rs b/src/map.rs index fc6a3d96ae..ebd13c0085 100644 --- a/src/map.rs +++ b/src/map.rs @@ -9,6 +9,7 @@ use core::mem; use core::ops::Index; /// Default hasher for `HashMap`. +#[cfg(feature = "ahash")] pub type DefaultHashBuilder = ahash::RandomState; /// Dummy default hasher for `HashMap`. @@ -238,8 +239,9 @@ where #[cfg_attr(feature = "inline-more", inline)] pub(crate) fn make_hash(hash_builder: &impl BuildHasher, val: &K) -> u64 { - #[cfg(feature = "ahash")] //This enables specialization to improve performance on primitive types + #[cfg(feature = "ahash")] { + //This enables specialization to improve performance on primitive types use ahash::CallHasher; let state = hash_builder.build_hasher(); val.get_hash(state) From 383ff52c37a35df88c24a9ff785ee78320f32608 Mon Sep 17 00:00:00 2001 From: Tom Kaitchuck Date: Tue, 20 Oct 2020 10:48:11 -0700 Subject: [PATCH 06/14] Update aHash version to work around lack of atomics --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f541180855..773ae71fd8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ edition = "2018" [dependencies] # For the default hasher -ahash = { version = "0.5.4", default-features = false, optional = true } +ahash = { version = "0.5.5", default-features = false, optional = true } # For external 
trait impls rayon = { version = "1.0", optional = true } From 29f39aaa8a6d5d05900b5cf0e1549fe4e49eade2 Mon Sep 17 00:00:00 2001 From: Tom Kaitchuck Date: Wed, 21 Oct 2020 00:11:57 -0700 Subject: [PATCH 07/14] Update ahash version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 773ae71fd8..e0ec0ca805 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ edition = "2018" [dependencies] # For the default hasher -ahash = { version = "0.5.5", default-features = false, optional = true } +ahash = { version = "0.5.6", default-features = false, optional = true } # For external trait impls rayon = { version = "1.0", optional = true } From 40e7e089a5a767798c2e84ecdbcc3319f3225b97 Mon Sep 17 00:00:00 2001 From: Tom Kaitchuck Date: Tue, 24 Nov 2020 21:01:33 -0800 Subject: [PATCH 08/14] Change to aHash 0.6.1 and make key type explicit --- Cargo.toml | 5 ++-- README.md | 21 ++++++-------- src/map.rs | 85 +++++++++++++++++++++++++++++++++++------------------- 3 files changed, 67 insertions(+), 44 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e0ec0ca805..3c7467ec98 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ edition = "2018" [dependencies] # For the default hasher -ahash = { version = "0.5.6", default-features = false, optional = true } +ahash = { version = "0.6.1", default-features = false, optional = true } # For external trait impls rayon = { version = "1.0", optional = true } @@ -34,9 +34,8 @@ doc-comment = "0.3.1" [features] default = ["ahash", "inline-more"] -ahash-run-time-rng = ["ahash/std"] ahash-compile-time-rng = ["ahash/compile-time-rng"] -nightly = ["ahash/specialize"] +nightly = [] rustc-internal-api = [] rustc-dep-of-std = [ "nightly", diff --git a/README.md b/README.md index 99a930b8e0..0da616cf90 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ in environments without `std`, such as embedded systems and kernels. 
Compared to the previous implementation of `std::collections::HashMap` (Rust 1.35). -With the hashbrown default AHash hasher ([without HashDoS-resistance](#Flags)): +With the hashbrown default AHash hasher: ```text name oldstdhash ns/iter hashbrown ns/iter diff ns/iter diff % speedup @@ -58,7 +58,7 @@ With the hashbrown default AHash hasher ([without HashDoS-resistance](#Flags)): lookup_fail_ahash_serial 4,902 3,240 -1,662 -33.90% x 1.51 ``` -With the libstd default SipHash hasher (HashDoS-resistant): +With the libstd default SipHash hasher: ```text name oldstdhash ns/iter hashbrown ns/iter diff ns/iter diff % speedup @@ -99,19 +99,16 @@ map.insert(1, "one"); ## Flags This crate has the following Cargo features: -- `inline-more`: Adds inline hints to most functions, improving run-time performance at the cost - of compilation time. (enabled by default) -- `ahash`: Compiles with ahash as default hasher. (enabled by default) -- `ahash-run-time-rng`: Uses randomly generated keys for each hashmap to provide DOS resistance. - This requires the standard library. (disabled by default) -- `ahash-compile-time-rng`: This is an alternative to `ahash-run-time-rng` that works by pre-generating keys at - compile time and embedding them as constants. The avoids the dependency on the standard library but means the - binary will be slightly different each time it is compiled. (disabled by default) -- `nightly`: Enables nightly-only features including: `#[may_dangle]` and specialization to improve performance hashing - primitive types in aHash (if `ahash` is enabled). +- `nightly`: Enables nightly-only features including: `#[may_dangle]` - `serde`: Enables serde serialization support. - `rayon`: Enables rayon parallel iterator support. - `raw`: Enables access to the experimental and unsafe `RawTable` API. +- `inline-more`: Adds inline hints to most functions, improving run-time performance at the cost + of compilation time. 
(enabled by default) +- `ahash`: Compiles with ahash as default hasher. (enabled by default) +- `ahash-compile-time-rng`: Activates the `compile-time-rng` feature of ahash. For targets with no random number generator +this pre-generates seeds at compile time and embeds them as constants. See [aHash's documentation](https://github.com/tkaitchuck/aHash#flags) (disabled by default) + ## License diff --git a/src/map.rs b/src/map.rs index ebd13c0085..d6a191712e 100644 --- a/src/map.rs +++ b/src/map.rs @@ -209,10 +209,13 @@ impl Clone for HashMap { /// Ensures that a single closure type across uses of this which, in turn prevents multiple /// instances of any functions like RawTable::reserve from being generated #[cfg_attr(feature = "inline-more", inline)] -pub(crate) fn make_hasher( - hash_builder: &impl BuildHasher, -) -> impl Fn(&(K, V)) -> u64 + '_ { - move |val| make_hash(hash_builder, &val.0) +pub(crate) fn make_hasher(hash_builder: &S) -> impl Fn(&(Q, V)) -> u64 + '_ +where + K: Borrow, + Q: Hash, + S: BuildHasher +{ + move |val| make_hash::(hash_builder, &val.0) } /// Ensures that a single closure type across uses of this which, in turn prevents multiple @@ -238,13 +241,18 @@ where } #[cfg_attr(feature = "inline-more", inline)] -pub(crate) fn make_hash(hash_builder: &impl BuildHasher, val: &K) -> u64 { +pub(crate) fn make_hash(hash_builder: &S, val: &Q) -> u64 + where + K: Borrow, + Q: Hash + ?Sized, + S: BuildHasher +{ #[cfg(feature = "ahash")] { //This enables specialization to improve performance on primitive types use ahash::CallHasher; let state = hash_builder.build_hasher(); - val.get_hash(state) + Q::get_hash(val, state) } #[cfg(not(feature = "ahash"))] { @@ -255,6 +263,28 @@ pub(crate) fn make_hash(hash_builder: &impl BuildHasher, val: } } +#[cfg_attr(feature = "inline-more", inline)] +pub(crate) fn make_insert_hash(hash_builder: &S, val: &K) -> u64 + where + K: Hash, + S: BuildHasher +{ + #[cfg(feature = "ahash")] + { + //This enables specialization to 
improve performance on primitive types + use ahash::CallHasher; + let state = hash_builder.build_hasher(); + K::get_hash(val, state) + } + #[cfg(not(feature = "ahash"))] + { + use core::hash::Hasher; + let mut state = hash_builder.build_hasher(); + val.hash(&mut state); + state.finish() + } +} + #[cfg(feature = "ahash")] impl HashMap { /// Creates an empty `HashMap`. @@ -706,7 +736,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn reserve(&mut self, additional: usize) { self.table - .reserve(additional, make_hasher(&self.hash_builder)); + .reserve(additional, make_hasher::(&self.hash_builder)); } /// Tries to reserve capacity for at least `additional` more elements to be inserted @@ -728,7 +758,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { self.table - .try_reserve(additional, make_hasher(&self.hash_builder)) + .try_reserve(additional, make_hasher::(&self.hash_builder)) } /// Shrinks the capacity of the map as much as possible. It will drop @@ -749,7 +779,7 @@ where /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn shrink_to_fit(&mut self) { - self.table.shrink_to(0, make_hasher(&self.hash_builder)); + self.table.shrink_to(0, make_hasher::(&self.hash_builder)); } /// Shrinks the capacity of the map with a lower limit. It will drop @@ -778,7 +808,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn shrink_to(&mut self, min_capacity: usize) { self.table - .shrink_to(min_capacity, make_hasher(&self.hash_builder)); + .shrink_to(min_capacity, make_hasher::(&self.hash_builder)); } /// Gets the given key's corresponding entry in the map for in-place manipulation. 
@@ -802,7 +832,7 @@ where /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn entry(&mut self, key: K) -> Entry<'_, K, V, S> { - let hash = make_hash(&self.hash_builder, &key); + let hash = make_insert_hash::(&self.hash_builder, &key); if let Some(elem) = self.table.find(hash, equivalent_key(&key)) { Entry::Occupied(OccupiedEntry { hash, @@ -889,7 +919,7 @@ where K: Borrow, Q: Hash + Eq, { - let hash = make_hash(&self.hash_builder, k); + let hash = make_hash::(&self.hash_builder, k); self.table.get(hash, equivalent_key(k)) } @@ -997,7 +1027,7 @@ where K: Borrow, Q: Hash + Eq, { - let hash = make_hash(&self.hash_builder, k); + let hash = make_hash::(&self.hash_builder, k); self.table.get_mut(hash, equivalent_key(k)) } @@ -1028,12 +1058,12 @@ where /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn insert(&mut self, k: K, v: V) -> Option { - let hash = make_hash(&self.hash_builder, &k); + let hash = make_insert_hash::(&self.hash_builder, &k); if let Some((_, item)) = self.table.get_mut(hash, equivalent_key(&k)) { Some(mem::replace(item, v)) } else { self.table - .insert(hash, (k, v), make_hasher(&self.hash_builder)); + .insert(hash, (k, v), make_hasher::(&self.hash_builder)); None } } @@ -1097,7 +1127,7 @@ where K: Borrow, Q: Hash + Eq, { - let hash = make_hash(&self.hash_builder, &k); + let hash = make_hash::(&self.hash_builder, k); self.table.remove_entry(hash, equivalent_key(k)) } } @@ -1158,6 +1188,7 @@ impl HashMap { pub fn raw_entry(&self) -> RawEntryBuilder<'_, K, V, S> { RawEntryBuilder { map: self } } + } impl PartialEq for HashMap @@ -1552,7 +1583,7 @@ impl<'a, K, V, S> RawEntryBuilderMut<'a, K, V, S> { K: Borrow, Q: Hash + Eq, { - let hash = make_hash(&self.map.hash_builder, k); + let hash = make_hash::(&self.map.hash_builder, k); self.from_key_hashed_nocheck(hash, k) } @@ -1608,7 +1639,7 @@ impl<'a, K, V, S> RawEntryBuilder<'a, K, V, S> { K: Borrow, Q: Hash + Eq, { - let hash = make_hash(&self.map.hash_builder, k); + let hash = 
make_hash::(&self.map.hash_builder, k); self.from_key_hashed_nocheck(hash, k) } @@ -1966,7 +1997,7 @@ impl<'a, K, V, S> RawVacantEntryMut<'a, K, V, S> { K: Hash, S: BuildHasher, { - let hash = make_hash(self.hash_builder, &key); + let hash = make_insert_hash::(self.hash_builder, &key); self.insert_hashed_nocheck(hash, key, value) } @@ -1981,7 +2012,7 @@ impl<'a, K, V, S> RawVacantEntryMut<'a, K, V, S> { { let &mut (ref mut k, ref mut v) = self.table - .insert_entry(hash, (key, value), make_hasher(self.hash_builder)); + .insert_entry(hash, (key, value), make_hasher::(self.hash_builder)); (k, v) } @@ -2009,10 +2040,10 @@ impl<'a, K, V, S> RawVacantEntryMut<'a, K, V, S> { K: Hash, S: BuildHasher, { - let hash = make_hash(self.hash_builder, &key); + let hash = make_insert_hash::(self.hash_builder, &key); let elem = self .table - .insert(hash, (key, value), make_hasher(self.hash_builder)); + .insert(hash, (key, value), make_hasher::(self.hash_builder)); RawOccupiedEntryMut { elem, table: self.table, @@ -3009,7 +3040,7 @@ impl<'a, K, V, S> VacantEntry<'a, K, V, S> { let entry = table.insert_entry( self.hash, (self.key, value), - make_hasher(&self.table.hash_builder), + make_hasher::(&self.table.hash_builder), ); &mut entry.1 } @@ -3023,7 +3054,7 @@ impl<'a, K, V, S> VacantEntry<'a, K, V, S> { let elem = self.table.table.insert( self.hash, (self.key, value), - make_hasher(&self.table.hash_builder), + make_hasher::(&self.table.hash_builder), ); OccupiedEntry { hash: self.hash, @@ -4322,11 +4353,7 @@ mod test_map { let mut map: HashMap<_, _> = xs.iter().cloned().collect(); let compute_hash = |map: &HashMap, k: i32| -> u64 { - use core::hash::{BuildHasher, Hash, Hasher}; - - let mut hasher = map.hasher().build_hasher(); - k.hash(&mut hasher); - hasher.finish() + super::make_insert_hash::(map.hasher(), &k) }; // Existing key (insert) From 78ec5523502c1d5e1cb7d7c992bd8e701708e3a8 Mon Sep 17 00:00:00 2001 From: Tom Kaitchuck Date: Tue, 24 Nov 2020 22:50:38 -0800 Subject: [PATCH 
09/14] Update benchmarks --- README.md | 71 +++++++++++++++++++++++++++---------------------------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 0da616cf90..bd6e44f44f 100644 --- a/README.md +++ b/README.md @@ -39,45 +39,44 @@ Compared to the previous implementation of `std::collections::HashMap` (Rust 1.3 With the hashbrown default AHash hasher: -```text - name oldstdhash ns/iter hashbrown ns/iter diff ns/iter diff % speedup - insert_ahash_highbits 20,846 7,397 -13,449 -64.52% x 2.82 - insert_ahash_random 20,515 7,796 -12,719 -62.00% x 2.63 - insert_ahash_serial 21,668 7,264 -14,404 -66.48% x 2.98 - insert_erase_ahash_highbits 29,570 17,498 -12,072 -40.83% x 1.69 - insert_erase_ahash_random 39,569 17,474 -22,095 -55.84% x 2.26 - insert_erase_ahash_serial 32,073 17,332 -14,741 -45.96% x 1.85 - iter_ahash_highbits 1,572 2,087 515 32.76% x 0.75 - iter_ahash_random 1,609 2,074 465 28.90% x 0.78 - iter_ahash_serial 2,293 2,120 -173 -7.54% x 1.08 - lookup_ahash_highbits 3,460 4,403 943 27.25% x 0.79 - lookup_ahash_random 6,377 3,911 -2,466 -38.67% x 1.63 - lookup_ahash_serial 3,629 3,586 -43 -1.18% x 1.01 - lookup_fail_ahash_highbits 5,286 3,411 -1,875 -35.47% x 1.55 - lookup_fail_ahash_random 12,365 4,171 -8,194 -66.27% x 2.96 - lookup_fail_ahash_serial 4,902 3,240 -1,662 -33.90% x 1.51 -``` +| name | oldstdhash ns/iter | hashbrown ns/iter | diff ns/iter | diff % | speedup | +|:------------------------|:-------------------:|------------------:|:------------:|---------:|---------| +| insert_ahash_highbits | 18,865 | 8,020 | -10,845 | -57.49% | x 2.35 | +| insert_ahash_random | 19,711 | 8,019 | -11,692 | -59.32% | x 2.46 | +| insert_ahash_serial | 19,365 | 6,463 | -12,902 | -66.63% | x 3.00 | +| insert_erase_ahash_highbits | 51,136 | 17,916 | -33,220 | -64.96% | x 2.85 | +| insert_erase_ahash_random | 51,157 | 17,688 | -33,469 | -65.42% | x 2.89 | +| insert_erase_ahash_serial | 45,479 | 14,895 | -30,584 | -67.25% | x 3.05 | 
+| iter_ahash_highbits | 1,399 | 1,092 | -307 | -21.94% | x 1.28 | +| iter_ahash_random | 1,586 | 1,059 | -527 | -33.23% | x 1.50 | +| iter_ahash_serial | 3,168 | 1,079 | -2,089 | -65.94% | x 2.94 | +| lookup_ahash_highbits | 32,351 | 4,792 | -27,559 | -85.19% | x 6.75 | +| lookup_ahash_random | 17,419 | 4,817 | -12,602 | -72.35% | x 3.62 | +| lookup_ahash_serial | 15,254 | 3,606 | -11,648 | -76.36% | x 4.23 | +| lookup_fail_ahash_highbits | 21,187 | 4,369 | -16,818 | -79.38% | x 4.85 | +| lookup_fail_ahash_random | 21,550 | 4,395 | -17,155 | -79.61% | x 4.90 | +| lookup_fail_ahash_serial | 19,450 | 3,176 | -16,274 | -83.67% | x 6.12 | + With the libstd default SipHash hasher: -```text - name oldstdhash ns/iter hashbrown ns/iter diff ns/iter diff % speedup - insert_std_highbits 32,598 20,199 -12,399 -38.04% x 1.61 - insert_std_random 29,824 20,760 -9,064 -30.39% x 1.44 - insert_std_serial 33,151 17,256 -15,895 -47.95% x 1.92 - insert_erase_std_highbits 74,731 48,735 -25,996 -34.79% x 1.53 - insert_erase_std_random 73,828 47,649 -26,179 -35.46% x 1.55 - insert_erase_std_serial 73,864 40,147 -33,717 -45.65% x 1.84 - iter_std_highbits 1,518 2,264 746 49.14% x 0.67 - iter_std_random 1,502 2,414 912 60.72% x 0.62 - iter_std_serial 6,361 2,118 -4,243 -66.70% x 3.00 - lookup_std_highbits 21,705 16,962 -4,743 -21.85% x 1.28 - lookup_std_random 21,654 17,158 -4,496 -20.76% x 1.26 - lookup_std_serial 18,726 14,509 -4,217 -22.52% x 1.29 - lookup_fail_std_highbits 25,852 17,323 -8,529 -32.99% x 1.49 - lookup_fail_std_random 25,913 17,760 -8,153 -31.46% x 1.46 - lookup_fail_std_serial 22,648 14,839 -7,809 -34.48% x 1.53 -``` +|name | oldstdhash ns/iter | hashbrown ns/iter | diff ns/iter | diff % | speedup | +|:------------------------|:-------------------:|------------------:|:------------:|---------:|---------| +|insert_std_highbits |19,216 |16,885 | -2,331 | -12.13% | x 1.14 | +|insert_std_random |19,179 |17,034 | -2,145 | -11.18% | x 1.13 | +|insert_std_serial |19,462 
|17,493 | -1,969 | -10.12% | x 1.11 | +|insert_erase_std_highbits |50,825 |35,847 | -14,978 | -29.47% | x 1.42 | +|insert_erase_std_random |51,448 |35,392 | -16,056 | -31.21% | x 1.45 | +|insert_erase_std_serial |87,711 |38,091 | -49,620 | -56.57% | x 2.30 | +|iter_std_highbits |1,378 |1,159 | -219 | -15.89% | x 1.19 | +|iter_std_random |1,395 |1,132 | -263 | -18.85% | x 1.23 | +|iter_std_serial |1,704 |1,105 | -599 | -35.15% | x 1.54 | +|lookup_std_highbits |17,195 |13,642 | -3,553 | -20.66% | x 1.26 | +|lookup_std_random |17,181 |13,773 | -3,408 | -19.84% | x 1.25 | +|lookup_std_serial |15,483 |13,651 | -1,832 | -11.83% | x 1.13 | +|lookup_fail_std_highbits |20,926 |13,474 | -7,452 | -35.61% | x 1.55 | +|lookup_fail_std_random |21,766 |13,505 | -8,261 | -37.95% | x 1.61 | +|lookup_fail_std_serial |19,336 |13,519 | -5,817 | -30.08% | x 1.43 | ## Usage From d5b5a1af0725de99ce6ed9ad91b82b50f225ae30 Mon Sep 17 00:00:00 2001 From: Tom Kaitchuck Date: Tue, 24 Nov 2020 22:58:29 -0800 Subject: [PATCH 10/14] Minor cleanup --- Cargo.toml | 1 + README.md | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3c7467ec98..a2cb059fc6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,7 @@ doc-comment = "0.3.1" [features] default = ["ahash", "inline-more"] + ahash-compile-time-rng = ["ahash/compile-time-rng"] nightly = [] rustc-internal-api = [] diff --git a/README.md b/README.md index bd6e44f44f..ba7d0522cd 100644 --- a/README.md +++ b/README.md @@ -98,17 +98,16 @@ map.insert(1, "one"); ## Flags This crate has the following Cargo features: -- `nightly`: Enables nightly-only features including: `#[may_dangle]` +- `nightly`: Enables nightly-only features including: `#[may_dangle]`. - `serde`: Enables serde serialization support. - `rayon`: Enables rayon parallel iterator support. - `raw`: Enables access to the experimental and unsafe `RawTable` API. 
- `inline-more`: Adds inline hints to most functions, improving run-time performance at the cost - of compilation time. (enabled by default) + of compilation time. (enabled by default) - `ahash`: Compiles with ahash as default hasher. (enabled by default) - `ahash-compile-time-rng`: Activates the `compile-time-rng` feature of ahash. For targets with no random number generator this pre-generates seeds at compile time and embeds them as constants. See [aHash's documentation](https://github.com/tkaitchuck/aHash#flags) (disabled by default) - ## License Licensed under either of: From 5d0775159f0bd1f764107e33fddfac0245e4b392 Mon Sep 17 00:00:00 2001 From: Tom Kaitchuck Date: Tue, 24 Nov 2020 23:08:08 -0800 Subject: [PATCH 11/14] Cargo fmt --- src/map.rs | 60 +++++++++++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/src/map.rs b/src/map.rs index 32f3e4eaa5..2dbc94cc1a 100644 --- a/src/map.rs +++ b/src/map.rs @@ -213,7 +213,7 @@ pub(crate) fn make_hasher(hash_builder: &S) -> impl Fn(&(Q, V)) -> u where K: Borrow, Q: Hash, - S: BuildHasher + S: BuildHasher, { move |val| make_hash::(hash_builder, &val.0) } @@ -242,10 +242,10 @@ where #[cfg_attr(feature = "inline-more", inline)] pub(crate) fn make_hash(hash_builder: &S, val: &Q) -> u64 - where - K: Borrow, - Q: Hash + ?Sized, - S: BuildHasher +where + K: Borrow, + Q: Hash + ?Sized, + S: BuildHasher, { #[cfg(feature = "ahash")] { @@ -265,24 +265,24 @@ pub(crate) fn make_hash(hash_builder: &S, val: &Q) -> u64 #[cfg_attr(feature = "inline-more", inline)] pub(crate) fn make_insert_hash(hash_builder: &S, val: &K) -> u64 - where - K: Hash, - S: BuildHasher +where + K: Hash, + S: BuildHasher, { #[cfg(feature = "ahash")] - { - //This enables specialization to improve performance on primitive types - use ahash::CallHasher; - let state = hash_builder.build_hasher(); - K::get_hash(val, state) - } + { + //This enables specialization to improve performance on primitive types + 
use ahash::CallHasher; + let state = hash_builder.build_hasher(); + K::get_hash(val, state) + } #[cfg(not(feature = "ahash"))] - { - use core::hash::Hasher; - let mut state = hash_builder.build_hasher(); - val.hash(&mut state); - state.finish() - } + { + use core::hash::Hasher; + let mut state = hash_builder.build_hasher(); + val.hash(&mut state); + state.finish() + } } #[cfg(feature = "ahash")] @@ -860,7 +860,8 @@ where /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn shrink_to_fit(&mut self) { - self.table.shrink_to(0, make_hasher::(&self.hash_builder)); + self.table + .shrink_to(0, make_hasher::(&self.hash_builder)); } /// Shrinks the capacity of the map with a lower limit. It will drop @@ -1269,7 +1270,6 @@ impl HashMap { pub fn raw_entry(&self) -> RawEntryBuilder<'_, K, V, S, A> { RawEntryBuilder { map: self } } - } impl PartialEq for HashMap @@ -2098,9 +2098,11 @@ impl<'a, K, V, S, A: AllocRef + Clone> RawVacantEntryMut<'a, K, V, S, A> { K: Hash, S: BuildHasher, { - let &mut (ref mut k, ref mut v) = - self.table - .insert_entry(hash, (key, value), make_hasher::(self.hash_builder)); + let &mut (ref mut k, ref mut v) = self.table.insert_entry( + hash, + (key, value), + make_hasher::(self.hash_builder), + ); (k, v) } @@ -2129,9 +2131,11 @@ impl<'a, K, V, S, A: AllocRef + Clone> RawVacantEntryMut<'a, K, V, S, A> { S: BuildHasher, { let hash = make_insert_hash::(self.hash_builder, &key); - let elem = self - .table - .insert(hash, (key, value), make_hasher::(self.hash_builder)); + let elem = self.table.insert( + hash, + (key, value), + make_hasher::(self.hash_builder), + ); RawOccupiedEntryMut { elem, table: self.table, From 1e4b95a4ac3bfd168b4253d21074bf764965c616 Mon Sep 17 00:00:00 2001 From: Tom Kaitchuck Date: Tue, 24 Nov 2020 23:17:59 -0800 Subject: [PATCH 12/14] Clean up after merge. 
--- src/rustc_entry.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rustc_entry.rs b/src/rustc_entry.rs index 7290f9c484..8de4086b00 100644 --- a/src/rustc_entry.rs +++ b/src/rustc_entry.rs @@ -1,5 +1,5 @@ use self::RustcEntry::*; -use crate::map::{make_hash, Drain, HashMap, IntoIter, Iter, IterMut}; +use crate::map::{Drain, HashMap, IntoIter, Iter, IterMut, make_insert_hash}; use crate::raw::{AllocRef, Bucket, Global, RawTable}; use core::fmt::{self, Debug}; use core::hash::{BuildHasher, Hash}; @@ -32,7 +32,7 @@ where /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn rustc_entry(&mut self, key: K) -> RustcEntry<'_, K, V, A> { - let hash = make_hash(&self.hash_builder, &key); + let hash = make_insert_hash(&self.hash_builder, &key); if let Some(elem) = self.table.find(hash, |q| q.0.eq(&key)) { RustcEntry::Occupied(RustcOccupiedEntry { key: Some(key), From d9053f8cf9f677ba60ee5b028e41a50fd99486ce Mon Sep 17 00:00:00 2001 From: Tom Kaitchuck Date: Tue, 24 Nov 2020 23:46:38 -0800 Subject: [PATCH 13/14] Fix build error --- src/map.rs | 8 +++++--- src/rustc_entry.rs | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/map.rs b/src/map.rs index 2dbc94cc1a..86bbdc4e35 100644 --- a/src/map.rs +++ b/src/map.rs @@ -4636,9 +4636,11 @@ mod test_map { left -= 1; } else { assert!(removed.contains(&(i, 2 * i)), "{} not in {:?}", i, removed); - let e = m - .table - .insert(hash, (i, 2 * i), super::make_hasher(&hasher)); + let e = m.table.insert( + hash, + (i, 2 * i), + super::make_hasher::(&hasher), + ); it.reflect_insert(&e); if let Some(p) = removed.iter().position(|e| e == &(i, 2 * i)) { removed.swap_remove(p); diff --git a/src/rustc_entry.rs b/src/rustc_entry.rs index 8de4086b00..96788fb118 100644 --- a/src/rustc_entry.rs +++ b/src/rustc_entry.rs @@ -1,5 +1,5 @@ use self::RustcEntry::*; -use crate::map::{Drain, HashMap, IntoIter, Iter, IterMut, make_insert_hash}; +use crate::map::{make_insert_hash, Drain, HashMap, 
IntoIter, Iter, IterMut}; use crate::raw::{AllocRef, Bucket, Global, RawTable}; use core::fmt::{self, Debug}; use core::hash::{BuildHasher, Hash}; From f1d3137f836f91801be875a52f6a2ed1c7cce8d6 Mon Sep 17 00:00:00 2001 From: Tom Kaitchuck Date: Wed, 9 Dec 2020 18:33:59 -0800 Subject: [PATCH 14/14] Clean up after merge --- src/rustc_entry.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rustc_entry.rs b/src/rustc_entry.rs index fa79887acd..1793c4a600 100644 --- a/src/rustc_entry.rs +++ b/src/rustc_entry.rs @@ -1,6 +1,6 @@ use self::RustcEntry::*; use crate::map::{make_insert_hash, Drain, HashMap, IntoIter, Iter, IterMut}; -use crate::raw::{AllocRef, Bucket, Global, RawTable}; +use crate::raw::{Allocator, Bucket, Global, RawTable}; use core::fmt::{self, Debug}; use core::hash::{BuildHasher, Hash}; use core::mem;