diff --git a/Cargo.toml b/Cargo.toml index 024da844af..72e40ba490 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ edition = "2018" [dependencies] # For the default hasher -ahash = { version = "0.4.4", optional = true, default-features = false } +ahash = { version = "0.6.1", default-features = false, optional = true } # For external trait impls rayon = { version = "1.0", optional = true } @@ -25,7 +25,7 @@ compiler_builtins = { version = "0.1.2", optional = true } alloc = { version = "1.0.0", optional = true, package = "rustc-std-workspace-alloc" } [dev-dependencies] -lazy_static = "1.2" +lazy_static = "1.4" rand = { version = "0.7.3", features = ["small_rng"] } rayon = "1.0" fnv = "1.0.7" diff --git a/README.md b/README.md index 2e431710fc..ba7d0522cd 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ in environments without `std`, such as embedded systems and kernels. ## Features - Drop-in replacement for the standard library `HashMap` and `HashSet` types. -- Uses `AHash` as the default hasher, which is much faster than SipHash. +- Uses [AHash](https://github.com/tkaitchuck/aHash) as the default hasher, which is much faster than SipHash. - Around 2x faster than the previous standard library `HashMap`. - Lower memory usage: only 1 byte of overhead per entry instead of 8. - Compatible with `#[no_std]` (but requires a global allocator with the `alloc` crate). @@ -37,47 +37,46 @@ in environments without `std`, such as embedded systems and kernels. Compared to the previous implementation of `std::collections::HashMap` (Rust 1.35). -With the hashbrown default AHash hasher (not HashDoS-resistant): - -```text - name oldstdhash ns/iter hashbrown ns/iter diff ns/iter diff % speedup - insert_ahash_highbits 20,846 7,397 -13,449 -64.52% x 2.82 - insert_ahash_random 20,515 7,796 -12,719 -62.00% x 2.63 - insert_ahash_serial 21,668 7,264 -14,404 -66.48% x 2.98 - insert_erase_ahash_highbits 29,570 17,498 -12,072 -40.83% x 1.69 - insert_erase_ahash_random 39,569 17,474 -22,095 -55.84% x 2.26 - insert_erase_ahash_serial 32,073 17,332 -14,741 -45.96% x 1.85 - iter_ahash_highbits 1,572 2,087 515 32.76% x 0.75 - iter_ahash_random 1,609 2,074 465 28.90% x 0.78 - iter_ahash_serial 2,293 2,120 -173 -7.54% x 1.08 - lookup_ahash_highbits 3,460 4,403 943 27.25% x 0.79 - lookup_ahash_random 6,377 3,911 -2,466 -38.67% x 1.63 - lookup_ahash_serial 3,629 3,586 -43 -1.18% x 1.01 - lookup_fail_ahash_highbits 5,286 3,411 -1,875 -35.47% x 1.55 - lookup_fail_ahash_random 12,365 4,171 -8,194 -66.27% x 2.96 - lookup_fail_ahash_serial 4,902 3,240 -1,662 -33.90% x 1.51 -``` - -With the libstd default SipHash hasher (HashDoS-resistant): - -```text - name oldstdhash ns/iter hashbrown ns/iter diff ns/iter diff % speedup - insert_std_highbits 32,598 20,199 -12,399 -38.04% x 1.61 - insert_std_random 29,824 20,760 -9,064 -30.39% x 1.44 - insert_std_serial 33,151 17,256 -15,895 -47.95% x 1.92 - insert_erase_std_highbits 74,731 48,735 -25,996 -34.79% x 1.53 - insert_erase_std_random 73,828 47,649 -26,179 -35.46% x 1.55 - insert_erase_std_serial 73,864 40,147 -33,717 -45.65% x 1.84 - iter_std_highbits 1,518 2,264 746 49.14% x 0.67 - iter_std_random 1,502 2,414 912 60.72% x 0.62 - iter_std_serial 6,361 2,118 -4,243 -66.70% x 3.00 - lookup_std_highbits 21,705 16,962 -4,743 -21.85% x 1.28 - lookup_std_random 21,654 17,158 -4,496 -20.76% x 1.26 - lookup_std_serial 18,726 14,509 -4,217 -22.52% x 1.29 - lookup_fail_std_highbits 25,852 17,323 -8,529 -32.99% x 1.49 - lookup_fail_std_random 25,913 17,760 -8,153 -31.46% x 1.46 - lookup_fail_std_serial 22,648 14,839 -7,809 -34.48% x 1.53 -``` +With the hashbrown default AHash hasher: + +| name | oldstdhash ns/iter | hashbrown ns/iter | diff ns/iter | diff % | speedup | +|:------------------------|:-------------------:|------------------:|:------------:|---------:|---------| +| insert_ahash_highbits | 18,865 | 8,020 | -10,845 | -57.49% | x 2.35 | +| insert_ahash_random | 19,711 | 8,019 | -11,692 | -59.32% | x 2.46 | +| insert_ahash_serial | 19,365 | 6,463 | -12,902 | -66.63% | x 3.00 | +| insert_erase_ahash_highbits | 51,136 | 17,916 | -33,220 | -64.96% | x 2.85 | +| insert_erase_ahash_random | 51,157 | 17,688 | -33,469 | -65.42% | x 2.89 | +| insert_erase_ahash_serial | 45,479 | 14,895 | -30,584 | -67.25% | x 3.05 | +| iter_ahash_highbits | 1,399 | 1,092 | -307 | -21.94% | x 1.28 | +| iter_ahash_random | 1,586 | 1,059 | -527 | -33.23% | x 1.50 | +| iter_ahash_serial | 3,168 | 1,079 | -2,089 | -65.94% | x 2.94 | +| lookup_ahash_highbits | 32,351 | 4,792 | -27,559 | -85.19% | x 6.75 | +| lookup_ahash_random | 17,419 | 4,817 | -12,602 | -72.35% | x 3.62 | +| lookup_ahash_serial | 15,254 | 3,606 | -11,648 | -76.36% | x 4.23 | +| lookup_fail_ahash_highbits | 21,187 | 4,369 | -16,818 | -79.38% | x 4.85 | +| lookup_fail_ahash_random | 21,550 | 4,395 | -17,155 | -79.61% | x 4.90 | +| lookup_fail_ahash_serial | 19,450 | 3,176 | -16,274 | -83.67% | x 6.12 | + + +With the libstd default SipHash hasher: + +|name | oldstdhash ns/iter | hashbrown ns/iter | diff ns/iter | diff % | speedup | +|:------------------------|:-------------------:|------------------:|:------------:|---------:|---------| +|insert_std_highbits |19,216 |16,885 | -2,331 | -12.13% | x 1.14 | +|insert_std_random |19,179 |17,034 | -2,145 | -11.18% | x 1.13 | +|insert_std_serial |19,462 |17,493 | -1,969 | -10.12% | x 1.11 | +|insert_erase_std_highbits |50,825 |35,847 | -14,978 | -29.47% | x 1.42 | +|insert_erase_std_random |51,448 |35,392 | -16,056 | -31.21% | x 1.45 | +|insert_erase_std_serial |87,711 |38,091 | -49,620 | -56.57% | x 2.30 | +|iter_std_highbits |1,378 |1,159 | -219 | -15.89% | x 1.19 | +|iter_std_random |1,395 |1,132 | -263 | -18.85% | x 1.23 | +|iter_std_serial |1,704 |1,105 | -599 | -35.15% | x 1.54 | +|lookup_std_highbits |17,195 |13,642 | -3,553 | -20.66% | x 1.26 | +|lookup_std_random |17,181 |13,773 | -3,408 | -19.84% | x 1.25 | +|lookup_std_serial |15,483 |13,651 | -1,832 | -11.83% | x 1.13 | +|lookup_fail_std_highbits |20,926 |13,474 | -7,452 | -35.61% | x 1.55 | +|lookup_fail_std_random |21,766 |13,505 | -8,261 | -37.95% | x 1.61 | +|lookup_fail_std_serial |19,336 |13,519 | -5,817 | -30.08% | x 1.43 | ## Usage @@ -96,19 +95,18 @@ use hashbrown::HashMap; let mut map = HashMap::new(); map.insert(1, "one"); ``` - +## Flags This crate has the following Cargo features: -- `nightly`: Enables nightly-only features: `#[may_dangle]`. +- `nightly`: Enables nightly-only features including: `#[may_dangle]`. - `serde`: Enables serde serialization support. - `rayon`: Enables rayon parallel iterator support. - `raw`: Enables access to the experimental and unsafe `RawTable` API. - `inline-more`: Adds inline hints to most functions, improving run-time performance at the cost of compilation time. (enabled by default) - `ahash`: Compiles with ahash as default hasher. (enabled by default) -- `ahash-compile-time-rng`: Activates the `compile-time-rng` feature of ahash, to increase the - DOS-resistance, but can result in issues for `no_std` builds. More details in - [issue#124](https://github.com/rust-lang/hashbrown/issues/124). (enabled by default) +- `ahash-compile-time-rng`: Activates the `compile-time-rng` feature of ahash. For targets with no random number generator +this pre-generates seeds at compile time and embeds them as constants. See [aHash's documentation](https://github.com/tkaitchuck/aHash#flags) (disabled by default) ## License diff --git a/src/map.rs b/src/map.rs index bc3aa6eee8..104db47ccb 100644 --- a/src/map.rs +++ b/src/map.rs @@ -2,7 +2,7 @@ use crate::raw::{Allocator, Bucket, Global, RawDrain, RawIntoIter, RawIter, RawT use crate::TryReserveError; use core::borrow::Borrow; use core::fmt::{self, Debug}; -use core::hash::{BuildHasher, Hash, Hasher}; +use core::hash::{BuildHasher, Hash}; use core::iter::{FromIterator, FusedIterator}; use core::marker::PhantomData; use core::mem; @@ -209,10 +209,13 @@ impl Clone for HashMap { /// Ensures that a single closure type across uses of this which, in turn prevents multiple /// instances of any functions like RawTable::reserve from being generated #[cfg_attr(feature = "inline-more", inline)] -pub(crate) fn make_hasher( - hash_builder: &impl BuildHasher, -) -> impl Fn(&(K, V)) -> u64 + '_ { - move |val| make_hash(hash_builder, &val.0) +pub(crate) fn make_hasher(hash_builder: &S) -> impl Fn(&(Q, V)) -> u64 + '_ +where + K: Borrow, + Q: Hash, + S: BuildHasher, +{ + move |val| make_hash::(hash_builder, &val.0) } /// Ensures that a single closure type across uses of this which, in turn prevents multiple @@ -238,10 +241,48 @@ where } #[cfg_attr(feature = "inline-more", inline)] -pub(crate) fn make_hash(hash_builder: &impl BuildHasher, val: &K) -> u64 { - let mut state = hash_builder.build_hasher(); - val.hash(&mut state); - state.finish() +pub(crate) fn make_hash(hash_builder: &S, val: &Q) -> u64 +where + K: Borrow, + Q: Hash + ?Sized, + S: BuildHasher, +{ + #[cfg(feature = "ahash")] + { + //This enables specialization to improve performance on primitive types + use ahash::CallHasher; + let state = hash_builder.build_hasher(); + Q::get_hash(val, state) + } + #[cfg(not(feature = "ahash"))] + { + use core::hash::Hasher; + let mut state = hash_builder.build_hasher(); + val.hash(&mut state); + state.finish() + } +} + +#[cfg_attr(feature = "inline-more", inline)] +pub(crate) fn make_insert_hash(hash_builder: &S, val: &K) -> u64 +where + K: Hash, + S: BuildHasher, +{ + #[cfg(feature = "ahash")] + { + //This enables specialization to improve performance on primitive types + use ahash::CallHasher; + let state = hash_builder.build_hasher(); + K::get_hash(val, state) + } + #[cfg(not(feature = "ahash"))] + { + use core::hash::Hasher; + let mut state = hash_builder.build_hasher(); + val.hash(&mut state); + state.finish() + } } #[cfg(feature = "ahash")] @@ -776,7 +817,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn reserve(&mut self, additional: usize) { self.table - .reserve(additional, make_hasher(&self.hash_builder)); + .reserve(additional, make_hasher::(&self.hash_builder)); } /// Tries to reserve capacity for at least `additional` more elements to be inserted @@ -798,7 +839,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { self.table - .try_reserve(additional, make_hasher(&self.hash_builder)) + .try_reserve(additional, make_hasher::(&self.hash_builder)) } /// Shrinks the capacity of the map as much as possible. It will drop @@ -819,7 +860,8 @@ where /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn shrink_to_fit(&mut self) { - self.table.shrink_to(0, make_hasher(&self.hash_builder)); + self.table + .shrink_to(0, make_hasher::(&self.hash_builder)); } /// Shrinks the capacity of the map with a lower limit. It will drop @@ -848,7 +890,7 @@ where #[cfg_attr(feature = "inline-more", inline)] pub fn shrink_to(&mut self, min_capacity: usize) { self.table - .shrink_to(min_capacity, make_hasher(&self.hash_builder)); + .shrink_to(min_capacity, make_hasher::(&self.hash_builder)); } /// Gets the given key's corresponding entry in the map for in-place manipulation. @@ -872,7 +914,7 @@ where /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn entry(&mut self, key: K) -> Entry<'_, K, V, S, A> { - let hash = make_hash(&self.hash_builder, &key); + let hash = make_insert_hash::(&self.hash_builder, &key); if let Some(elem) = self.table.find(hash, equivalent_key(&key)) { Entry::Occupied(OccupiedEntry { hash, @@ -959,7 +1001,7 @@ where K: Borrow, Q: Hash + Eq, { - let hash = make_hash(&self.hash_builder, k); + let hash = make_hash::(&self.hash_builder, k); self.table.get(hash, equivalent_key(k)) } @@ -1067,7 +1109,7 @@ where K: Borrow, Q: Hash + Eq, { - let hash = make_hash(&self.hash_builder, k); + let hash = make_hash::(&self.hash_builder, k); self.table.get_mut(hash, equivalent_key(k)) } @@ -1098,12 +1140,12 @@ where /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn insert(&mut self, k: K, v: V) -> Option { - let hash = make_hash(&self.hash_builder, &k); + let hash = make_insert_hash::(&self.hash_builder, &k); if let Some((_, item)) = self.table.get_mut(hash, equivalent_key(&k)) { Some(mem::replace(item, v)) } else { self.table - .insert(hash, (k, v), make_hasher(&self.hash_builder)); + .insert(hash, (k, v), make_hasher::(&self.hash_builder)); None } } @@ -1167,7 +1209,7 @@ where K: Borrow, Q: Hash + Eq, { - let hash = make_hash(&self.hash_builder, &k); + let hash = make_hash::(&self.hash_builder, k); self.table.remove_entry(hash, equivalent_key(k)) } } @@ -1629,9 +1671,8 @@ impl<'a, K, V, S, A: Allocator + Clone> RawEntryBuilderMut<'a, K, V, S, A> { K: Borrow, Q: Hash + Eq, { - let mut hasher = self.map.hash_builder.build_hasher(); - k.hash(&mut hasher); - self.from_key_hashed_nocheck(hasher.finish(), k) + let hash = make_hash::(&self.map.hash_builder, k); + self.from_key_hashed_nocheck(hash, k) } /// Creates a `RawEntryMut` from the given key and its hash. @@ -1686,9 +1727,8 @@ impl<'a, K, V, S, A: Allocator + Clone> RawEntryBuilder<'a, K, V, S, A> { K: Borrow, Q: Hash + Eq, { - let mut hasher = self.map.hash_builder.build_hasher(); - k.hash(&mut hasher); - self.from_key_hashed_nocheck(hasher.finish(), k) + let hash = make_hash::(&self.map.hash_builder, k); + self.from_key_hashed_nocheck(hash, k) } /// Access an entry by a key and its hash. @@ -2045,9 +2085,8 @@ impl<'a, K, V, S, A: Allocator + Clone> RawVacantEntryMut<'a, K, V, S, A> { K: Hash, S: BuildHasher, { - let mut hasher = self.hash_builder.build_hasher(); - key.hash(&mut hasher); - self.insert_hashed_nocheck(hasher.finish(), key, value) + let hash = make_insert_hash::(self.hash_builder, &key); + self.insert_hashed_nocheck(hash, key, value) } /// Sets the value of the entry with the VacantEntry's key, @@ -2059,9 +2098,11 @@ impl<'a, K, V, S, A: Allocator + Clone> RawVacantEntryMut<'a, K, V, S, A> { K: Hash, S: BuildHasher, { - let &mut (ref mut k, ref mut v) = - self.table - .insert_entry(hash, (key, value), make_hasher(self.hash_builder)); + let &mut (ref mut k, ref mut v) = self.table.insert_entry( + hash, + (key, value), + make_hasher::(self.hash_builder), + ); (k, v) } @@ -2089,13 +2130,11 @@ impl<'a, K, V, S, A: Allocator + Clone> RawVacantEntryMut<'a, K, V, S, A> { K: Hash, S: BuildHasher, { - let mut hasher = self.hash_builder.build_hasher(); - key.hash(&mut hasher); - + let hash = make_insert_hash::(self.hash_builder, &key); let elem = self.table.insert( - hasher.finish(), + hash, (key, value), - make_hasher(self.hash_builder), + make_hasher::(self.hash_builder), ); RawOccupiedEntryMut { elem, @@ -3099,7 +3138,7 @@ impl<'a, K, V, S, A: Allocator + Clone> VacantEntry<'a, K, V, S, A> { let entry = table.insert_entry( self.hash, (self.key, value), - make_hasher(&self.table.hash_builder), + make_hasher::(&self.table.hash_builder), ); &mut entry.1 } @@ -3113,7 +3152,7 @@ impl<'a, K, V, S, A: Allocator + Clone> VacantEntry<'a, K, V, S, A> { let elem = self.table.table.insert( self.hash, (self.key, value), - make_hasher(&self.table.hash_builder), + make_hasher::(&self.table.hash_builder), ); OccupiedEntry { hash: self.hash, @@ -4420,11 +4459,7 @@ mod test_map { let mut map: HashMap<_, _> = xs.iter().cloned().collect(); let compute_hash = |map: &HashMap, k: i32| -> u64 { - use core::hash::{BuildHasher, Hash, Hasher}; - - let mut hasher = map.hasher().build_hasher(); - k.hash(&mut hasher); - hasher.finish() + super::make_insert_hash::(map.hasher(), &k) }; // Existing key (insert) @@ -4601,9 +4636,11 @@ mod test_map { left -= 1; } else { assert!(removed.contains(&(i, 2 * i)), "{} not in {:?}", i, removed); - let e = m - .table - .insert(hash, (i, 2 * i), super::make_hasher(&hasher)); + let e = m.table.insert( + hash, + (i, 2 * i), + super::make_hasher::(&hasher), + ); it.reflect_insert(&e); if let Some(p) = removed.iter().position(|e| e == &(i, 2 * i)) { removed.swap_remove(p); diff --git a/src/rustc_entry.rs b/src/rustc_entry.rs index 071b1e8944..1793c4a600 100644 --- a/src/rustc_entry.rs +++ b/src/rustc_entry.rs @@ -1,5 +1,5 @@ use self::RustcEntry::*; -use crate::map::{make_hash, Drain, HashMap, IntoIter, Iter, IterMut}; +use crate::map::{make_insert_hash, Drain, HashMap, IntoIter, Iter, IterMut}; use crate::raw::{Allocator, Bucket, Global, RawTable}; use core::fmt::{self, Debug}; use core::hash::{BuildHasher, Hash}; @@ -32,7 +32,7 @@ where /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn rustc_entry(&mut self, key: K) -> RustcEntry<'_, K, V, A> { - let hash = make_hash(&self.hash_builder, &key); + let hash = make_insert_hash(&self.hash_builder, &key); if let Some(elem) = self.table.find(hash, |q| q.0.eq(&key)) { RustcEntry::Occupied(RustcOccupiedEntry { key: Some(key),