Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

hashmap: Store hashes as usize internally #36595

Merged
merged 1 commit into from
Nov 1, 2016
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 44 additions & 25 deletions src/libstd/collections/hash/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,18 @@ use ptr::{self, Unique, Shared};

use self::BucketState::*;

/// Integer type used for stored hash values.
///
/// No more than bit_width(usize) bits are needed to select a bucket.
///
/// The most significant bit is ours to use for tagging `SafeHash`.
///
/// (Even if we could have usize::MAX bytes allocated for buckets,
/// each bucket stores at least a `HashUint`, so there can be no more than
/// usize::MAX / size_of(usize) buckets.)
type HashUint = usize;

/// Sentinel hash value marking an unoccupied bucket; real hashes are
/// guaranteed non-zero because `SafeHash::new` always sets the MSB.
const EMPTY_BUCKET: HashUint = 0;

/// The raw hashtable, providing safe-ish access to the unzipped and highly
/// optimized arrays of hashes, and key-value pairs.
Expand Down Expand Up @@ -64,7 +75,7 @@ const EMPTY_BUCKET: u64 = 0;
pub struct RawTable<K, V> {
capacity: usize,
size: usize,
hashes: Unique<u64>,
hashes: Unique<HashUint>,

// Because K/V do not appear directly in any of the types in the struct,
// inform rustc that in fact instances of K and V are reachable from here.
Expand All @@ -75,7 +86,7 @@ unsafe impl<K: Send, V: Send> Send for RawTable<K, V> {}
unsafe impl<K: Sync, V: Sync> Sync for RawTable<K, V> {}

struct RawBucket<K, V> {
hash: *mut u64,
hash: *mut HashUint,
// We use *const to ensure covariance with respect to K and V
pair: *const (K, V),
_marker: marker::PhantomData<(K, V)>,
Expand Down Expand Up @@ -136,15 +147,27 @@ pub struct GapThenFull<K, V, M> {
/// buckets.
#[derive(PartialEq, Copy, Clone)]
pub struct SafeHash {
hash: u64,
hash: HashUint,
}

impl SafeHash {
/// Peek at the hash value, which is guaranteed to be non-zero.
#[inline(always)]
pub fn inspect(&self) -> u64 {
pub fn inspect(&self) -> HashUint {
self.hash
}

#[inline(always)]
pub fn new(hash: u64) -> Self {
// We need to avoid 0 in order to prevent collisions with
// EMPTY_HASH. We can maintain our precious uniform distribution
// of initial indexes by unconditionally setting the MSB,
// effectively reducing the hashes by one bit.
//
// Truncate hash to fit in `HashUint`.
let hash_bits = size_of::<HashUint>() * 8;
SafeHash { hash: (1 << (hash_bits - 1)) | (hash as HashUint) }
}
}

/// We need to remove hashes of 0. That's reserved for empty buckets.
Expand All @@ -156,25 +179,21 @@ pub fn make_hash<T: ?Sized, S>(hash_state: &S, t: &T) -> SafeHash
{
let mut state = hash_state.build_hasher();
t.hash(&mut state);
// We need to avoid 0 in order to prevent collisions with
// EMPTY_HASH. We can maintain our precious uniform distribution
// of initial indexes by unconditionally setting the MSB,
// effectively reducing 64-bits hashes to 63 bits.
SafeHash { hash: 0x8000_0000_0000_0000 | state.finish() }
SafeHash::new(state.finish())
}

// `replace` casts a `*u64` to a `*SafeHash`. Since we statically
// `replace` casts a `*HashUint` to a `*SafeHash`. Since we statically
// ensure that a `FullBucket` points to an index with a non-zero hash,
// and a `SafeHash` is just a `u64` with a different name, this is
// and a `SafeHash` is just a `HashUint` with a different name, this is
// safe.
//
// This test ensures that a `SafeHash` really IS the same size as a
// `u64`. If you need to change the size of `SafeHash` (and
// `HashUint`. If you need to change the size of `SafeHash` (and
// consequently made this test fail), `replace` needs to be
// modified to no longer assume this.
#[test]
fn can_alias_safehash_as_hash() {
    // `replace` relies on `SafeHash` being layout-compatible with the raw
    // stored hash; this pins the size part of that assumption.
    assert_eq!(size_of::<SafeHash>(), size_of::<HashUint>())
}

impl<K, V> RawBucket<K, V> {
Expand Down Expand Up @@ -605,14 +624,14 @@ impl<K, V> RawTable<K, V> {
return RawTable {
size: 0,
capacity: 0,
hashes: Unique::new(EMPTY as *mut u64),
hashes: Unique::new(EMPTY as *mut HashUint),
marker: marker::PhantomData,
};
}

// No need for `checked_mul` before a more restrictive check performed
// later in this method.
let hashes_size = capacity.wrapping_mul(size_of::<u64>());
let hashes_size = capacity.wrapping_mul(size_of::<HashUint>());
let pairs_size = capacity.wrapping_mul(size_of::<(K, V)>());

// Allocating hashmaps is a little tricky. We need to allocate two
Expand All @@ -624,13 +643,13 @@ impl<K, V> RawTable<K, V> {
// right is a little subtle. Therefore, calculating offsets has been
// factored out into a different function.
let (alignment, hash_offset, size, oflo) = calculate_allocation(hashes_size,
align_of::<u64>(),
align_of::<HashUint>(),
pairs_size,
align_of::<(K, V)>());
assert!(!oflo, "capacity overflow");

// One check for overflow that covers calculation and rounding of size.
let size_of_bucket = size_of::<u64>().checked_add(size_of::<(K, V)>()).unwrap();
let size_of_bucket = size_of::<HashUint>().checked_add(size_of::<(K, V)>()).unwrap();
assert!(size >=
capacity.checked_mul(size_of_bucket)
.expect("capacity overflow"),
Expand All @@ -641,7 +660,7 @@ impl<K, V> RawTable<K, V> {
::alloc::oom()
}

let hashes = buffer.offset(hash_offset as isize) as *mut u64;
let hashes = buffer.offset(hash_offset as isize) as *mut HashUint;

RawTable {
capacity: capacity,
Expand All @@ -652,7 +671,7 @@ impl<K, V> RawTable<K, V> {
}

fn first_bucket_raw(&self) -> RawBucket<K, V> {
let hashes_size = self.capacity * size_of::<u64>();
let hashes_size = self.capacity * size_of::<HashUint>();
let pairs_size = self.capacity * size_of::<(K, V)>();

let buffer = *self.hashes as *mut u8;
Expand Down Expand Up @@ -756,7 +775,7 @@ impl<K, V> RawTable<K, V> {
/// this interface is safe, it's not used outside this module.
struct RawBuckets<'a, K, V> {
raw: RawBucket<K, V>,
hashes_end: *mut u64,
hashes_end: *mut HashUint,

// Strictly speaking, this should be &'a (K,V), but that would
// require that K:'a, and we often use RawBuckets<'static...> for
Expand Down Expand Up @@ -802,7 +821,7 @@ impl<'a, K, V> Iterator for RawBuckets<'a, K, V> {
/// the table's remaining entries. It's used in the implementation of Drop.
struct RevMoveBuckets<'a, K, V> {
raw: RawBucket<K, V>,
hashes_end: *mut u64,
hashes_end: *mut HashUint,
elems_left: usize,

// As above, `&'a (K,V)` would seem better, but we often use
Expand Down Expand Up @@ -1036,10 +1055,10 @@ impl<K, V> Drop for RawTable<K, V> {
}
}

let hashes_size = self.capacity * size_of::<u64>();
let hashes_size = self.capacity * size_of::<HashUint>();
let pairs_size = self.capacity * size_of::<(K, V)>();
let (align, _, size, oflo) = calculate_allocation(hashes_size,
align_of::<u64>(),
align_of::<HashUint>(),
pairs_size,
align_of::<(K, V)>());

Expand Down