From 702f349f0b7860fe02efa1fea3354b6350161426 Mon Sep 17 00:00:00 2001
From: Orson Peters <orsonpeters@gmail.com>
Date: Wed, 15 May 2024 00:17:25 +0200
Subject: [PATCH 1/6] Replace hash with faster and better finalized hash

---
 Cargo.toml |   1 +
 README.md  |  21 ++-
 src/lib.rs | 366 +++++++++++++++++++++++++++++++++++++----------------
 3 files changed, 274 insertions(+), 114 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 191ef72..4ed7249 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,6 +12,7 @@ edition = "2018"
 [features]
 default = ["std"]
 std = []
+nightly = []
 rand = ["dep:rand", "std"]
 
 [dependencies]
diff --git a/README.md b/README.md
index 05991b6..2a1b0cc 100644
--- a/README.md
+++ b/README.md
@@ -3,13 +3,22 @@
 [![crates.io](https://img.shields.io/crates/v/rustc-hash.svg)](https://crates.io/crates/rustc-hash)
 [![Documentation](https://docs.rs/rustc-hash/badge.svg)](https://docs.rs/rustc-hash)
 
-A speedy, non-cryptographic hashing algorithm used by `rustc` and Firefox.
+A speedy, non-cryptographic hashing algorithm used by `rustc`.
 The [hash map in `std`](https://doc.rust-lang.org/std/collections/struct.HashMap.html) uses SipHash by default, which provides resistance against DOS attacks.
-These attacks aren't as much of a concern in the compiler so we prefer to use the quicker, non-cryptographic Fx algorithm.
-
-The Fx algorithm is a unique one used by Firefox. It is fast because it can hash eight bytes at a time.
-Within `rustc`, it consistently outperforms every other tested algorithm (such as FNV).
-The collision rate is similar or slightly worse than other low-quality hash functions.
+These attacks aren't as much of a concern in the compiler so we prefer to use a
+quicker, non-cryptographic hash algorithm.
+
+The original hash algorithm provided by this crate was one taken from Firefox,
+hence the hasher it provides is called FxHasher. This name is kept for backwards
+compatibility, but the underlying hash has since been replaced. The current
+design for the hasher is a polynomial hash finished with a single bit rotation,
+together with a wyhash-inspired compression function for strings/slices, both
+designed by Orson Peters.
+
+Within `rustc`, it consistently outperforms every other tested algorithm or
+variation, despite its simplicity. Spending more CPU cycles on a higher quality
+hash does not reduce hash collisions enough to be worth the cost on real-world
+benchmarks.
 
 ## Usage
 
diff --git a/src/lib.rs b/src/lib.rs
index b94393b..ce50aa1 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,4 @@
-//! A speedy, non-cryptographic hashing algorithm used by `rustc` and Firefox.
+//! A speedy, non-cryptographic hashing algorithm used by `rustc`.
 //!
 //! # Example
 //!
@@ -15,6 +15,7 @@
 //! ```
 
 #![no_std]
+#![cfg_attr(feature = "nightly", feature(hasher_prefixfree_extras))]
 
 #[cfg(feature = "std")]
 extern crate std;
@@ -32,8 +33,6 @@ use core::default::Default;
 #[cfg(feature = "std")]
 use core::hash::BuildHasherDefault;
 use core::hash::Hasher;
-use core::mem::size_of;
-use core::ops::BitXor;
 #[cfg(feature = "std")]
 use std::collections::{HashMap, HashSet};
 
@@ -55,33 +54,26 @@ pub use seeded_state::{FxHashMapSeed, FxHashSetSeed, FxSeededState};
 /// compiler we're not really worried about DOS attempts, so we use a fast
 /// non-cryptographic hash.
 ///
-/// This is the same as the algorithm used by Firefox -- which is a homespun
-/// one not based on any widely-known algorithm -- though modified to produce
-/// 64-bit hash values instead of 32-bit hash values. It consistently
-/// out-performs an FNV-based hash within rustc itself -- the collision rate is
-/// similar or slightly worse than FNV, but the speed of the hash function
-/// itself is much higher because it works on up to 8 bytes at a time.
+/// The current implementation is a fast polynomial hash with a single
+/// bit rotation as a finishing step designed by Orson Peters.
 #[derive(Clone)]
 pub struct FxHasher {
     hash: usize,
 }
 
-#[cfg(target_pointer_width = "32")]
-const K: usize = 0x9e3779b9;
+// One might view a polynomial hash
+//    m[0] * k    + m[1] * k^2  + m[2] * k^3  + ...
+// as a multilinear hash with keystream k[..]
+//    m[0] * k[0] + m[1] * k[1] + m[2] * k[2] + ...
+// where keystream k just happens to be generated using a multiplicative
+// congruential pseudorandom number generator (MCG). For that reason we chose a
+// constant that was found to be good for an MCG in:
+//     "Computationally Easy, Spectrally Good Multipliers for Congruential
+//     Pseudorandom Number Generators" by Guy Steele and Sebastiano Vigna.
 #[cfg(target_pointer_width = "64")]
-const K: usize = 0x517cc1b727220a95;
-
-#[inline]
-fn take_first_chunk<'a, const N: usize>(slice: &mut &'a [u8]) -> Option<&'a [u8; N]> {
-    // TODO: use [T]::split_first_chunk() when stable
-    if slice.len() < N {
-        return None;
-    }
-
-    let (first, rest) = slice.split_at(N);
-    *slice = rest;
-    Some(first.try_into().unwrap())
-}
+const K: usize = 0xf1357aea2e62a9c5;
+#[cfg(target_pointer_width = "32")]
+const K: usize = 0x93d765dd;
 
 impl FxHasher {
     /// Creates a `fx` hasher with a given seed.
@@ -105,42 +97,200 @@ impl Default for FxHasher {
 impl FxHasher {
     #[inline]
     fn add_to_hash(&mut self, i: usize) {
-        self.hash = self.hash.rotate_left(5).bitxor(i).wrapping_mul(K);
+        self.hash = self.hash.wrapping_add(i).wrapping_mul(K);
     }
 }
 
 impl Hasher for FxHasher {
     #[inline]
-    fn write(&mut self, mut bytes: &[u8]) {
-        // Ensure all bytes will be consumed
-        const _: () = assert!(size_of::<usize>() <= size_of::<u64>());
-        // Ensure no bytes are discarded by casting to usize
-        const _: () = assert!(size_of::<u32>() <= size_of::<usize>());
-        // Copy the 1 word sized state to a local variable to ensure it
-        // is kept in a register.
-        // See: https://github.com/rust-lang/rustc-hash/pull/34
-        let mut state = self.clone();
-        while let Some(&usize_bytes) = take_first_chunk(&mut bytes) {
-            state.add_to_hash(usize::from_ne_bytes(usize_bytes));
-        }
-        if let Some(&u32_bytes) = take_first_chunk(&mut bytes) {
-            state.add_to_hash(u32::from_ne_bytes(u32_bytes) as usize);
-        }
-        if let Some(&u16_bytes) = take_first_chunk(&mut bytes) {
-            state.add_to_hash(u16::from_ne_bytes(u16_bytes) as usize);
-        }
-        if let Some(&[u8_byte]) = take_first_chunk(&mut bytes) {
-            state.add_to_hash(u8_byte as usize);
-        }
-        *self = state;
+    fn write(&mut self, bytes: &[u8]) {
+        self.write_u64(hash_bytes(bytes));
+    }
+
+    #[inline]
+    fn write_u8(&mut self, i: u8) {
+        self.add_to_hash(i as usize);
+    }
+
+    #[inline]
+    fn write_u16(&mut self, i: u16) {
+        self.add_to_hash(i as usize);
+    }
+
+    #[inline]
+    fn write_u32(&mut self, i: u32) {
+        self.add_to_hash(i as usize);
+    }
+
+    #[inline]
+    fn write_u64(&mut self, i: u64) {
+        self.add_to_hash(i as usize);
+        #[cfg(target_pointer_width = "32")]
+        self.add_to_hash((i >> 32) as usize);
+    }
+
+    #[inline]
+    fn write_u128(&mut self, i: u128) {
+        self.add_to_hash(i as usize);
+        #[cfg(target_pointer_width = "32")]
+        self.add_to_hash((i >> 32) as usize);
+        self.add_to_hash((i >> 64) as usize);
+        #[cfg(target_pointer_width = "32")]
+        self.add_to_hash((i >> 96) as usize);
+    }
+
+    #[inline]
+    fn write_usize(&mut self, i: usize) {
+        self.add_to_hash(i);
+    }
+
+    #[cfg(feature = "nightly")]
+    #[inline]
+    fn write_length_prefix(&mut self, len: usize) {
+        // Most cases will specialize hash_slice anyway which calls write(),
+        // which encodes the length already.
+    }
+
+    #[cfg(feature = "nightly")]
+    #[inline]
+    fn write_str(&mut self, s: &str) {
+        // We don't need anything special here.
+        self.write(s.as_bytes())
     }
 
     #[inline]
     fn finish(&self) -> u64 {
-        self.hash as u64
+        // Since we used a multiplicative hash our top bits have the most
+        // entropy (with the top bit having the most, decreasing as you go).
+        // As most hash table implementations (including hashbrown) compute
+        // the bucket index from the bottom bits we want to move bits from the
+        // top to the bottom. Ideally we'd rotate left by exactly the hash table
+        // size, but as we don't know this we'll choose 20 bits, giving decent
+        // entropy up until 2^20 table sizes. On 32-bit hosts we'll dial it
+        // back down a bit to 15 bits.
+
+        #[cfg(target_pointer_width = "64")]
+        const ROTATE: u32 = 20;
+        #[cfg(target_pointer_width = "32")]
+        const ROTATE: u32 = 15;
+
+        self.hash.rotate_left(ROTATE) as u64
+
+        // A bit reversal would be even better, except hashbrown also expects
+        // good entropy in the top 7 bits and a bit reverse would fill those
+        // bits with low entropy. More importantly, bit reversals are very slow
+        // on x86-64. A byte reversal is relatively fast, but still has a 2
+        // cycle latency on x86-64 compared to the 1 cycle latency of a rotate.
+        // It also suffers from the hashbrown-top-7-bit-issue.
+    }
+}
+
+// Nothing special, digits of pi.
+const SEED1: u64 = 0x243f6a8885a308d3;
+const SEED2: u64 = 0x13198a2e03707344;
+const PREVENT_TRIVIAL_ZERO_COLLAPSE: u64 = 0xa4093822299f31d0;
+
+#[inline]
+fn multiply_mix(x: u64, y: u64) -> u64 {
+    #[cfg(target_pointer_width = "64")]
+    {
+        // We compute the full u64 x u64 -> u128 product; this is a single mul
+        // instruction on x86-64, and one mul plus one mulhi on ARM64.
+        let full = (x as u128) * (y as u128);
+        let lo = full as u64;
+        let hi = (full >> 64) as u64;
+
+        // The middle bits of the full product fluctuate the most with small
+        // changes in the input. This is the top bits of lo and the bottom bits
+        // of hi. We can thus make the entire output fluctuate with small
+        // changes to the input by XOR'ing these two halves.
+        lo ^ hi
+
+        // Unfortunately both 2^64 + 1 and 2^64 - 1 have small prime factors,
+        // otherwise combining with + or - could result in a really strong hash, as:
+        //     x * y = 2^64 * hi + lo = (-1) * hi + lo = lo - hi,   (mod 2^64 + 1)
+        //     x * y = 2^64 * hi + lo =    1 * hi + lo = lo + hi,   (mod 2^64 - 1)
+        // Multiplicative hashing is universal in a field (like mod p).
+    }
+
+    #[cfg(target_pointer_width = "32")]
+    {
+        // u64 x u64 -> u128 product is prohibitively expensive on 32-bit.
+        // Decompose into 32-bit parts.
+        let lx = x as u32;
+        let ly = y as u32;
+        let hx = (x >> 32) as u32;
+        let hy = (y >> 32) as u32;
+
+        // Multiply (u32 x u32 -> u64) the low bits of one with the high bits of the other.
+        let afull = (lx as u64) * (hy as u64);
+        let bfull = (hx as u64) * (ly as u64);
+
+        // Combine, swapping low/high of one of them so the upper bits of the
+        // product of one combine with the lower bits of the other.
+        afull ^ bfull.rotate_right(32)
     }
 }
 
+/// A wyhash-inspired non-collision-resistant hash for strings/slices designed
+/// by Orson Peters, with a focus on small strings and small codesize.
+///
+/// The 64-bit version of this hash passes the SMHasher3 test suite on the full
+/// 64-bit output, that is, f(hash_bytes(b) ^ f(seed)) for some good avalanching
+/// permutation f() passed all tests with zero failures. When using the 32-bit
+/// version of multiply_mix this hash has a few non-catastrophic failures where
+/// there are a handful more collisions than an optimal hash would give.
+///
+/// We don't bother avalanching here as we'll feed this hash into a
+/// multiplication after which we take the high bits, which avalanches for us.
+#[inline]
+fn hash_bytes(bytes: &[u8]) -> u64 {
+    let len = bytes.len();
+    let mut s0 = SEED1;
+    let mut s1 = SEED2;
+
+    if len <= 16 {
+        // XOR the input into s0, s1.
+        if len >= 8 {
+            s0 ^= u64::from_le_bytes(bytes[0..8].try_into().unwrap());
+            s1 ^= u64::from_le_bytes(bytes[len - 8..].try_into().unwrap());
+        } else if len >= 4 {
+            s0 ^= u32::from_le_bytes(bytes[0..4].try_into().unwrap()) as u64;
+            s1 ^= u32::from_le_bytes(bytes[len - 4..].try_into().unwrap()) as u64;
+        } else if len > 0 {
+            let lo = bytes[0];
+            let mid = bytes[len / 2];
+            let hi = bytes[len - 1];
+            s0 ^= lo as u64;
+            s1 ^= ((hi as u64) << 8) | mid as u64;
+        }
+    } else {
+        // Handle bulk (can partially overlap with suffix).
+        let mut off = 0;
+        while off < len - 16 {
+            let x = u64::from_le_bytes(bytes[off..off + 8].try_into().unwrap());
+            let y = u64::from_le_bytes(bytes[off + 8..off + 16].try_into().unwrap());
+
+            // Replace s1 with a mix of s0, x, and y, and s0 with s1.
+            // This ensures the compiler can unroll this loop into two
+            // independent streams, one operating on s0, the other on s1.
+            //
+            // Since zeroes are a common input we prevent an immediate trivial
+            // collapse of the hash function by XOR'ing a constant with y.
+            let t = multiply_mix(s0 ^ x, PREVENT_TRIVIAL_ZERO_COLLAPSE ^ y);
+            s0 = s1;
+            s1 = t;
+            off += 16;
+        }
+
+        let suffix = &bytes[len - 16..];
+        s0 ^= u64::from_le_bytes(suffix[0..8].try_into().unwrap());
+        s1 ^= u64::from_le_bytes(suffix[8..16].try_into().unwrap());
+    }
+
+    multiply_mix(s0, s1) ^ (len as u64)
+}
+
 #[cfg(test)]
 mod tests {
     #[cfg(not(any(target_pointer_width = "64", target_pointer_width = "32")))]
@@ -167,75 +317,75 @@ mod tests {
     fn unsigned() {
         test_hash! {
             hash(0_u8) == 0,
-            hash(1_u8) == if B32 { 2654435769 } else { 5871781006564002453 },
-            hash(100_u8) == if B32 { 3450571844 } else { 15329034371404145204 },
-            hash(u8::MAX) == if B32 { 2571255623 } else { 3117886703346944619 },
+            hash(1_u8) == if B32 { 3001993707 } else { 12583873379513078615 },
+            hash(100_u8) == if B32 { 3844759569 } else { 4008740938959785536 },
+            hash(u8::MAX) == if B32 { 999399879 } else { 17600987023830959190 },
 
             hash(0_u16) == 0,
-            hash(1_u16) == if B32 { 2654435769 } else { 5871781006564002453 },
-            hash(100_u16) == if B32 { 3450571844 } else { 15329034371404145204 },
-            hash(u16::MAX) == if B32 { 3682698823 } else { 8086887590654047595 },
+            hash(1_u16) == if B32 { 3001993707 } else { 12583873379513078615 },
+            hash(100_u16) == if B32 { 3844759569 } else { 4008740938959785536 },
+            hash(u16::MAX) == if B32 { 3440503042 } else { 4001367065645062987 },
 
             hash(0_u32) == 0,
-            hash(1_u32) == if B32 { 2654435769 } else { 5871781006564002453 },
-            hash(100_u32) == if B32 { 3450571844 } else { 15329034371404145204 },
-            hash(u32::MAX) == if B32 { 1640531527 } else { 15394791018899305835 },
+            hash(1_u32) == if B32 { 3001993707 } else { 12583873379513078615 },
+            hash(100_u32) == if B32 { 3844759569 } else { 4008740938959785536 },
+            hash(u32::MAX) == if B32 { 1293006356 } else { 17126373362251322066 },
 
             hash(0_u64) == 0,
-            hash(1_u64) == if B32 { 703266523 } else { 5871781006564002453 },
-            hash(100_u64) == if B32 { 2407204753 } else { 15329034371404145204 },
-            hash(u64::MAX) == if B32 { 1660667835 } else { 12574963067145549163 },
+            hash(1_u64) == if B32 { 275023839 } else { 12583873379513078615 },
+            hash(100_u64) == if B32 { 1732383522 } else { 4008740938959785536 },
+            hash(u64::MAX) == if B32 { 1017982517 } else { 5862870694197521576 },
 
             hash(0_u128) == 0,
-            hash(1_u128) == if B32 { 1294492036 } else { 956286968014291186 },
-            hash(100_u128) == if B32 { 3411300242 } else { 2770938889503972258 },
-            hash(u128::MAX) == if B32 { 3723263291 } else { 15973479568771280466 },
+            hash(1_u128) == if B32 { 1860738631 } else { 12885773367358079611 },
+            hash(100_u128) == if B32 { 1389515751 } else { 15751995649841559633 },
+            hash(u128::MAX) == if B32 { 2156022013 } else { 11423841400550042156 },
 
             hash(0_usize) == 0,
-            hash(1_usize) == if B32 { 2654435769 } else { 5871781006564002453 },
-            hash(100_usize) == if B32 { 3450571844 } else { 15329034371404145204 },
-            hash(usize::MAX) == if B32 { 1640531527 } else { 12574963067145549163 },
+            hash(1_usize) == if B32 { 3001993707 } else { 12583873379513078615 },
+            hash(100_usize) == if B32 { 3844759569 } else { 4008740938959785536 },
+            hash(usize::MAX) == if B32 { 1293006356 } else { 5862870694197521576 },
         }
     }
 
     #[test]
     fn signed() {
         test_hash! {
-            hash(i8::MIN) == if B32 { 465362048 } else { 13718205891810249344 },
-            hash(0_i8) == 0,
-            hash(1_i8) == if B32 { 2654435769 } else { 5871781006564002453 },
-            hash(100_i8) == if B32 { 3450571844 } else { 15329034371404145204 },
-            hash(i8::MAX) == if B32 { 2105893575 } else { 7846424885246246891 },
-
-            hash(i16::MIN) == if B32 { 3168567296 } else { 6979334298609025024 },
-            hash(0_i16) == 0,
-            hash(1_i16) == if B32 { 2654435769 } else { 5871781006564002453 },
-            hash(100_i16) == if B32 { 3450571844 } else { 15329034371404145204 },
-            hash(i16::MAX) == if B32 { 514131527 } else { 1107553292045022571 },
-
-            hash(i32::MIN) == if B32 { 2147483648 } else { 10633286012731654144 },
-            hash(0_i32) == 0,
-            hash(1_i32) == if B32 { 2654435769 } else { 5871781006564002453 },
-            hash(100_i32) == if B32 { 3450571844 } else { 15329034371404145204 },
-            hash(i32::MAX) == if B32 { 3788015175 } else { 4761505006167651691 },
-
-            hash(i64::MIN) == if B32 { 2147483648 } else { 9223372036854775808 },
-            hash(0_i64) == 0,
-            hash(1_i64) == if B32 { 703266523 } else { 5871781006564002453 },
-            hash(100_i64) == if B32 { 2407204753 } else { 15329034371404145204 },
-            hash(i64::MAX) == if B32 { 3808151483 } else { 3351591030290773355 },
-
-            hash(i128::MIN) == if B32 { 2147483648 } else { 9223372036854775808 },
-            hash(0_i128) == 0,
-            hash(1_i128) == if B32 { 1294492036 } else { 956286968014291186 },
-            hash(100_i128) == if B32 { 3411300242 } else { 2770938889503972258 },
-            hash(i128::MAX) == if B32 { 1575779643 } else { 6750107531916504658 },
-
-            hash(isize::MIN) == if B32 { 2147483648 } else { 9223372036854775808 },
-            hash(0_isize) == 0,
-            hash(1_isize) == if B32 { 2654435769 } else { 5871781006564002453 },
-            hash(100_isize) == if B32 { 3450571844 } else { 15329034371404145204 },
-            hash(isize::MAX) == if B32 { 3788015175 } else { 3351591030290773355 },
+            hash(i8::MIN) == if B32 { 2000713177 } else { 5869058164817243095 },  
+            hash(0_i8) == 0,                    
+            hash(1_i8) == if B32 { 3001993707 } else { 12583873379513078615 }, 
+            hash(100_i8) == if B32 { 3844759569 } else { 4008740938959785536 },  
+            hash(i8::MAX) == if B32 { 3293686765 } else { 11731928859014764671 }, 
+
+            hash(i16::MIN) == if B32 { 1073764727 } else { 8292620222579070801 },  
+            hash(0_i16) == 0,                    
+            hash(1_i16) == if B32 { 3001993707 } else { 12583873379513078615 }, 
+            hash(100_i16) == if B32 { 3844759569 } else { 4008740938959785536 },  
+            hash(i16::MAX) == if B32 { 2366738315 } else { 14155490916776592377 }, 
+
+            hash(i32::MIN) == if B32 { 16384 } else { 5631751334026900245 },  
+            hash(0_i32) == 0,                    
+            hash(1_i32) == if B32 { 3001993707 } else { 12583873379513078615 }, 
+            hash(100_i32) == if B32 { 3844759569 } else { 4008740938959785536 },  
+            hash(i32::MAX) == if B32 { 1293022740 } else { 11494622028224421821 }, 
+
+            hash(i64::MIN) == if B32 { 16384 } else { 524288 },               
+            hash(0_i64) == 0,                    
+            hash(1_i64) == if B32 { 275023839 } else { 12583873379513078615 }, 
+            hash(100_i64) == if B32 { 1732383522 } else { 4008740938959785536 },  
+            hash(i64::MAX) == if B32 { 1017998901 } else { 5862870694198045864 },  
+
+            hash(i128::MIN) == if B32 { 16384 } else { 524288 },               
+            hash(0_i128) == 0,                    
+            hash(1_i128) == if B32 { 1860738631 } else { 12885773367358079611 }, 
+            hash(100_i128) == if B32 { 1389515751 } else { 15751995649841559633 }, 
+            hash(i128::MAX) == if B32 { 2156005629 } else { 11423841400549517868 }, 
+
+            hash(isize::MIN) == if B32 { 16384 } else { 524288 },               
+            hash(0_isize) == 0,                    
+            hash(1_isize) == if B32 { 3001993707 } else { 12583873379513078615 }, 
+            hash(100_isize) == if B32 { 3844759569 } else { 4008740938959785536 },  
+            hash(isize::MAX) == if B32 { 1293022740 } else { 5862870694198045864 },  
         }
     }
 
@@ -250,13 +400,13 @@ mod tests {
     #[test]
     fn bytes() {
         test_hash! {
-            hash(HashBytes(&[])) == 0,
-            hash(HashBytes(&[0])) == 0,
-            hash(HashBytes(&[0, 0, 0, 0, 0, 0])) == 0,
-            hash(HashBytes(&[1])) == if B32 { 2654435769 } else { 5871781006564002453 },
-            hash(HashBytes(&[2])) == if B32 { 1013904242 } else { 11743562013128004906 },
-            hash(HashBytes(b"uwu")) == if B32 { 3939043750 } else { 16622306935539548858 },
-            hash(HashBytes(b"These are some bytes for testing rustc_hash.")) == if B32 { 2345708736 } else { 12390864548135261390 },
+            hash(HashBytes(&[])) == if B32 { 2673204745 } else { 5175017818631658678 },  
+            hash(HashBytes(&[0])) == if B32 { 2948228584 } else { 11037888512829180254 }, 
+            hash(HashBytes(&[0, 0, 0, 0, 0, 0])) == if B32 { 3223252423 } else { 6891281800865632452 },  
+            hash(HashBytes(&[1])) == if B32 { 2943445104 } else { 4127763515449136980 },  
+            hash(HashBytes(&[2])) == if B32 { 1055423297 } else { 11322700005987241762 }, 
+            hash(HashBytes(b"uwu")) == if B32 { 2699662140 } else { 2129615206728903013 },  
+            hash(HashBytes(b"These are some bytes for testing rustc_hash.")) == if B32 { 2303640537 } else { 5513083560975408889 },  
         }
     }
 

From d5b36344139e1271104307686bb2f59c2cc4f46b Mon Sep 17 00:00:00 2001
From: Orson Peters <orsonpeters@gmail.com>
Date: Wed, 15 May 2024 00:50:40 +0200
Subject: [PATCH 2/6] Fix trailing whitespace

---
 src/lib.rs | 84 +++++++++++++++++++++++++++---------------------------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index ce50aa1..c08d4f3 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -351,41 +351,41 @@ mod tests {
     #[test]
     fn signed() {
         test_hash! {
-            hash(i8::MIN) == if B32 { 2000713177 } else { 5869058164817243095 },  
-            hash(0_i8) == 0,                    
-            hash(1_i8) == if B32 { 3001993707 } else { 12583873379513078615 }, 
-            hash(100_i8) == if B32 { 3844759569 } else { 4008740938959785536 },  
-            hash(i8::MAX) == if B32 { 3293686765 } else { 11731928859014764671 }, 
-
-            hash(i16::MIN) == if B32 { 1073764727 } else { 8292620222579070801 },  
-            hash(0_i16) == 0,                    
-            hash(1_i16) == if B32 { 3001993707 } else { 12583873379513078615 }, 
-            hash(100_i16) == if B32 { 3844759569 } else { 4008740938959785536 },  
-            hash(i16::MAX) == if B32 { 2366738315 } else { 14155490916776592377 }, 
-
-            hash(i32::MIN) == if B32 { 16384 } else { 5631751334026900245 },  
-            hash(0_i32) == 0,                    
-            hash(1_i32) == if B32 { 3001993707 } else { 12583873379513078615 }, 
-            hash(100_i32) == if B32 { 3844759569 } else { 4008740938959785536 },  
-            hash(i32::MAX) == if B32 { 1293022740 } else { 11494622028224421821 }, 
-
-            hash(i64::MIN) == if B32 { 16384 } else { 524288 },               
-            hash(0_i64) == 0,                    
-            hash(1_i64) == if B32 { 275023839 } else { 12583873379513078615 }, 
-            hash(100_i64) == if B32 { 1732383522 } else { 4008740938959785536 },  
-            hash(i64::MAX) == if B32 { 1017998901 } else { 5862870694198045864 },  
-
-            hash(i128::MIN) == if B32 { 16384 } else { 524288 },               
-            hash(0_i128) == 0,                    
-            hash(1_i128) == if B32 { 1860738631 } else { 12885773367358079611 }, 
-            hash(100_i128) == if B32 { 1389515751 } else { 15751995649841559633 }, 
-            hash(i128::MAX) == if B32 { 2156005629 } else { 11423841400549517868 }, 
-
-            hash(isize::MIN) == if B32 { 16384 } else { 524288 },               
-            hash(0_isize) == 0,                    
-            hash(1_isize) == if B32 { 3001993707 } else { 12583873379513078615 }, 
-            hash(100_isize) == if B32 { 3844759569 } else { 4008740938959785536 },  
-            hash(isize::MAX) == if B32 { 1293022740 } else { 5862870694198045864 },  
+            hash(i8::MIN) == if B32 { 2000713177 } else { 5869058164817243095 },
+            hash(0_i8) == 0,
+            hash(1_i8) == if B32 { 3001993707 } else { 12583873379513078615 },
+            hash(100_i8) == if B32 { 3844759569 } else { 4008740938959785536 },
+            hash(i8::MAX) == if B32 { 3293686765 } else { 11731928859014764671 },
+
+            hash(i16::MIN) == if B32 { 1073764727 } else { 8292620222579070801 },
+            hash(0_i16) == 0,
+            hash(1_i16) == if B32 { 3001993707 } else { 12583873379513078615 },
+            hash(100_i16) == if B32 { 3844759569 } else { 4008740938959785536 },
+            hash(i16::MAX) == if B32 { 2366738315 } else { 14155490916776592377 },
+
+            hash(i32::MIN) == if B32 { 16384 } else { 5631751334026900245 },
+            hash(0_i32) == 0,
+            hash(1_i32) == if B32 { 3001993707 } else { 12583873379513078615 },
+            hash(100_i32) == if B32 { 3844759569 } else { 4008740938959785536 },
+            hash(i32::MAX) == if B32 { 1293022740 } else { 11494622028224421821 },
+
+            hash(i64::MIN) == if B32 { 16384 } else { 524288 },
+            hash(0_i64) == 0,
+            hash(1_i64) == if B32 { 275023839 } else { 12583873379513078615 },
+            hash(100_i64) == if B32 { 1732383522 } else { 4008740938959785536 },
+            hash(i64::MAX) == if B32 { 1017998901 } else { 5862870694198045864 },
+
+            hash(i128::MIN) == if B32 { 16384 } else { 524288 },
+            hash(0_i128) == 0,
+            hash(1_i128) == if B32 { 1860738631 } else { 12885773367358079611 },
+            hash(100_i128) == if B32 { 1389515751 } else { 15751995649841559633 },
+            hash(i128::MAX) == if B32 { 2156005629 } else { 11423841400549517868 },
+
+            hash(isize::MIN) == if B32 { 16384 } else { 524288 },
+            hash(0_isize) == 0,
+            hash(1_isize) == if B32 { 3001993707 } else { 12583873379513078615 },
+            hash(100_isize) == if B32 { 3844759569 } else { 4008740938959785536 },
+            hash(isize::MAX) == if B32 { 1293022740 } else { 5862870694198045864 },
         }
     }
 
@@ -400,13 +400,13 @@ mod tests {
     #[test]
     fn bytes() {
         test_hash! {
-            hash(HashBytes(&[])) == if B32 { 2673204745 } else { 5175017818631658678 },  
-            hash(HashBytes(&[0])) == if B32 { 2948228584 } else { 11037888512829180254 }, 
-            hash(HashBytes(&[0, 0, 0, 0, 0, 0])) == if B32 { 3223252423 } else { 6891281800865632452 },  
-            hash(HashBytes(&[1])) == if B32 { 2943445104 } else { 4127763515449136980 },  
-            hash(HashBytes(&[2])) == if B32 { 1055423297 } else { 11322700005987241762 }, 
-            hash(HashBytes(b"uwu")) == if B32 { 2699662140 } else { 2129615206728903013 },  
-            hash(HashBytes(b"These are some bytes for testing rustc_hash.")) == if B32 { 2303640537 } else { 5513083560975408889 },  
+            hash(HashBytes(&[])) == if B32 { 2673204745 } else { 5175017818631658678 },
+            hash(HashBytes(&[0])) == if B32 { 2948228584 } else { 11037888512829180254 },
+            hash(HashBytes(&[0, 0, 0, 0, 0, 0])) == if B32 { 3223252423 } else { 6891281800865632452 },
+            hash(HashBytes(&[1])) == if B32 { 2943445104 } else { 4127763515449136980 },
+            hash(HashBytes(&[2])) == if B32 { 1055423297 } else { 11322700005987241762 },
+            hash(HashBytes(b"uwu")) == if B32 { 2699662140 } else { 2129615206728903013 },
+            hash(HashBytes(b"These are some bytes for testing rustc_hash.")) == if B32 { 2303640537 } else { 5513083560975408889 },
         }
     }
 

From 4f6be25b23c194281fcfc8c42ae4473ec9231fb0 Mon Sep 17 00:00:00 2001
From: Orson Peters <orsonpeters@gmail.com>
Date: Sat, 18 May 2024 14:22:32 +0200
Subject: [PATCH 3/6] address review comments

---
 README.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 2a1b0cc..36b13ca 100644
--- a/README.md
+++ b/README.md
@@ -5,8 +5,8 @@
 
 A speedy, non-cryptographic hashing algorithm used by `rustc`.
 The [hash map in `std`](https://doc.rust-lang.org/std/collections/struct.HashMap.html) uses SipHash by default, which provides resistance against DOS attacks.
-These attacks aren't as much of a concern in the compiler so we prefer to use a
-quicker, non-cryptographic hash algorithm.
+These attacks aren't a concern in the compiler so we prefer to use a quicker,
+non-cryptographic hash algorithm.
 
 The original hash algorithm provided by this crate was one taken from Firefox,
 hence the hasher it provides is called FxHasher. This name is kept for backwards
@@ -15,10 +15,10 @@ design for the hasher is a polynomial hash finished with a single bit rotation,
 together with a wyhash-inspired compression function for strings/slices, both
 designed by Orson Peters.
 
-Within `rustc`, it consistently outperforms every other tested algorithm or
-variation, despite its simplicity. Spending more CPU cycles on a higher quality
-hash does not reduce hash collisions enough to be worth the cost on real-world
-benchmarks.
+For `rustc` we have tried many different hashing algorithms. Hashing speed is
+critical, especially for single integers. Spending more CPU cycles on a higher
+quality hash does not reduce hash collisions enough to make the compiler faster
+on real-world benchmarks.
 
 ## Usage
 

From e07b577e6cb9d0cc42262b87cdcac32ee252c90f Mon Sep 17 00:00:00 2001
From: Orson Peters <orsonpeters@gmail.com>
Date: Sat, 18 May 2024 14:22:47 +0200
Subject: [PATCH 4/6] fix unused function argument warning

---
 src/lib.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lib.rs b/src/lib.rs
index c08d4f3..b967451 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -146,7 +146,7 @@ impl Hasher for FxHasher {
 
     #[cfg(feature = "nightly")]
     #[inline]
-    fn write_length_prefix(&mut self, len: usize) {
+    fn write_length_prefix(&mut self, _len: usize) {
         // Most cases will specialize hash_slice anyway which calls write(),
         // which encodes the length already.
     }

From d3453abd348365257245a4070f8937b07c44d340 Mon Sep 17 00:00:00 2001
From: Orson Peters <orsonpeters@gmail.com>
Date: Sat, 18 May 2024 23:47:07 +0200
Subject: [PATCH 5/6] Address more review comments

---
 src/lib.rs | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index b967451..62fa223 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -104,6 +104,7 @@ impl FxHasher {
 impl Hasher for FxHasher {
     #[inline]
     fn write(&mut self, bytes: &[u8]) {
+        // Compress the byte string to a single u64 and add to our hash.
         self.write_u64(hash_bytes(bytes));
     }
 
@@ -147,14 +148,19 @@ impl Hasher for FxHasher {
     #[cfg(feature = "nightly")]
     #[inline]
     fn write_length_prefix(&mut self, _len: usize) {
-        // Most cases will specialize hash_slice anyway which calls write(),
-        // which encodes the length already.
+        // Most cases will specialize hash_slice to call write(), which encodes
+        // the length already in a more efficient manner than we could here. For
+        // HashDoS-resistance you would still need to include this for the
+        // non-slice collection hashes, but for the purposes of rustc we do not
+        // care and do not wish to pay the performance penalty of mixing in len
+        // for those collections.
     }
 
     #[cfg(feature = "nightly")]
     #[inline]
     fn write_str(&mut self, s: &str) {
-        // We don't need anything special here.
+        // Similarly here, write already encodes the length, so nothing special
+        // is needed.
         self.write(s.as_bytes())
     }
 

From 4cb9c7cfd5349b7f5a9c40d8f9ad40404fdd130d Mon Sep 17 00:00:00 2001
From: Orson Peters <orsonpeters@gmail.com>
Date: Tue, 28 May 2024 11:49:23 +0200
Subject: [PATCH 6/6] Remove mention of firefox from Cargo.toml.

---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 4ed7249..e6686c4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,7 +2,7 @@
 name = "rustc-hash"
 version = "1.1.0"
 authors = ["The Rust Project Developers"]
-description = "A speedy, non-cryptographic hashing algorithm used by rustc and Firefox"
+description = "A speedy, non-cryptographic hashing algorithm used by rustc"
 license = "Apache-2.0/MIT"
 readme = "README.md"
 keywords = ["hash", "hasher", "fxhash", "rustc"]