From 69a546768e7b14935cbeac65cafad0c6ac00d365 Mon Sep 17 00:00:00 2001 From: Stiopa Koltsov Date: Mon, 15 Jan 2024 18:01:58 -0800 Subject: [PATCH] Switch to fxhash Summary: Mostly for buck2, but starlark should benefit from it too. `ahash` is supposed to to be the fastest but: - it does not guarantee to produce the same result on different machines, and generally being deterministic is not their goal - moreover, `ahash` works the best when AES instructions are available, which are not always available. In particular, when compiled by default on x86_64 (`rustc -O`), ahash works without AES. `fxhash` is roughly the same as `fnv`, but processes words instead of bytes. For both starlark and buck2 being deterministic is very important. We try carefully to use ordered collections like `SmallMap` where deterministic output is needed, so technically deterministic hashing is not important. However, I'm not 100% sure that we do that correctly in every single place, and we won't make such error in the future, so stable hasher is safer option even if performance is suboptimal. For the same reason I picked `FxHasher64` instead of `FxHasher` to make hasher behave identically on 32 and 64 bit machines. However, `FxHasher64` (or `FxHasher`) hashes strings reading words in native endianness. So if someone compiles buck2 on POWER, it may behave differently. This is not an issue practically, but ideally we should fix it. `cquery deps(...)` spends most of the time hashing. This diff makes it better. Reviewed By: dtolnay Differential Revision: D52787463 fbshipit-source-id: d8e4cd52f36f7ed7d679d08cdf31e6d00421d84c --- starlark_map/Cargo.toml | 2 +- starlark_map/src/hasher.rs | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/starlark_map/Cargo.toml b/starlark_map/Cargo.toml index 0faab2642..19903c2fa 100644 --- a/starlark_map/Cargo.toml +++ b/starlark_map/Cargo.toml @@ -15,7 +15,7 @@ allocative = { workspace = true, features = ["hashbrown"] } dupe = { workspace = true } equivalent = { workspace = true } -fnv = "1.0.7" +fxhash = "0.2.1" hashbrown = { version = "0.12.3", features = ["raw"] } serde = { version = "1.0", features = ["derive"] } diff --git a/starlark_map/src/hasher.rs b/starlark_map/src/hasher.rs index a85422637..0443c6599 100644 --- a/starlark_map/src/hasher.rs +++ b/starlark_map/src/hasher.rs @@ -19,7 +19,7 @@ use std::hash::BuildHasher; use std::hash::Hasher; use dupe::Dupe; -use fnv::FnvHasher; +use fxhash::FxHasher64; use crate::hash_value::StarlarkHashValue; @@ -27,7 +27,10 @@ use crate::hash_value::StarlarkHashValue; /// /// Starlark relies on stable hashing, and this is the hasher. #[derive(Default)] -pub struct StarlarkHasher(FnvHasher); +pub struct StarlarkHasher( + // TODO(nga): `FxHasher64` is endian-dependent, this is not right. + FxHasher64, +); impl StarlarkHasher { /// Creates a new hasher.