Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize Eq and Hash for Path/PathBuf #90596

Merged
merged 5 commits into from
Nov 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 61 additions & 9 deletions library/std/src/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,25 @@ impl FusedIterator for Components<'_> {}
impl<'a> cmp::PartialEq for Components<'a> {
#[inline]
fn eq(&self, other: &Components<'a>) -> bool {
let Components { path: _, front: _, back: _, has_physical_root: _, prefix: _ } = self;

// Fast path for exact matches, e.g. for hashmap lookups.
// Don't explicitly compare the prefix or has_physical_root fields since they'll
// either be covered by the `path` buffer or are only relevant for `prefix_verbatim()`.
if self.path.len() == other.path.len()
&& self.front == other.front
&& self.back == State::Body
&& other.back == State::Body
&& self.prefix_verbatim() == other.prefix_verbatim()
{
// possible future improvement: this could bail out earlier if there were a
// reverse memcmp/bcmp comparing back to front
if self.path == other.path {
Mark-Simulacrum marked this conversation as resolved.
Show resolved Hide resolved
return true;
}
}

// compare back to front since absolute paths often share long prefixes
Iterator::eq(self.clone().rev(), other.clone().rev())
}
}
Expand Down Expand Up @@ -1013,13 +1032,12 @@ fn compare_components(mut left: Components<'_>, mut right: Components<'_>) -> cm
// The fast path isn't taken for paths with a PrefixComponent to avoid backtracking into
// the middle of one
if left.prefix.is_none() && right.prefix.is_none() && left.front == right.front {
// this might benefit from a [u8]::first_mismatch simd implementation, if it existed
let first_difference =
match left.path.iter().zip(right.path.iter()).position(|(&a, &b)| a != b) {
None if left.path.len() == right.path.len() => return cmp::Ordering::Equal,
None => left.path.len().min(right.path.len()),
Some(diff) => diff,
};
// possible future improvement: a [u8]::first_mismatch simd implementation
let first_difference = match left.path.iter().zip(right.path).position(|(&a, &b)| a != b) {
None if left.path.len() == right.path.len() => return cmp::Ordering::Equal,
None => left.path.len().min(right.path.len()),
Some(diff) => diff,
};

if let Some(previous_sep) =
left.path[..first_difference].iter().rposition(|&b| left.is_sep_byte(b))
Expand Down Expand Up @@ -2873,9 +2891,43 @@ impl cmp::PartialEq for Path {
#[stable(feature = "rust1", since = "1.0.0")]
impl Hash for Path {
fn hash<H: Hasher>(&self, h: &mut H) {
for component in self.components() {
component.hash(h);
let bytes = self.as_u8_slice();
let prefix_len = match parse_prefix(&self.inner) {
Some(prefix) => {
prefix.hash(h);
prefix.len()
}
None => 0,
};
let bytes = &bytes[prefix_len..];

let mut component_start = 0;
let mut bytes_hashed = 0;

for i in 0..bytes.len() {
if is_sep_byte(bytes[i]) {
if i > component_start {
let to_hash = &bytes[component_start..i];
h.write(to_hash);
bytes_hashed += to_hash.len();
}

// skip over separator and optionally a following CurDir item
// since components() would normalize these away
component_start = i + match bytes[i..] {
[_, b'.', b'/', ..] | [_, b'.'] => 2,
_ => 1,
};
}
}

if component_start < bytes.len() {
let to_hash = &bytes[component_start..];
h.write(to_hash);
bytes_hashed += to_hash.len();
}

h.write_usize(bytes_hashed);
Mark-Simulacrum marked this conversation as resolved.
Show resolved Hide resolved
}
}

Expand Down
80 changes: 78 additions & 2 deletions library/std/src/path/tests.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use super::*;

use crate::collections::BTreeSet;
use crate::collections::hash_map::DefaultHasher;
use crate::collections::{BTreeSet, HashSet};
use crate::hash::Hasher;
use crate::rc::Rc;
use crate::sync::Arc;
use core::hint::black_box;
Expand Down Expand Up @@ -1632,7 +1634,25 @@ fn into_rc() {
fn test_ord() {
macro_rules! ord(
($ord:ident, $left:expr, $right:expr) => ( {
assert_eq!(Path::new($left).cmp(&Path::new($right)), core::cmp::Ordering::$ord);
use core::cmp::Ordering;

let left = Path::new($left);
let right = Path::new($right);
assert_eq!(left.cmp(&right), Ordering::$ord);
if (core::cmp::Ordering::$ord == Ordering::Equal) {
assert_eq!(left, right);

let mut hasher = DefaultHasher::new();
left.hash(&mut hasher);
let left_hash = hasher.finish();
hasher = DefaultHasher::new();
right.hash(&mut hasher);
let right_hash = hasher.finish();

assert_eq!(left_hash, right_hash, "hashes for {:?} and {:?} must match", left, right);
} else {
assert_ne!(left, right);
}
});
);

Expand Down Expand Up @@ -1693,3 +1713,59 @@ fn bench_path_cmp_fast_path_short(b: &mut test::Bencher) {
set.insert(paths[500].as_path());
});
}

#[bench]
fn bench_path_hashset(b: &mut test::Bencher) {
let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/";
let paths: Vec<_> =
(0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect();

let mut set = HashSet::new();

paths.iter().for_each(|p| {
set.insert(p.as_path());
});

b.iter(|| {
set.remove(paths[500].as_path());
set.insert(black_box(paths[500].as_path()))
});
}

#[bench]
fn bench_path_hashset_miss(b: &mut test::Bencher) {
let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/";
let paths: Vec<_> =
(0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect();

let mut set = HashSet::new();

paths.iter().for_each(|p| {
set.insert(p.as_path());
});

let probe = PathBuf::from(prefix).join("other");

b.iter(|| set.remove(black_box(probe.as_path())));
}

#[bench]
fn bench_hash_path_short(b: &mut test::Bencher) {
let mut hasher = DefaultHasher::new();
let path = Path::new("explorer.exe");

b.iter(|| black_box(path).hash(&mut hasher));

black_box(hasher.finish());
}

#[bench]
fn bench_hash_path_long(b: &mut test::Bencher) {
let mut hasher = DefaultHasher::new();
let path =
Path::new("/aaaaa/aaaaaa/./../aaaaaaaa/bbbbbbbbbbbbb/ccccccccccc/ddddddddd/eeeeeee.fff");

b.iter(|| black_box(path).hash(&mut hasher));

black_box(hasher.finish());
}
1 change: 1 addition & 0 deletions library/std/src/sys/unix/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pub fn is_verbatim_sep(b: u8) -> bool {
b == b'/'
}

#[inline]
pub fn parse_prefix(_: &OsStr) -> Option<Prefix<'_>> {
None
}
Expand Down