diff --git a/library/std/src/path.rs b/library/std/src/path.rs index dc0c735a06c6f..cf2cd5adc4848 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -979,6 +979,25 @@ impl FusedIterator for Components<'_> {} impl<'a> cmp::PartialEq for Components<'a> { #[inline] fn eq(&self, other: &Components<'a>) -> bool { + let Components { path: _, front: _, back: _, has_physical_root: _, prefix: _ } = self; + + // Fast path for exact matches, e.g. for hashmap lookups. + // Don't explicitly compare the prefix or has_physical_root fields since they'll + // either be covered by the `path` buffer or are only relevant for `prefix_verbatim()`. + if self.path.len() == other.path.len() + && self.front == other.front + && self.back == State::Body + && other.back == State::Body + && self.prefix_verbatim() == other.prefix_verbatim() + { + // possible future improvement: this could bail out earlier if there were a + // reverse memcmp/bcmp comparing back to front + if self.path == other.path { + return true; + } + } + + // compare back to front since absolute paths often share long prefixes Iterator::eq(self.clone().rev(), other.clone().rev()) } } @@ -1013,13 +1032,12 @@ fn compare_components(mut left: Components<'_>, mut right: Components<'_>) -> cm // The fast path isn't taken for paths with a PrefixComponent to avoid backtracking into // the middle of one if left.prefix.is_none() && right.prefix.is_none() && left.front == right.front { - // this might benefit from a [u8]::first_mismatch simd implementation, if it existed - let first_difference = - match left.path.iter().zip(right.path.iter()).position(|(&a, &b)| a != b) { - None if left.path.len() == right.path.len() => return cmp::Ordering::Equal, - None => left.path.len().min(right.path.len()), - Some(diff) => diff, - }; + // possible future improvement: a [u8]::first_mismatch simd implementation + let first_difference = match left.path.iter().zip(right.path).position(|(&a, &b)| a != b) { + None if left.path.len() == right.path.len() => return cmp::Ordering::Equal, + None => left.path.len().min(right.path.len()), + Some(diff) => diff, + }; if let Some(previous_sep) = left.path[..first_difference].iter().rposition(|&b| left.is_sep_byte(b)) @@ -2873,9 +2891,43 @@ impl cmp::PartialEq for Path { #[stable(feature = "rust1", since = "1.0.0")] impl Hash for Path { fn hash(&self, h: &mut H) { - for component in self.components() { - component.hash(h); + let bytes = self.as_u8_slice(); + let prefix_len = match parse_prefix(&self.inner) { + Some(prefix) => { + prefix.hash(h); + prefix.len() + } + None => 0, + }; + let bytes = &bytes[prefix_len..]; + + let mut component_start = 0; + let mut bytes_hashed = 0; + + for i in 0..bytes.len() { + if is_sep_byte(bytes[i]) { + if i > component_start { + let to_hash = &bytes[component_start..i]; + h.write(to_hash); + bytes_hashed += to_hash.len(); + } + + // skip over separator and optionally a following CurDir item + // since components() would normalize these away + component_start = i + match bytes[i..] { + [_, b'.', b'/', ..] | [_, b'.'] => 2, + _ => 1, + }; + } + } + + if component_start < bytes.len() { + let to_hash = &bytes[component_start..]; + h.write(to_hash); + bytes_hashed += to_hash.len(); } + + h.write_usize(bytes_hashed); } } diff --git a/library/std/src/path/tests.rs b/library/std/src/path/tests.rs index 0a16ff2a721ce..2bf499e1ab823 100644 --- a/library/std/src/path/tests.rs +++ b/library/std/src/path/tests.rs @@ -1,6 +1,8 @@ use super::*; -use crate::collections::BTreeSet; +use crate::collections::hash_map::DefaultHasher; +use crate::collections::{BTreeSet, HashSet}; +use crate::hash::Hasher; use crate::rc::Rc; use crate::sync::Arc; use core::hint::black_box; @@ -1632,7 +1634,25 @@ fn into_rc() { fn test_ord() { macro_rules! ord( ($ord:ident, $left:expr, $right:expr) => ( { - assert_eq!(Path::new($left).cmp(&Path::new($right)), core::cmp::Ordering::$ord); + use core::cmp::Ordering; + + let left = Path::new($left); + let right = Path::new($right); + assert_eq!(left.cmp(&right), Ordering::$ord); + if (core::cmp::Ordering::$ord == Ordering::Equal) { + assert_eq!(left, right); + + let mut hasher = DefaultHasher::new(); + left.hash(&mut hasher); + let left_hash = hasher.finish(); + hasher = DefaultHasher::new(); + right.hash(&mut hasher); + let right_hash = hasher.finish(); + + assert_eq!(left_hash, right_hash, "hashes for {:?} and {:?} must match", left, right); + } else { + assert_ne!(left, right); + } }); ); @@ -1693,3 +1713,59 @@ fn bench_path_cmp_fast_path_short(b: &mut test::Bencher) { set.insert(paths[500].as_path()); }); } + +#[bench] +fn bench_path_hashset(b: &mut test::Bencher) { + let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/"; + let paths: Vec<_> = + (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect(); + + let mut set = HashSet::new(); + + paths.iter().for_each(|p| { + set.insert(p.as_path()); + }); + + b.iter(|| { + set.remove(paths[500].as_path()); + set.insert(black_box(paths[500].as_path())) + }); +} + +#[bench] +fn bench_path_hashset_miss(b: &mut test::Bencher) { + let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/"; + let paths: Vec<_> = + (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect(); + + let mut set = HashSet::new(); + + paths.iter().for_each(|p| { + set.insert(p.as_path()); + }); + + let probe = PathBuf::from(prefix).join("other"); + + b.iter(|| set.remove(black_box(probe.as_path()))); +} + +#[bench] +fn bench_hash_path_short(b: &mut test::Bencher) { + let mut hasher = DefaultHasher::new(); + let path = Path::new("explorer.exe"); + + b.iter(|| black_box(path).hash(&mut hasher)); + + black_box(hasher.finish()); +} + +#[bench] +fn bench_hash_path_long(b: &mut test::Bencher) { + let mut hasher = DefaultHasher::new(); + let path = + Path::new("/aaaaa/aaaaaa/./../aaaaaaaa/bbbbbbbbbbbbb/ccccccccccc/ddddddddd/eeeeeee.fff"); + + b.iter(|| black_box(path).hash(&mut hasher)); + + black_box(hasher.finish()); +} diff --git a/library/std/src/sys/unix/path.rs b/library/std/src/sys/unix/path.rs index 840a7ae042625..717add9ec48db 100644 --- a/library/std/src/sys/unix/path.rs +++ b/library/std/src/sys/unix/path.rs @@ -11,6 +11,7 @@ pub fn is_verbatim_sep(b: u8) -> bool { b == b'/' } +#[inline] pub fn parse_prefix(_: &OsStr) -> Option> { None }