Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fast path for Path::cmp that skips over long shared prefixes #86898

Merged
merged 3 commits into from
Aug 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 40 additions & 7 deletions library/std/src/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -962,16 +962,49 @@ impl cmp::Eq for Components<'_> {}
// Partial ordering for `Components`: both iterators are cloned and handed to
// `compare_components`, so the result is always `Some(..)`.
impl<'a> cmp::PartialOrd for Components<'a> {
#[inline]
fn partial_cmp(&self, other: &Components<'a>) -> Option<cmp::Ordering> {
// NOTE(review): this text comes from a diff view whose +/- markers were lost. The first
// line below appears to be the removed pre-change body and the second its replacement —
// confirm against the actual change before treating both as live code.
Iterator::partial_cmp(self.clone(), other.clone())
Some(compare_components(self.clone(), other.clone()))
}
}

// Total ordering for `Components`: clones both iterators and delegates to
// `compare_components`.
#[stable(feature = "rust1", since = "1.0.0")]
impl cmp::Ord for Components<'_> {
#[inline]
fn cmp(&self, other: &Self) -> cmp::Ordering {
// NOTE(review): this text comes from a diff view whose +/- markers were lost. The first
// line below appears to be the removed pre-change body and the second its replacement —
// confirm against the actual change before treating both as live code.
Iterator::cmp(self.clone(), other.clone())
compare_components(self.clone(), other.clone())
}
}

/// Orders two `Components` iterators, skipping over any byte-identical leading portion of
/// the underlying paths before falling back to a component-wise comparison.
///
/// How the shortcut works:
///
/// 1. Scan the raw bytes of both paths for the first position where they differ.
/// 2. Walk back from that position to the nearest preceding separator, so that the
///    mismatching component is parsed from its start and `.` / `..` are never read
///    ambiguously.
/// 3. If such a separator exists, trim both paths to begin right after it and put both
///    iterators into `State::Body`; otherwise compare the full paths.
///
/// The shortcut is not attempted when either side has a prefix component, because
/// backtracking could end up in the middle of it.
fn compare_components(mut left: Components<'_>, mut right: Components<'_>) -> cmp::Ordering {
    let can_skip_shared_bytes =
        left.prefix.is_none() && right.prefix.is_none() && left.front == right.front;

    if can_skip_shared_bytes {
        // A `[u8]::first_mismatch` SIMD routine, if one existed, might speed this scan up.
        let mismatch = left.path.iter().zip(right.path.iter()).position(|(&l, &r)| l != r);

        let first_difference = match mismatch {
            Some(index) => index,
            // Same bytes and same length: nothing left to compare.
            None if left.path.len() == right.path.len() => return cmp::Ordering::Equal,
            // One byte string is a strict prefix of the other; the difference starts where
            // the shorter one ends.
            None => left.path.len().min(right.path.len()),
        };

        let last_shared_sep =
            left.path[..first_difference].iter().rposition(|&byte| left.is_sep_byte(byte));

        if let Some(sep_index) = last_shared_sep {
            // Resume component-wise comparison at the component containing the mismatch.
            let resume_at = sep_index + 1;
            left.path = &left.path[resume_at..];
            right.path = &right.path[resume_at..];
            left.front = State::Body;
            right.front = State::Body;
        }
    }

    // Explicitly the iterator comparison, not `Ord::cmp`, which itself calls this function.
    Iterator::cmp(left, right)
}

/// An iterator over [`Path`] and its ancestors.
Expand Down Expand Up @@ -1704,15 +1737,15 @@ impl cmp::Eq for PathBuf {}
// Partial ordering for `PathBuf`: compares the component iterators of both paths via
// `compare_components`, so the result is always `Some(..)`.
impl cmp::PartialOrd for PathBuf {
#[inline]
fn partial_cmp(&self, other: &PathBuf) -> Option<cmp::Ordering> {
// NOTE(review): this text comes from a diff view whose +/- markers were lost. The first
// line below appears to be the removed pre-change body and the second its replacement —
// confirm against the actual change before treating both as live code.
self.components().partial_cmp(other.components())
Some(compare_components(self.components(), other.components()))
}
}

// Total ordering for `PathBuf`: compares the component iterators of both paths via
// `compare_components`.
#[stable(feature = "rust1", since = "1.0.0")]
impl cmp::Ord for PathBuf {
#[inline]
fn cmp(&self, other: &PathBuf) -> cmp::Ordering {
// NOTE(review): this text comes from a diff view whose +/- markers were lost. The first
// line below appears to be the removed pre-change body and the second its replacement —
// confirm against the actual change before treating both as live code.
self.components().cmp(other.components())
compare_components(self.components(), other.components())
}
}

Expand Down Expand Up @@ -2686,7 +2719,7 @@ impl fmt::Display for Display<'_> {
// Equality for `Path`: two paths are equal when their component iterators compare equal.
impl cmp::PartialEq for Path {
#[inline]
fn eq(&self, other: &Path) -> bool {
// NOTE(review): this text comes from a diff view whose +/- markers were lost. The first
// line below appears to be the removed pre-change body (`Iterator::eq`-style call) and the
// second its replacement (the `==` operator on `Components`) — confirm against the actual
// change before treating both as live code.
self.components().eq(other.components())
self.components() == other.components()
}
}

Expand All @@ -2706,15 +2739,15 @@ impl cmp::Eq for Path {}
// Partial ordering for `Path`: compares the component iterators of both paths via
// `compare_components`, so the result is always `Some(..)`.
impl cmp::PartialOrd for Path {
#[inline]
fn partial_cmp(&self, other: &Path) -> Option<cmp::Ordering> {
// NOTE(review): this text comes from a diff view whose +/- markers were lost. The first
// line below appears to be the removed pre-change body and the second its replacement —
// confirm against the actual change before treating both as live code.
self.components().partial_cmp(other.components())
Some(compare_components(self.components(), other.components()))
}
}

// Total ordering for `Path`: compares the component iterators of both paths via
// `compare_components`.
#[stable(feature = "rust1", since = "1.0.0")]
impl cmp::Ord for Path {
#[inline]
fn cmp(&self, other: &Path) -> cmp::Ordering {
// NOTE(review): this text comes from a diff view whose +/- markers were lost. The first
// line below appears to be the removed pre-change body and the second its replacement —
// confirm against the actual change before treating both as live code.
self.components().cmp(other.components())
compare_components(self.components(), other.components())
}
}

Expand Down
68 changes: 68 additions & 0 deletions library/std/src/path/tests.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use super::*;

use crate::collections::BTreeSet;
use crate::rc::Rc;
use crate::sync::Arc;
use core::hint::black_box;

macro_rules! t(
($path:expr, iter: $iter:expr) => (
Expand Down Expand Up @@ -1392,3 +1394,69 @@ fn into_rc() {
assert_eq!(&*rc2, path);
assert_eq!(&*arc2, path);
}

/// Checks `Path` ordering on a table of `(expected, left, right)` cases, including paths
/// that differ only by `.` components or trailing separators.
#[test]
fn test_ord() {
    use core::cmp::Ordering::{Equal, Less};

    let cases = [
        (Less, "1", "2"),
        (Less, "/foo/bar", "/foo./bar"),
        (Less, "foo/bar", "foo/bar."),
        (Equal, "foo/./bar", "foo/bar/"),
        (Equal, "foo/bar", "foo/bar/"),
        (Equal, "foo/bar", "foo/bar/."),
        (Equal, "foo/bar", "foo/bar//"),
    ];

    for &(expected, left, right) in cases.iter() {
        let actual = Path::new(left).cmp(Path::new(right));
        assert_eq!(actual, expected);
    }
}

/// Benchmarks `sort_unstable` over 1000 already-sorted `PathBuf`s that all live under the
/// same short directory.
#[bench]
fn bench_path_cmp_fast_path_buf_sort(b: &mut test::Bencher) {
    let base = PathBuf::from("my/home");

    let mut paths = Vec::new();
    for num in 0..1000 {
        paths.push(base.join(format!("file {}.rs", num)));
    }

    // Sort once up front; every benchmark iteration then re-sorts the sorted slice.
    paths.sort();

    b.iter(|| {
        let slice = black_box(paths.as_mut_slice());
        slice.sort_unstable();
    });
}

/// Benchmarks removing and re-inserting one path in a `BTreeSet` of 1000 paths that share
/// a long directory prefix.
#[bench]
fn bench_path_cmp_fast_path_long(b: &mut test::Bencher) {
    let base = PathBuf::from("/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/");
    let paths: Vec<PathBuf> =
        (0..1000).map(|num| base.join(format!("file {}.rs", num))).collect();

    // Insert one by one, in generation order, so the tree is built by repeated insertion.
    let mut set = BTreeSet::new();
    for p in &paths {
        set.insert(p.as_path());
    }

    let probe_index = 500;
    b.iter(|| {
        set.remove(paths[probe_index].as_path());
        set.insert(paths[probe_index].as_path());
    });
}

/// Benchmarks removing and re-inserting one path in a `BTreeSet` of 1000 paths that share
/// only a short directory prefix.
#[bench]
fn bench_path_cmp_fast_path_short(b: &mut test::Bencher) {
    let base = PathBuf::from("my/home");
    let paths: Vec<PathBuf> =
        (0..1000).map(|num| base.join(format!("file {}.rs", num))).collect();

    // Insert one by one, in generation order, so the tree is built by repeated insertion.
    let mut set = BTreeSet::new();
    for p in &paths {
        set.insert(p.as_path());
    }

    let probe_index = 500;
    b.iter(|| {
        set.remove(paths[probe_index].as_path());
        set.insert(paths[probe_index].as_path());
    });
}