From d4c83eeb8ac097c7ce47f06c87af5d4d054351a6 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Sat, 27 Apr 2024 21:11:28 +0200 Subject: [PATCH 1/8] Do not call rm_rf_glob in a loop for paths that do not depend on file_types --- src/cargo/ops/cargo_clean.rs | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/cargo/ops/cargo_clean.rs b/src/cargo/ops/cargo_clean.rs index 32a1d4d9d47..a270db1b19a 100644 --- a/src/cargo/ops/cargo_clean.rs +++ b/src/cargo/ops/cargo_clean.rs @@ -212,28 +212,15 @@ fn clean_specs( TargetKind::Test | TargetKind::Bench => (layout.deps(), None), _ => (layout.deps(), Some(layout.dest())), }; + let dir_glob = escape_glob_path(dir)?; + let dir_glob = Path::new(&dir_glob); for file_type in file_types { // Some files include a hash in the filename, some don't. let hashed_name = file_type.output_filename(target, Some("*")); let unhashed_name = file_type.output_filename(target, None); - let dir_glob = escape_glob_path(dir)?; - let dir_glob = Path::new(&dir_glob); clean_ctx.rm_rf_glob(&dir_glob.join(&hashed_name))?; clean_ctx.rm_rf(&dir.join(&unhashed_name))?; - // Remove dep-info file generated by rustc. It is not tracked in - // file_types. It does not have a prefix. - let hashed_dep_info = dir_glob.join(format!("{}-*.d", crate_name)); - clean_ctx.rm_rf_glob(&hashed_dep_info)?; - let unhashed_dep_info = dir.join(format!("{}.d", crate_name)); - clean_ctx.rm_rf(&unhashed_dep_info)?; - // Remove split-debuginfo files generated by rustc. - let split_debuginfo_obj = dir_glob.join(format!("{}.*.o", crate_name)); - clean_ctx.rm_rf_glob(&split_debuginfo_obj)?; - let split_debuginfo_dwo = dir_glob.join(format!("{}.*.dwo", crate_name)); - clean_ctx.rm_rf_glob(&split_debuginfo_dwo)?; - let split_debuginfo_dwp = dir_glob.join(format!("{}.*.dwp", crate_name)); - clean_ctx.rm_rf_glob(&split_debuginfo_dwp)?; // Remove the uplifted copy. if let Some(uplift_dir) = uplift_dir { @@ -244,6 +231,20 @@ fn clean_specs( clean_ctx.rm_rf(&dep_info)?; } } + // Remove dep-info file generated by rustc. It is not tracked in + // file_types. It does not have a prefix. + let hashed_dep_info = dir_glob.join(format!("{}-*.d", crate_name)); + clean_ctx.rm_rf_glob(&hashed_dep_info)?; + let unhashed_dep_info = dir.join(format!("{}.d", crate_name)); + clean_ctx.rm_rf(&unhashed_dep_info)?; + // Remove split-debuginfo files generated by rustc. + let split_debuginfo_obj = dir_glob.join(format!("{}.*.o", crate_name)); + clean_ctx.rm_rf_glob(&split_debuginfo_obj)?; + let split_debuginfo_dwo = dir_glob.join(format!("{}.*.dwo", crate_name)); + clean_ctx.rm_rf_glob(&split_debuginfo_dwo)?; + let split_debuginfo_dwp = dir_glob.join(format!("{}.*.dwp", crate_name)); + clean_ctx.rm_rf_glob(&split_debuginfo_dwp)?; + // TODO: what to do about build_script_build? let dir = escape_glob_path(layout.incremental())?; let incremental = Path::new(&dir).join(format!("{}-*", crate_name)); From 9c685abf7ffcd8902c697cc51230dc7bad1d8f6a Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Sun, 28 Apr 2024 00:07:41 +0200 Subject: [PATCH 2/8] Add rm_rf_prefix_list --- src/cargo/ops/cargo_clean.rs | 46 ++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/src/cargo/ops/cargo_clean.rs b/src/cargo/ops/cargo_clean.rs index a270db1b19a..896eb6d0d31 100644 --- a/src/cargo/ops/cargo_clean.rs +++ b/src/cargo/ops/cargo_clean.rs @@ -193,6 +193,8 @@ fn clean_specs( continue; } let crate_name = target.crate_name(); + let path_dot = format!("{crate_name}."); + let path_dash = format!("{crate_name}-"); for &mode in &[ CompileMode::Build, CompileMode::Test, @@ -212,8 +214,8 @@ fn clean_specs( TargetKind::Test | TargetKind::Bench => (layout.deps(), None), _ => (layout.deps(), Some(layout.dest())), }; - let dir_glob = escape_glob_path(dir)?; - let dir_glob = Path::new(&dir_glob); + let mut dir_glob_str = escape_glob_path(dir)?; + let dir_glob = Path::new(&dir_glob_str); for file_type in file_types { // Some files include a hash in the filename, some don't. let hashed_name = file_type.output_filename(target, Some("*")); @@ -233,17 +235,21 @@ fn clean_specs( } // Remove dep-info file generated by rustc. It is not tracked in // file_types. It does not have a prefix. - let hashed_dep_info = dir_glob.join(format!("{}-*.d", crate_name)); - clean_ctx.rm_rf_glob(&hashed_dep_info)?; let unhashed_dep_info = dir.join(format!("{}.d", crate_name)); clean_ctx.rm_rf(&unhashed_dep_info)?; // Remove split-debuginfo files generated by rustc. - let split_debuginfo_obj = dir_glob.join(format!("{}.*.o", crate_name)); - clean_ctx.rm_rf_glob(&split_debuginfo_obj)?; - let split_debuginfo_dwo = dir_glob.join(format!("{}.*.dwo", crate_name)); - clean_ctx.rm_rf_glob(&split_debuginfo_dwo)?; - let split_debuginfo_dwp = dir_glob.join(format!("{}.*.dwp", crate_name)); - clean_ctx.rm_rf_glob(&split_debuginfo_dwp)?; + + let paths = [ + (path_dash.as_str(), ".d"), + (path_dot.as_str(), ".o"), + (path_dot.as_str(), ".dwo"), + (path_dot.as_str(), ".dwp"), + ]; + if !dir_glob_str.ends_with(std::path::MAIN_SEPARATOR) { + dir_glob_str.push(std::path::MAIN_SEPARATOR); + } + dir_glob_str.push('*'); + clean_ctx.rm_rf_prefix_list(&dir_glob_str.as_str(), &paths)?; // TODO: what to do about build_script_build? let dir = escape_glob_path(layout.incremental())?; @@ -323,6 +329,26 @@ impl<'gctx> CleanContext<'gctx> { Ok(()) } + fn rm_rf_prefix_list( + &mut self, + pattern: &str, + path_matchers: &[(&str, &str)], + ) -> CargoResult<()> { + // TODO: Display utf8 warning to user? Or switch to globset? + + for path in glob::glob(pattern)? { + let path = path?; + let filename = path.file_name().and_then(|name| name.to_str()).unwrap(); + if path_matchers + .iter() + .any(|(prefix, suffix)| filename.starts_with(prefix) && filename.ends_with(suffix)) + { + self.rm_rf(&path)?; + } + } + Ok(()) + } + pub fn rm_rf(&mut self, path: &Path) -> CargoResult<()> { let meta = match fs::symlink_metadata(path) { Ok(meta) => meta, From c770700885d486b5bf7d360afb269f90c50780db Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Sun, 28 Apr 2024 12:51:53 +0200 Subject: [PATCH 3/8] Clean non-filetype based entries just once --- src/cargo/ops/cargo_clean.rs | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/cargo/ops/cargo_clean.rs b/src/cargo/ops/cargo_clean.rs index 896eb6d0d31..a3e8a9b37ab 100644 --- a/src/cargo/ops/cargo_clean.rs +++ b/src/cargo/ops/cargo_clean.rs @@ -8,8 +8,10 @@ use crate::util::interning::InternedString; use crate::util::{human_readable_bytes, GlobalContext, Progress, ProgressStyle}; use anyhow::bail; use cargo_util::paths; +use std::collections::{HashMap, HashSet}; use std::fs; use std::path::{Path, PathBuf}; +use std::rc::Rc; pub struct CleanOptions<'gctx> { pub gctx: &'gctx GlobalContext, @@ -168,7 +170,7 @@ fn clean_specs( let packages = pkg_set.get_many(pkg_ids)?; clean_ctx.progress = Box::new(CleaningPackagesBar::new(clean_ctx.gctx, packages.len())); - + let mut dirs_to_clean: HashMap<_, HashSet<_>> = HashMap::new(); for pkg in packages { let pkg_dir = format!("{}-*", pkg.name()); clean_ctx.progress.on_cleaning_package(&pkg.name())?; @@ -193,8 +195,9 @@ fn clean_specs( continue; } let crate_name = target.crate_name(); - let path_dot = format!("{crate_name}."); - let path_dash = format!("{crate_name}-"); + let path_dot: Rc = format!("{crate_name}.").into(); + let path_dash: Rc = format!("{crate_name}-").into(); + for &mode in &[ CompileMode::Build, CompileMode::Test, @@ -240,16 +243,16 @@ fn clean_specs( // Remove split-debuginfo files generated by rustc. let paths = [ - (path_dash.as_str(), ".d"), - (path_dot.as_str(), ".o"), - (path_dot.as_str(), ".dwo"), - (path_dot.as_str(), ".dwp"), + (path_dash.clone(), ".d"), + (path_dot.clone(), ".o"), + (path_dot.clone(), ".dwo"), + (path_dot.clone(), ".dwp"), ]; if !dir_glob_str.ends_with(std::path::MAIN_SEPARATOR) { dir_glob_str.push(std::path::MAIN_SEPARATOR); } dir_glob_str.push('*'); - clean_ctx.rm_rf_prefix_list(&dir_glob_str.as_str(), &paths)?; + dirs_to_clean.entry(dir_glob_str).or_default().extend(paths); // TODO: what to do about build_script_build? let dir = escape_glob_path(layout.incremental())?; @@ -260,6 +263,9 @@ fn clean_specs( } } + for (dir, paths) in dirs_to_clean { + clean_ctx.rm_rf_prefix_list(&dir, &paths)?; + } Ok(()) } @@ -332,17 +338,16 @@ impl<'gctx> CleanContext<'gctx> { fn rm_rf_prefix_list( &mut self, pattern: &str, - path_matchers: &[(&str, &str)], + path_matchers: &HashSet<(Rc, &str)>, ) -> CargoResult<()> { // TODO: Display utf8 warning to user? Or switch to globset? for path in glob::glob(pattern)? { let path = path?; let filename = path.file_name().and_then(|name| name.to_str()).unwrap(); - if path_matchers - .iter() - .any(|(prefix, suffix)| filename.starts_with(prefix) && filename.ends_with(suffix)) - { + if path_matchers.iter().any(|(prefix, suffix)| { + filename.starts_with(&**prefix) && filename.ends_with(suffix) + }) { self.rm_rf(&path)?; } } From d33ef8fbe7ce9ff8ffd53ad02dc16e82368982d8 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Tue, 30 Apr 2024 00:10:42 +0200 Subject: [PATCH 4/8] Treat calls to rm_rf_prefix_list as idempotent Starting with this commit we deduplicate calls to rm_rf_prefix_list by crate name and not by directory; this can lead to more calls to rm_rf_prefix_list (especially in presence of multiple -p arguments), but it is also more transparent in terms of progress reporting (we're just storing away whether a given directory + glob pair has already been removed) --- src/cargo/ops/cargo_clean.rs | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/cargo/ops/cargo_clean.rs b/src/cargo/ops/cargo_clean.rs index a3e8a9b37ab..cbf84f2849c 100644 --- a/src/cargo/ops/cargo_clean.rs +++ b/src/cargo/ops/cargo_clean.rs @@ -170,7 +170,8 @@ fn clean_specs( let packages = pkg_set.get_many(pkg_ids)?; clean_ctx.progress = Box::new(CleaningPackagesBar::new(clean_ctx.gctx, packages.len())); - let mut dirs_to_clean: HashMap<_, HashSet<_>> = HashMap::new(); + + let mut cleaned_packages: HashMap<_, HashSet<_>> = HashMap::default(); for pkg in packages { let pkg_dir = format!("{}-*", pkg.name()); clean_ctx.progress.on_cleaning_package(&pkg.name())?; @@ -194,10 +195,9 @@ fn clean_specs( } continue; } - let crate_name = target.crate_name(); + let crate_name: Rc = target.crate_name().into(); let path_dot: Rc = format!("{crate_name}.").into(); let path_dash: Rc = format!("{crate_name}-").into(); - for &mode in &[ CompileMode::Build, CompileMode::Test, @@ -242,17 +242,24 @@ fn clean_specs( clean_ctx.rm_rf(&unhashed_dep_info)?; // Remove split-debuginfo files generated by rustc. - let paths = [ - (path_dash.clone(), ".d"), - (path_dot.clone(), ".o"), - (path_dot.clone(), ".dwo"), - (path_dot.clone(), ".dwp"), - ]; if !dir_glob_str.ends_with(std::path::MAIN_SEPARATOR) { dir_glob_str.push(std::path::MAIN_SEPARATOR); } dir_glob_str.push('*'); - dirs_to_clean.entry(dir_glob_str).or_default().extend(paths); + let dir_glob_str: Rc = dir_glob_str.into(); + if cleaned_packages + .entry(dir_glob_str.clone()) + .or_default() + .insert(crate_name.clone()) + { + let paths = [ + (path_dash.clone(), ".d"), + (path_dot.clone(), ".o"), + (path_dot.clone(), ".dwo"), + (path_dot.clone(), ".dwp"), + ]; + clean_ctx.rm_rf_prefix_list(&dir_glob_str, &paths)?; + } // TODO: what to do about build_script_build? let dir = escape_glob_path(layout.incremental())?; @@ -263,9 +270,6 @@ fn clean_specs( } } - for (dir, paths) in dirs_to_clean { - clean_ctx.rm_rf_prefix_list(&dir, &paths)?; - } Ok(()) } @@ -338,7 +342,7 @@ impl<'gctx> CleanContext<'gctx> { fn rm_rf_prefix_list( &mut self, pattern: &str, - path_matchers: &HashSet<(Rc, &str)>, + path_matchers: &[(Rc, &str)], ) -> CargoResult<()> { // TODO: Display utf8 warning to user? Or switch to globset? From 634dca4dddba6150dcf59277ef1372da301d333c Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Tue, 30 Apr 2024 00:44:17 +0200 Subject: [PATCH 5/8] Clean up commentary for new code --- src/cargo/ops/cargo_clean.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/cargo/ops/cargo_clean.rs b/src/cargo/ops/cargo_clean.rs index cbf84f2849c..3cea031a460 100644 --- a/src/cargo/ops/cargo_clean.rs +++ b/src/cargo/ops/cargo_clean.rs @@ -171,6 +171,8 @@ fn clean_specs( clean_ctx.progress = Box::new(CleaningPackagesBar::new(clean_ctx.gctx, packages.len())); + // Try to reduce the amount of times we iterate over the same target directory by storing away + // the directories we've iterated over (and cleaned for a given package). let mut cleaned_packages: HashMap<_, HashSet<_>> = HashMap::default(); for pkg in packages { let pkg_dir = format!("{}-*", pkg.name()); @@ -236,11 +238,8 @@ fn clean_specs( clean_ctx.rm_rf(&dep_info)?; } } - // Remove dep-info file generated by rustc. It is not tracked in - // file_types. It does not have a prefix. let unhashed_dep_info = dir.join(format!("{}.d", crate_name)); clean_ctx.rm_rf(&unhashed_dep_info)?; - // Remove split-debuginfo files generated by rustc. if !dir_glob_str.ends_with(std::path::MAIN_SEPARATOR) { dir_glob_str.push(std::path::MAIN_SEPARATOR); @@ -253,7 +252,10 @@ fn clean_specs( .insert(crate_name.clone()) { let paths = [ + // Remove dep-info file generated by rustc. It is not tracked in + // file_types. It does not have a prefix. (path_dash.clone(), ".d"), + // Remove split-debuginfo files generated by rustc. (path_dot.clone(), ".o"), (path_dot.clone(), ".dwo"), (path_dot.clone(), ".dwp"), @@ -339,13 +341,14 @@ impl<'gctx> CleanContext<'gctx> { Ok(()) } + /// Iterates over files matching a glob (`pattern`), removing any files whose filenames start and end with provided prefix/suffix pair. + /// Compared to multiple separate calls to [`Self::rm_rf_glob`], this method iterates over the directory just once, which is why + /// it may be preferable for working with multiple prefix/suffix pairs. fn rm_rf_prefix_list( &mut self, pattern: &str, path_matchers: &[(Rc, &str)], ) -> CargoResult<()> { - // TODO: Display utf8 warning to user? Or switch to globset? - for path in glob::glob(pattern)? { let path = path?; let filename = path.file_name().and_then(|name| name.to_str()).unwrap(); From a49a2b9584e85d98171b69c66a9eba5b94a59921 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Thu, 2 May 2024 12:29:28 +0200 Subject: [PATCH 6/8] Update src/cargo/ops/cargo_clean.rs Change Rc to &str Co-authored-by: Weihang Lo --- src/cargo/ops/cargo_clean.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cargo/ops/cargo_clean.rs b/src/cargo/ops/cargo_clean.rs index 3cea031a460..9adaa24581b 100644 --- a/src/cargo/ops/cargo_clean.rs +++ b/src/cargo/ops/cargo_clean.rs @@ -198,8 +198,8 @@ fn clean_specs( continue; } let crate_name: Rc = target.crate_name().into(); - let path_dot: Rc = format!("{crate_name}.").into(); - let path_dash: Rc = format!("{crate_name}-").into(); + let path_dot: &str = &format!("{crate_name}."); + let path_dash: &str = &format!("{crate_name}-"); for &mode in &[ CompileMode::Build, CompileMode::Test, From 574d086cb3597ad696bb7c6b0f5e3200d81cc109 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Thu, 2 May 2024 12:30:38 +0200 Subject: [PATCH 7/8] fixup! Update src/cargo/ops/cargo_clean.rs --- src/cargo/ops/cargo_clean.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/cargo/ops/cargo_clean.rs b/src/cargo/ops/cargo_clean.rs index 9adaa24581b..b2ea533fbb8 100644 --- a/src/cargo/ops/cargo_clean.rs +++ b/src/cargo/ops/cargo_clean.rs @@ -254,11 +254,11 @@ fn clean_specs( let paths = [ // Remove dep-info file generated by rustc. It is not tracked in // file_types. It does not have a prefix. - (path_dash.clone(), ".d"), + (path_dash, ".d"), // Remove split-debuginfo files generated by rustc. - (path_dot.clone(), ".o"), - (path_dot.clone(), ".dwo"), - (path_dot.clone(), ".dwp"), + (path_dot, ".o"), + (path_dot, ".dwo"), + (path_dot, ".dwp"), ]; clean_ctx.rm_rf_prefix_list(&dir_glob_str, &paths)?; } @@ -347,14 +347,15 @@ impl<'gctx> CleanContext<'gctx> { fn rm_rf_prefix_list( &mut self, pattern: &str, - path_matchers: &[(Rc, &str)], + path_matchers: &[(&str, &str)], ) -> CargoResult<()> { for path in glob::glob(pattern)? { let path = path?; let filename = path.file_name().and_then(|name| name.to_str()).unwrap(); - if path_matchers.iter().any(|(prefix, suffix)| { - filename.starts_with(&**prefix) && filename.ends_with(suffix) - }) { + if path_matchers + .iter() + .any(|(prefix, suffix)| filename.starts_with(prefix) && filename.ends_with(suffix)) + { self.rm_rf(&path)?; } } From 9c1139ec72b7fbc9bfc8678a7c50276a533c7765 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Thu, 2 May 2024 12:37:25 +0200 Subject: [PATCH 8/8] feedback: rewrite doc comment for rm_rf_prefix_list --- src/cargo/ops/cargo_clean.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/cargo/ops/cargo_clean.rs b/src/cargo/ops/cargo_clean.rs index b2ea533fbb8..5969bf287eb 100644 --- a/src/cargo/ops/cargo_clean.rs +++ b/src/cargo/ops/cargo_clean.rs @@ -341,9 +341,12 @@ impl<'gctx> CleanContext<'gctx> { Ok(()) } - /// Iterates over files matching a glob (`pattern`), removing any files whose filenames start and end with provided prefix/suffix pair. - /// Compared to multiple separate calls to [`Self::rm_rf_glob`], this method iterates over the directory just once, which is why - /// it may be preferable for working with multiple prefix/suffix pairs. + /// Removes files matching a glob and any of the provided filename patterns (prefix/suffix pairs). + /// + /// This function iterates over files matching a glob (`pattern`) and removes those whose + /// filenames start and end with specific prefix/suffix pairs. It should be more efficient for + /// operations involving multiple prefix/suffix pairs, as it iterates over the directory + /// only once, unlike making multiple calls to [`Self::rm_rf_glob`]. fn rm_rf_prefix_list( &mut self, pattern: &str,