diff --git a/Cargo.lock b/Cargo.lock index 13f37e989b0..ab580991d16 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3102,6 +3102,7 @@ dependencies = [ "data-encoding-macro", "dns-lookup", "dunce", + "glob", "itertools", "libc", "nix", diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index e13382147aa..3187f4da204 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -37,6 +37,7 @@ use uucore::display::{print_verbatim, Quotable}; use uucore::error::FromIo; use uucore::error::{UError, UResult}; use uucore::format_usage; +use uucore::parse_glob; use uucore::parse_size::{parse_size, ParseSizeError}; use uucore::InvalidEncodingHandling; #[cfg(windows)] @@ -488,55 +489,28 @@ fn file_as_vec(filename: impl AsRef) -> Vec { // Given the --exclude-from and/or --exclude arguments, returns the globset lists // to ignore the files -fn get_glob_ignore(matches: &ArgMatches) -> UResult> { - let mut excludes_from = if matches.contains_id(options::EXCLUDE_FROM) { - match matches.values_of(options::EXCLUDE_FROM) { - Some(all_files) => { - let mut exclusion = Vec::::new(); - // Read the exclude lists from all the files - // and add them into a vector of string - let files: Vec = all_files.clone().map(|v| v.to_owned()).collect(); - for f in files { - exclusion.extend(file_as_vec(&f)); - } - exclusion - } - None => Vec::::new(), +fn build_exclude_patterns(matches: &ArgMatches) -> UResult> { + let exclude_from_iterator = matches + .values_of(options::EXCLUDE_FROM) + .unwrap_or_default() + .flat_map(|f| file_as_vec(&f)); + + let excludes_iterator = matches + .values_of(options::EXCLUDE) + .unwrap_or_default() + .map(|v| v.to_owned()); + + let mut exclude_patterns = Vec::new(); + for f in excludes_iterator.chain(exclude_from_iterator) { + if matches.is_present(options::VERBOSE) { + println!("adding {:?} to the exclude list ", &f); } - } else { - Vec::::new() - }; - - let mut excludes = if matches.contains_id(options::EXCLUDE) { - match matches.values_of(options::EXCLUDE) { - 
Some(v) => { - // Read the various arguments - v.clone().map(|v| v.to_owned()).collect() - } - None => Vec::::new(), + match parse_glob::from_str(&f) { + Ok(glob) => exclude_patterns.push(glob), + Err(err) => return Err(DuError::InvalidGlob(err.to_string()).into()), } - } else { - Vec::::new() - }; - - // Merge the two lines - excludes.append(&mut excludes_from); - if !&excludes.is_empty() { - let mut builder = Vec::new(); - // Create the `Vec` of excludes - for f in excludes { - if matches.contains_id(options::VERBOSE) { - println!("adding {:?} to the exclude list ", &f); - } - match Pattern::new(&f) { - Ok(glob) => builder.push(glob), - Err(err) => return Err(DuError::InvalidGlob(err.to_string()).into()), - }; - } - Ok(builder) - } else { - Ok(Vec::new()) } + Ok(exclude_patterns) } #[uucore::main] @@ -615,85 +589,84 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { "\n" }; - let excludes = get_glob_ignore(&matches)?; + let excludes = build_exclude_patterns(&matches)?; let mut grand_total = 0; 'loop_file: for path_string in files { // Skip if we don't want to ignore anything if !&excludes.is_empty() { for pattern in &excludes { - { - if pattern.matches(path_string) { - // if the directory is ignored, leave early - if options.verbose { - println!("{} ignored", path_string.quote()); - } - continue 'loop_file; + if pattern.matches(path_string) { + // if the directory is ignored, leave early + if options.verbose { + println!("{} ignored", path_string.quote()); } + continue 'loop_file; } } } let path = PathBuf::from(&path_string); - match Stat::new(path, &options) { - Ok(stat) => { - let mut inodes: HashSet = HashSet::new(); - if let Some(inode) = stat.inode { - inodes.insert(inode); + // Check existence of path provided in argument + if let Ok(stat) = Stat::new(path, &options) { + // Kick off the computation of disk usage from the initial path + let mut inodes: HashSet = HashSet::new(); + if let Some(inode) = stat.inode { + inodes.insert(inode); + } + let 
iter = du(stat, &options, 0, &mut inodes, &excludes); + + // Sum up all the returned `Stat`s and display results + let (_, len) = iter.size_hint(); + let len = len.unwrap(); + for (index, stat) in iter.enumerate() { + let size = choose_size(&matches, &stat); + + if threshold.map_or(false, |threshold| threshold.should_exclude(size)) { + continue; } - let iter = du(stat, &options, 0, &mut inodes, &excludes); - let (_, len) = iter.size_hint(); - let len = len.unwrap(); - for (index, stat) in iter.enumerate() { - let size = choose_size(&matches, &stat); - - if threshold.map_or(false, |threshold| threshold.should_exclude(size)) { - continue; - } - if matches.contains_id(options::TIME) { - let tm = { - let secs = { - match matches.value_of(options::TIME) { - Some(s) => match s { - "ctime" | "status" => stat.modified, - "access" | "atime" | "use" => stat.accessed, - "birth" | "creation" => stat - .created - .ok_or_else(|| DuError::InvalidTimeArg(s.into()))?, - // below should never happen as clap already restricts the values. - _ => unreachable!("Invalid field for --time"), - }, - None => stat.modified, - } - }; - DateTime::::from(UNIX_EPOCH + Duration::from_secs(secs)) + if matches.is_present(options::TIME) { + let tm = { + let secs = { + match matches.value_of(options::TIME) { + Some(s) => match s { + "ctime" | "status" => stat.modified, + "access" | "atime" | "use" => stat.accessed, + "birth" | "creation" => stat + .created + .ok_or_else(|| DuError::InvalidTimeArg(s.into()))?, + // below should never happen as clap already restricts the values. 
+ _ => unreachable!("Invalid field for --time"), + }, + None => stat.modified, + } }; - if !summarize || index == len - 1 { - let time_str = tm.format(time_format_str).to_string(); - print!("{}\t{}\t", convert_size(size), time_str); - print_verbatim(stat.path).unwrap(); - print!("{}", line_separator); - } - } else if !summarize || index == len - 1 { - print!("{}\t", convert_size(size)); + DateTime::::from(UNIX_EPOCH + Duration::from_secs(secs)) + }; + if !summarize || index == len - 1 { + let time_str = tm.format(time_format_str).to_string(); + print!("{}\t{}\t", convert_size(size), time_str); print_verbatim(stat.path).unwrap(); print!("{}", line_separator); } - if options.total && index == (len - 1) { - // The last element will be the total size of the the path under - // path_string. We add it to the grand total. - grand_total += size; - } + } else if !summarize || index == len - 1 { + print!("{}\t", convert_size(size)); + print_verbatim(stat.path).unwrap(); + print!("{}", line_separator); + } + if options.total && index == (len - 1) { + // The last element will be the total size of the the path under + // path_string. We add it to the grand total. 
+ grand_total += size; } } - Err(_) => { - show_error!( - "{}: {}", - path_string.maybe_quote(), - "No such file or directory" - ); - } + } else { + show_error!( + "{}: {}", + path_string.maybe_quote(), + "No such file or directory" + ); } } diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 55a8133a4c9..45c750739b3 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -23,6 +23,7 @@ clap = "3.2" dns-lookup = { version="1.0.5", optional=true } dunce = "1.0.0" wild = "2.0" +glob = "0.3.0" # * optional itertools = { version="0.10.0", optional=true } thiserror = { version="1.0", optional=true } diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index 1c405ce98de..d8860cfdab7 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -29,6 +29,7 @@ pub use crate::mods::ranges; pub use crate::mods::version_cmp; // * string parsing modules +pub use crate::parser::parse_glob; pub use crate::parser::parse_size; pub use crate::parser::parse_time; diff --git a/src/uucore/src/lib/parser.rs b/src/uucore/src/lib/parser.rs index d09777e109a..8eae16bbf28 100644 --- a/src/uucore/src/lib/parser.rs +++ b/src/uucore/src/lib/parser.rs @@ -1,2 +1,3 @@ +pub mod parse_glob; pub mod parse_size; pub mod parse_time; diff --git a/src/uucore/src/lib/parser/parse_glob.rs b/src/uucore/src/lib/parser/parse_glob.rs new file mode 100644 index 00000000000..8605f7450e2 --- /dev/null +++ b/src/uucore/src/lib/parser/parse_glob.rs @@ -0,0 +1,109 @@ +//! Parsing a glob Pattern from a string. +//! +//! Use the [`from_str`] function to parse a [`Pattern`] from a string. 
/// Rewrites caret-negated character classes (`[^...]`) as `[!...]`.
///
/// Scans `glob` left to right. Whenever a `[` is immediately followed by `^`
/// and a closing `]` exists at least two characters after the caret, the caret
/// is replaced by `!` and scanning resumes after that `]`. Anything else,
/// including classes that are never closed, is returned unchanged.
///
/// All indexing is done on `char`s, so input containing multi-byte UTF-8
/// characters is handled without panicking.
fn fix_negation(glob: &str) -> String {
    let mut chars = glob.chars().collect::<Vec<_>>();

    let mut i = 0;
    // The shortest negated class is four characters long (`[^x]`), so there is
    // nothing left to rewrite once fewer than four characters remain. The
    // bound must be `chars.len()`: the byte length of `glob` can be larger and
    // would let `i + 1` / `i + 3` run past the end of `chars`.
    while i + 3 < chars.len() {
        if chars[i] == '[' && chars[i + 1] == '^' {
            // Start the search at `i + 3` so the first character of the class
            // is never taken as the terminator (`[^]]` negates a literal `]`).
            if let Some(j) = chars[i + 3..].iter().position(|x| *x == ']') {
                chars[i + 1] = '!';
                // Skip `[`, the former caret, the first class character and
                // everything up to and including the closing `]`.
                i += j + 4;
                continue;
            }
        }

        i += 1;
    }

    chars.into_iter().collect::<String>()
}
#[test]
// Disabled on Windows: the patterns below use `/` as the separator, and
// supporting `\` as well would make the test more complex.
#[cfg(not(target_os = "windows"))]
fn test_du_complex_exclude_patterns() {
    let ts = TestScenario::new(util_name!());
    let at = &ts.fixtures;

    for dir in [
        "azerty/xcwww/azeaze",
        "azerty/xcwww/qzerty",
        "azerty/xcwww/amazing",
    ] {
        at.mkdir_all(dir);
    }

    // Negation in a glob must work with both `^` and `!`, so the same
    // expectations are checked for each spelling.
    for exclude_arg in ["--exclude=azerty/*/[^q]*", "--exclude=azerty/*/[!q]*"] {
        let result = ts.ucmd().arg(exclude_arg).arg("azerty").succeeds();
        let stdout = result.stdout_str();

        assert!(!stdout.contains("amazing"));
        assert!(stdout.contains("qzerty"));
        assert!(!stdout.contains("azeaze"));
        assert!(stdout.contains("xcwww"));
    }
}