Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add parse_glob module and update du to use parse_glob #3754

Merged
merged 3 commits into from
Aug 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

183 changes: 78 additions & 105 deletions src/uu/du/src/du.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ use uucore::display::{print_verbatim, Quotable};
use uucore::error::FromIo;
use uucore::error::{UError, UResult};
use uucore::format_usage;
use uucore::parse_glob;
use uucore::parse_size::{parse_size, ParseSizeError};
use uucore::InvalidEncodingHandling;
#[cfg(windows)]
Expand Down Expand Up @@ -488,55 +489,28 @@ fn file_as_vec(filename: impl AsRef<Path>) -> Vec<String> {

// Given the --exclude-from and/or --exclude arguments, returns the globset lists
// to ignore the files
fn get_glob_ignore(matches: &ArgMatches) -> UResult<Vec<Pattern>> {
let mut excludes_from = if matches.contains_id(options::EXCLUDE_FROM) {
match matches.values_of(options::EXCLUDE_FROM) {
Some(all_files) => {
let mut exclusion = Vec::<String>::new();
// Read the exclude lists from all the files
// and add them into a vector of string
let files: Vec<String> = all_files.clone().map(|v| v.to_owned()).collect();
for f in files {
exclusion.extend(file_as_vec(&f));
}
exclusion
}
None => Vec::<String>::new(),
fn build_exclude_patterns(matches: &ArgMatches) -> UResult<Vec<Pattern>> {
let exclude_from_iterator = matches
.values_of(options::EXCLUDE_FROM)
.unwrap_or_default()
.flat_map(|f| file_as_vec(&f));

let excludes_iterator = matches
.values_of(options::EXCLUDE)
.unwrap_or_default()
.map(|v| v.to_owned());

let mut exclude_patterns = Vec::new();
for f in excludes_iterator.chain(exclude_from_iterator) {
if matches.is_present(options::VERBOSE) {
println!("adding {:?} to the exclude list ", &f);
}
} else {
Vec::<String>::new()
};

let mut excludes = if matches.contains_id(options::EXCLUDE) {
match matches.values_of(options::EXCLUDE) {
Some(v) => {
// Read the various arguments
v.clone().map(|v| v.to_owned()).collect()
}
None => Vec::<String>::new(),
match parse_glob::from_str(&f) {
Ok(glob) => exclude_patterns.push(glob),
Err(err) => return Err(DuError::InvalidGlob(err.to_string()).into()),
}
} else {
Vec::<String>::new()
};

// Merge the two lines
excludes.append(&mut excludes_from);
if !&excludes.is_empty() {
let mut builder = Vec::new();
// Create the `Vec` of excludes
for f in excludes {
if matches.contains_id(options::VERBOSE) {
println!("adding {:?} to the exclude list ", &f);
}
match Pattern::new(&f) {
Ok(glob) => builder.push(glob),
Err(err) => return Err(DuError::InvalidGlob(err.to_string()).into()),
};
}
Ok(builder)
} else {
Ok(Vec::new())
}
Ok(exclude_patterns)
}

#[uucore::main]
Expand Down Expand Up @@ -615,85 +589,84 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
"\n"
};

let excludes = get_glob_ignore(&matches)?;
let excludes = build_exclude_patterns(&matches)?;

let mut grand_total = 0;
'loop_file: for path_string in files {
// Skip if we don't want to ignore anything
if !&excludes.is_empty() {
for pattern in &excludes {
{
if pattern.matches(path_string) {
// if the directory is ignored, leave early
if options.verbose {
println!("{} ignored", path_string.quote());
}
continue 'loop_file;
if pattern.matches(path_string) {
// if the directory is ignored, leave early
if options.verbose {
println!("{} ignored", path_string.quote());
}
continue 'loop_file;
}
}
}

let path = PathBuf::from(&path_string);
match Stat::new(path, &options) {
Ok(stat) => {
let mut inodes: HashSet<FileInfo> = HashSet::new();
if let Some(inode) = stat.inode {
inodes.insert(inode);
// Check existence of path provided in argument
if let Ok(stat) = Stat::new(path, &options) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please add a comment explaining what this block is doing? It isn't super obvious reading the code

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please see newest changes!

// Kick off the computation of disk usage from the initial path
let mut inodes: HashSet<FileInfo> = HashSet::new();
if let Some(inode) = stat.inode {
inodes.insert(inode);
}
let iter = du(stat, &options, 0, &mut inodes, &excludes);

// Sum up all the returned `Stat`s and display results
let (_, len) = iter.size_hint();
let len = len.unwrap();
for (index, stat) in iter.enumerate() {
let size = choose_size(&matches, &stat);

if threshold.map_or(false, |threshold| threshold.should_exclude(size)) {
continue;
}
let iter = du(stat, &options, 0, &mut inodes, &excludes);
let (_, len) = iter.size_hint();
let len = len.unwrap();
for (index, stat) in iter.enumerate() {
let size = choose_size(&matches, &stat);

if threshold.map_or(false, |threshold| threshold.should_exclude(size)) {
continue;
}

if matches.contains_id(options::TIME) {
let tm = {
let secs = {
match matches.value_of(options::TIME) {
Some(s) => match s {
"ctime" | "status" => stat.modified,
"access" | "atime" | "use" => stat.accessed,
"birth" | "creation" => stat
.created
.ok_or_else(|| DuError::InvalidTimeArg(s.into()))?,
// below should never happen as clap already restricts the values.
_ => unreachable!("Invalid field for --time"),
},
None => stat.modified,
}
};
DateTime::<Local>::from(UNIX_EPOCH + Duration::from_secs(secs))
if matches.is_present(options::TIME) {
let tm = {
let secs = {
match matches.value_of(options::TIME) {
Some(s) => match s {
"ctime" | "status" => stat.modified,
"access" | "atime" | "use" => stat.accessed,
"birth" | "creation" => stat
.created
.ok_or_else(|| DuError::InvalidTimeArg(s.into()))?,
// below should never happen as clap already restricts the values.
_ => unreachable!("Invalid field for --time"),
},
None => stat.modified,
}
};
if !summarize || index == len - 1 {
let time_str = tm.format(time_format_str).to_string();
print!("{}\t{}\t", convert_size(size), time_str);
print_verbatim(stat.path).unwrap();
print!("{}", line_separator);
}
} else if !summarize || index == len - 1 {
print!("{}\t", convert_size(size));
DateTime::<Local>::from(UNIX_EPOCH + Duration::from_secs(secs))
};
if !summarize || index == len - 1 {
let time_str = tm.format(time_format_str).to_string();
print!("{}\t{}\t", convert_size(size), time_str);
print_verbatim(stat.path).unwrap();
print!("{}", line_separator);
}
if options.total && index == (len - 1) {
// The last element will be the total size of the path under
// path_string. We add it to the grand total.
grand_total += size;
}
} else if !summarize || index == len - 1 {
print!("{}\t", convert_size(size));
print_verbatim(stat.path).unwrap();
print!("{}", line_separator);
}
if options.total && index == (len - 1) {
// The last element will be the total size of the path under
// path_string. We add it to the grand total.
grand_total += size;
}
}
Err(_) => {
show_error!(
"{}: {}",
path_string.maybe_quote(),
"No such file or directory"
);
}
} else {
show_error!(
"{}: {}",
path_string.maybe_quote(),
"No such file or directory"
);
}
}

Expand Down
1 change: 1 addition & 0 deletions src/uucore/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ clap = "3.2"
dns-lookup = { version="1.0.5", optional=true }
dunce = "1.0.0"
wild = "2.0"
glob = "0.3.0"
# * optional
itertools = { version="0.10.0", optional=true }
thiserror = { version="1.0", optional=true }
Expand Down
1 change: 1 addition & 0 deletions src/uucore/src/lib/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ pub use crate::mods::ranges;
pub use crate::mods::version_cmp;

// * string parsing modules
pub use crate::parser::parse_glob;
pub use crate::parser::parse_size;
pub use crate::parser::parse_time;

Expand Down
1 change: 1 addition & 0 deletions src/uucore/src/lib/parser.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pub mod parse_glob;
pub mod parse_size;
pub mod parse_time;
109 changes: 109 additions & 0 deletions src/uucore/src/lib/parser/parse_glob.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
//! Parsing a glob Pattern from a string.
//!
//! Use the [`from_str`] function to parse a [`Pattern`] from a string.

// cSpell:words fnmatch

use glob::{Pattern, PatternError};

/// Convert caret-negated character classes (`[^...]`) into the `[!...]`
/// spelling.
///
/// The input is scanned left to right. Whenever a `[` is immediately followed
/// by `^` and a closing `]` is found at least one character later, the `^` is
/// replaced by `!` and scanning resumes right after that `]`. The character
/// directly after the `^` is always taken as a class member, so `[^]]` becomes
/// `[!]]`. Everything else, including unterminated or empty classes such as
/// `[^` and `[^]`, is returned unchanged.
fn fix_negation(glob: &str) -> String {
    let mut chars = glob.chars().collect::<Vec<_>>();

    let mut i = 0;
    // The shortest rewritable class is four characters long (`[^x]`), so stop
    // once fewer than four remain. The bound is checked against the number of
    // characters, not the byte length of `glob`: with multi-byte input the
    // byte length is larger and would let the indexing below run out of range.
    while i + 4 <= chars.len() {
        if chars[i] == '[' && chars[i + 1] == '^' {
            // Start the search at `i + 3` so that the first class member is
            // never mistaken for the closing bracket.
            if let Some(j) = chars[i + 3..].iter().position(|x| *x == ']') {
                chars[i + 1] = '!';
                // The closing bracket sits at `i + 3 + j`; continue after it.
                i += j + 4;
                continue;
            }
        }

        i += 1;
    }

    chars.into_iter().collect()
}

/// Build a glob [`Pattern`] from a string, accepting `^` as a negation marker.
///
/// Before the string is handed to [`Pattern::new`], every caret or circumflex
/// (`^`) that negates a set of characters is swapped for an exclamation mark
/// (`!`). This adapts rust's glob matching to behave the way the GNU utils'
/// fnmatch does.
///
/// # Errors
///
/// Returns the [`PatternError`] produced by [`Pattern::new`] when the amended
/// string is rejected.
///
/// # Examples
///
/// ```rust
/// use uucore::parse_glob::from_str;
/// assert!(!from_str("[^abc]").unwrap().matches("a"));
/// assert!(from_str("[^abc]").unwrap().matches("x"));
/// ```
pub fn from_str(glob: &str) -> Result<Pattern, PatternError> {
    let amended = fix_negation(glob);
    Pattern::new(&amended)
}

#[cfg(test)]
mod tests {

    use super::*;

    /// Parsing the caret spelling must give the same pattern as the `!` spelling.
    #[test]
    fn test_from_str() {
        let parsed = from_str("[^abc]").unwrap();
        let expected = Pattern::new("[!abc]").unwrap();
        assert_eq!(parsed, expected);
    }

    /// Inputs whose negations are expected to be rewritten, paired with the result.
    #[test]
    fn test_fix_negation() {
        let cases = [
            // Happy/simple case
            ("[^abc]", "[!abc]"),
            // A negation further along in a longer glob
            ("foo[abc] bar[^def]", "foo[abc] bar[!def]"),
            // Several negations in one glob
            ("foo[^abc]bar[^def]", "foo[!abc]bar[!def]"),
            // Negation of the single character ]
            ("[^]]", "[!]]"),
            // Negation of the single character ^
            ("[^^]", "[!^]"),
            // Negation of the space character
            ("[^ ]", "[! ]"),
            // Complicated patterns
            ("[^][]", "[!][]"),
            ("[^[]]", "[![]]"),
            // A negation that follows other bracket constructs
            ("[[]] [^a]", "[[]] [!a]"),
            ("[[] [^a]", "[[] [!a]"),
            ("[]] [^a]", "[]] [!a]"),
        ];

        for &(input, expected) in &cases {
            assert_eq!(fix_negation(input), expected);
        }
    }

    /// Inputs that must come back exactly as they went in.
    #[test]
    fn test_fix_negation_should_not_amend() {
        let unchanged = [
            "abc",
            // Glob specifically matches either [ or ^
            "[[^]",
            // Glob specifically matches either space or ^
            "[ ^]",
            // Glob specifically matches either [, space or ^
            "[[ ^]",
            "[ [^]",
            // Invalid globs (according to rust's glob implementation) remain unamended
            "[^]",
            "[^",
            "[][^]",
        ];

        for &glob in &unchanged {
            assert_eq!(fix_negation(glob), glob);
        }
    }
}
Loading