Skip to content

Commit

Permalink
feat: support Dust tree by age
Browse files Browse the repository at this point in the history
  • Loading branch information
wugeer authored and bootandy committed Aug 7, 2024
1 parent 733117d commit f48fcc7
Show file tree
Hide file tree
Showing 14 changed files with 269 additions and 24 deletions.
2 changes: 2 additions & 0 deletions completions/_dust
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ _dust() {
'-y+[just like -mtime, but based on file change time]: : ' \
'--ctime=[just like -mtime, but based on file change time]: : ' \
'--files0-from=[run dust on NUL-terminated file names specified in file; if argument is -, then read names from standard input]: :_files' \
'-m+[Directory '\''size'\'' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time]: :(a c m)' \
'--filetime=[Directory '\''size'\'' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time]: :(a c m)' \
'-p[Subdirectories will not have their path shortened]' \
'--full-paths[Subdirectories will not have their path shortened]' \
'-L[dereference sym links - Treat sym links as directories and go into them]' \
Expand Down
2 changes: 2 additions & 0 deletions completions/_dust.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ Register-ArgumentCompleter -Native -CommandName 'dust' -ScriptBlock {
[CompletionResult]::new('-y', 'y', [CompletionResultType]::ParameterName, 'just like -mtime, but based on file change time')
[CompletionResult]::new('--ctime', 'ctime', [CompletionResultType]::ParameterName, 'just like -mtime, but based on file change time')
[CompletionResult]::new('--files0-from', 'files0-from', [CompletionResultType]::ParameterName, 'run dust on NUL-terminated file names specified in file; if argument is -, then read names from standard input')
[CompletionResult]::new('-m', 'm', [CompletionResultType]::ParameterName, 'Directory ''size'' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time')
[CompletionResult]::new('--filetime', 'filetime', [CompletionResultType]::ParameterName, 'Directory ''size'' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time')
[CompletionResult]::new('-p', 'p', [CompletionResultType]::ParameterName, 'Subdirectories will not have their path shortened')
[CompletionResult]::new('--full-paths', 'full-paths', [CompletionResultType]::ParameterName, 'Subdirectories will not have their path shortened')
[CompletionResult]::new('-L', 'L ', [CompletionResultType]::ParameterName, 'dereference sym links - Treat sym links as directories and go into them')
Expand Down
10 changes: 9 additions & 1 deletion completions/dust.bash
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ _dust() {

case "${cmd}" in
dust)
opts="-d -T -n -p -X -I -L -x -s -r -c -C -b -B -z -R -f -i -v -e -t -w -P -D -F -o -S -j -M -A -y -h -V --depth --threads --number-of-lines --full-paths --ignore-directory --ignore-all-in-file --dereference-links --limit-filesystem --apparent-size --reverse --no-colors --force-colors --no-percent-bars --bars-on-right --min-size --screen-reader --skip-total --filecount --ignore_hidden --invert-filter --filter --file_types --terminal_width --no-progress --print-errors --only-dir --only-file --output-format --stack-size --output-json --mtime --atime --ctime --files0-from --help --version [PATH]..."
opts="-d -T -n -p -X -I -L -x -s -r -c -C -b -B -z -R -f -i -v -e -t -w -P -D -F -o -S -j -M -A -y -m -h -V --depth --threads --number-of-lines --full-paths --ignore-directory --ignore-all-in-file --dereference-links --limit-filesystem --apparent-size --reverse --no-colors --force-colors --no-percent-bars --bars-on-right --min-size --screen-reader --skip-total --filecount --ignore_hidden --invert-filter --filter --file_types --terminal_width --no-progress --print-errors --only-dir --only-file --output-format --stack-size --output-json --mtime --atime --ctime --files0-from --filetime --help --version [PATH]..."
if [[ ${cur} == -* || ${COMP_CWORD} -eq 1 ]] ; then
COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") )
return 0
Expand Down Expand Up @@ -163,6 +163,14 @@ _dust() {
COMPREPLY=($(compgen -f "${cur}"))
return 0
;;
--filetime)
COMPREPLY=($(compgen -W "a c m" -- "${cur}"))
return 0
;;
-m)
COMPREPLY=($(compgen -W "a c m" -- "${cur}"))
return 0
;;
*)
COMPREPLY=()
;;
Expand Down
2 changes: 2 additions & 0 deletions completions/dust.elv
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ set edit:completion:arg-completer[dust] = {|@words|
cand -y 'just like -mtime, but based on file change time'
cand --ctime 'just like -mtime, but based on file change time'
cand --files0-from 'run dust on NUL-terminated file names specified in file; if argument is -, then read names from standard input'
cand -m 'Directory ''size'' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time'
cand --filetime 'Directory ''size'' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time'
cand -p 'Subdirectories will not have their path shortened'
cand --full-paths 'Subdirectories will not have their path shortened'
cand -L 'dereference sym links - Treat sym links as directories and go into them'
Expand Down
1 change: 1 addition & 0 deletions completions/dust.fish
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ complete -c dust -s M -l mtime -d '+/-n matches files modified more/less than n
complete -c dust -s A -l atime -d 'just like -mtime, but based on file access time' -r
complete -c dust -s y -l ctime -d 'just like -mtime, but based on file change time' -r
complete -c dust -l files0-from -d 'run dust on NUL-terminated file names specified in file; if argument is -, then read names from standard input' -r -F
complete -c dust -s m -l filetime -d 'Directory \'size\' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time' -r -f -a "{a '',c '',m ''}"
complete -c dust -s p -l full-paths -d 'Subdirectories will not have their path shortened'
complete -c dust -s L -l dereference-links -d 'dereference sym links - Treat sym links as directories and go into them'
complete -c dust -s x -l limit-filesystem -d 'Only count the files and directories on the same filesystem as the supplied directory'
Expand Down
9 changes: 8 additions & 1 deletion man-page/dust.1
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
.SH NAME
Dust \- Like du but more intuitive
.SH SYNOPSIS
\fBdust\fR [\fB\-d\fR|\fB\-\-depth\fR] [\fB\-T\fR|\fB\-\-threads\fR] [\fB\-n\fR|\fB\-\-number\-of\-lines\fR] [\fB\-p\fR|\fB\-\-full\-paths\fR] [\fB\-X\fR|\fB\-\-ignore\-directory\fR] [\fB\-I\fR|\fB\-\-ignore\-all\-in\-file\fR] [\fB\-L\fR|\fB\-\-dereference\-links\fR] [\fB\-x\fR|\fB\-\-limit\-filesystem\fR] [\fB\-s\fR|\fB\-\-apparent\-size\fR] [\fB\-r\fR|\fB\-\-reverse\fR] [\fB\-c\fR|\fB\-\-no\-colors\fR] [\fB\-C\fR|\fB\-\-force\-colors\fR] [\fB\-b\fR|\fB\-\-no\-percent\-bars\fR] [\fB\-B\fR|\fB\-\-bars\-on\-right\fR] [\fB\-z\fR|\fB\-\-min\-size\fR] [\fB\-R\fR|\fB\-\-screen\-reader\fR] [\fB\-\-skip\-total\fR] [\fB\-f\fR|\fB\-\-filecount\fR] [\fB\-i\fR|\fB\-\-ignore_hidden\fR] [\fB\-v\fR|\fB\-\-invert\-filter\fR] [\fB\-e\fR|\fB\-\-filter\fR] [\fB\-t\fR|\fB\-\-file_types\fR] [\fB\-w\fR|\fB\-\-terminal_width\fR] [\fB\-P\fR|\fB\-\-no\-progress\fR] [\fB\-\-print\-errors\fR] [\fB\-D\fR|\fB\-\-only\-dir\fR] [\fB\-F\fR|\fB\-\-only\-file\fR] [\fB\-o\fR|\fB\-\-output\-format\fR] [\fB\-S\fR|\fB\-\-stack\-size\fR] [\fB\-j\fR|\fB\-\-output\-json\fR] [\fB\-M\fR|\fB\-\-mtime\fR] [\fB\-A\fR|\fB\-\-atime\fR] [\fB\-y\fR|\fB\-\-ctime\fR] [\fB\-\-files0\-from\fR] [\fB\-h\fR|\fB\-\-help\fR] [\fB\-V\fR|\fB\-\-version\fR] [\fIPATH\fR]
\fBdust\fR [\fB\-d\fR|\fB\-\-depth\fR] [\fB\-T\fR|\fB\-\-threads\fR] [\fB\-n\fR|\fB\-\-number\-of\-lines\fR] [\fB\-p\fR|\fB\-\-full\-paths\fR] [\fB\-X\fR|\fB\-\-ignore\-directory\fR] [\fB\-I\fR|\fB\-\-ignore\-all\-in\-file\fR] [\fB\-L\fR|\fB\-\-dereference\-links\fR] [\fB\-x\fR|\fB\-\-limit\-filesystem\fR] [\fB\-s\fR|\fB\-\-apparent\-size\fR] [\fB\-r\fR|\fB\-\-reverse\fR] [\fB\-c\fR|\fB\-\-no\-colors\fR] [\fB\-C\fR|\fB\-\-force\-colors\fR] [\fB\-b\fR|\fB\-\-no\-percent\-bars\fR] [\fB\-B\fR|\fB\-\-bars\-on\-right\fR] [\fB\-z\fR|\fB\-\-min\-size\fR] [\fB\-R\fR|\fB\-\-screen\-reader\fR] [\fB\-\-skip\-total\fR] [\fB\-f\fR|\fB\-\-filecount\fR] [\fB\-i\fR|\fB\-\-ignore_hidden\fR] [\fB\-v\fR|\fB\-\-invert\-filter\fR] [\fB\-e\fR|\fB\-\-filter\fR] [\fB\-t\fR|\fB\-\-file_types\fR] [\fB\-w\fR|\fB\-\-terminal_width\fR] [\fB\-P\fR|\fB\-\-no\-progress\fR] [\fB\-\-print\-errors\fR] [\fB\-D\fR|\fB\-\-only\-dir\fR] [\fB\-F\fR|\fB\-\-only\-file\fR] [\fB\-o\fR|\fB\-\-output\-format\fR] [\fB\-S\fR|\fB\-\-stack\-size\fR] [\fB\-j\fR|\fB\-\-output\-json\fR] [\fB\-M\fR|\fB\-\-mtime\fR] [\fB\-A\fR|\fB\-\-atime\fR] [\fB\-y\fR|\fB\-\-ctime\fR] [\fB\-\-files0\-from\fR] [\fB\-m\fR|\fB\-\-filetime\fR] [\fB\-h\fR|\fB\-\-help\fR] [\fB\-V\fR|\fB\-\-version\fR] [\fIPATH\fR]
.SH DESCRIPTION
Like du but more intuitive
.SH OPTIONS
Expand Down Expand Up @@ -115,6 +115,13 @@ just like \-mtime, but based on file change time
\fB\-\-files0\-from\fR
run dust on NUL\-terminated file names specified in file; if argument is \-, then read names from standard input
.TP
\fB\-m\fR, \fB\-\-filetime\fR
Directory \*(Aqsize\*(Aq is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time
.br

.br
[\fIpossible values: \fRa, c, m]
.TP
\fB\-h\fR, \fB\-\-help\fR
Print help
.TP
Expand Down
12 changes: 12 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,4 +294,16 @@ pub fn build_cli() -> Command {
.num_args(1)
.help("run dust on NUL-terminated file names specified in file; if argument is -, then read names from standard input"),
)
.arg(
Arg::new("filetime")
.short('m')
.long("filetime")
.num_args(1)
.value_parser([
PossibleValue::new("a").alias("accessed"),
PossibleValue::new("c").alias("changed"),
PossibleValue::new("m").alias("modified"),
])
.help("Directory 'size' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time"),
)
}
70 changes: 69 additions & 1 deletion src/config.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::node::FileTime;
use chrono::{Local, TimeZone};
use clap::ArgMatches;
use config_file::FromConfigFile;
Expand Down Expand Up @@ -84,6 +85,20 @@ impl Config {
})
.to_lowercase()
}

pub fn get_filetime(&self, options: &ArgMatches) -> Option<FileTime> {
let out_fmt = options.get_one::<String>("filetime");
match out_fmt {
None => None,
Some(x) => match x.as_str() {
"m" | "modified" => Some(FileTime::Modified),
"a" | "accessed" => Some(FileTime::Accessed),
"c" | "changed" => Some(FileTime::Changed),
_ => unreachable!(),
},
}
}

pub fn get_skip_total(&self, options: &ArgMatches) -> bool {
Some(true) == self.skip_total || options.get_flag("skip_total")
}
Expand Down Expand Up @@ -159,7 +174,7 @@ impl Config {
)
}

pub fn get_created_time_operator(&self, options: &ArgMatches) -> Option<(Operater, i64)> {
pub fn get_changed_time_operator(&self, options: &ArgMatches) -> Option<(Operater, i64)> {
get_filter_time_operator(
options.get_one::<String>("ctime"),
get_current_date_epoch_seconds(),
Expand Down Expand Up @@ -259,6 +274,7 @@ mod tests {
#[allow(unused_imports)]
use super::*;
use chrono::{Datelike, Timelike};
use clap::builder::PossibleValue;
use clap::{value_parser, Arg, ArgMatches, Command};

#[test]
Expand Down Expand Up @@ -338,4 +354,56 @@ mod tests {
)
.get_matches_from(args)
}

#[test]
fn test_get_filetime() {
// No config and no flag.
let c = Config::default();
let args = get_filetime_args(vec!["dust"]);
assert_eq!(c.get_filetime(&args), None);

// Config is not defined and flag is defined as access time
let c = Config::default();
let args = get_filetime_args(vec!["dust", "--filetime", "a"]);
assert_eq!(c.get_filetime(&args), Some(FileTime::Accessed));

let c = Config::default();
let args = get_filetime_args(vec!["dust", "--filetime", "accessed"]);
assert_eq!(c.get_filetime(&args), Some(FileTime::Accessed));

// Config is not defined and flag is defined as modified time
let c = Config::default();
let args = get_filetime_args(vec!["dust", "--filetime", "m"]);
assert_eq!(c.get_filetime(&args), Some(FileTime::Modified));

let c = Config::default();
let args = get_filetime_args(vec!["dust", "--filetime", "modified"]);
assert_eq!(c.get_filetime(&args), Some(FileTime::Modified));

// Config is not defined and flag is defined as changed time
let c = Config::default();
let args = get_filetime_args(vec!["dust", "--filetime", "c"]);
assert_eq!(c.get_filetime(&args), Some(FileTime::Changed));

let c = Config::default();
let args = get_filetime_args(vec!["dust", "--filetime", "changed"]);
assert_eq!(c.get_filetime(&args), Some(FileTime::Changed));
}

fn get_filetime_args(args: Vec<&str>) -> ArgMatches {
Command::new("Dust")
.arg(
Arg::new("filetime")
.short('m')
.long("filetime")
.num_args(1)
.value_parser([
PossibleValue::new("a").alias("accessed"),
PossibleValue::new("c").alias("changed"),
PossibleValue::new("m").alias("modified"),
])
.help("Directory 'size' is max filetime of child files instead of disk size. while a/c/m for accessed/changed/modified time"),
)
.get_matches_from(args)
}
}
70 changes: 57 additions & 13 deletions src/dir_walker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use std::collections::HashSet;
use crate::node::build_node;
use std::fs::DirEntry;

use crate::node::FileTime;
use crate::platform::get_metadata;

#[derive(Debug)]
Expand All @@ -40,6 +41,7 @@ pub struct WalkData<'a> {
pub filter_changed_time: Option<(Operater, i64)>,
pub use_apparent_size: bool,
pub by_filecount: bool,
pub by_filetime: &'a Option<FileTime>,
pub ignore_hidden: bool,
pub follow_links: bool,
pub progress_data: Arc<PAtomicInfo>,
Expand All @@ -57,19 +59,15 @@ pub fn walk_it(dirs: HashSet<PathBuf>, walk_data: &WalkData) -> Vec<Node> {

prog_data.state.store(Operation::PREPARING, ORDERING);

clean_inodes(node, &mut inodes, walk_data.use_apparent_size)
clean_inodes(node, &mut inodes, walk_data)
})
.collect();
top_level_nodes
}

// Remove files which have the same inode, we don't want to double count them.
fn clean_inodes(
x: Node,
inodes: &mut HashSet<(u64, u64)>,
use_apparent_size: bool,
) -> Option<Node> {
if !use_apparent_size {
fn clean_inodes(x: Node, inodes: &mut HashSet<(u64, u64)>, walk_data: &WalkData) -> Option<Node> {
if !walk_data.use_apparent_size {
if let Some(id) = x.inode_device {
if !inodes.insert(id) {
return None;
Expand All @@ -82,12 +80,25 @@ fn clean_inodes(
tmp.sort_by(sort_by_inode);
let new_children: Vec<_> = tmp
.into_iter()
.filter_map(|c| clean_inodes(c, inodes, use_apparent_size))
.filter_map(|c| clean_inodes(c, inodes, walk_data))
.collect();

let actual_size = if walk_data.by_filetime.is_some() {
// If by_filetime is Some, directory 'size' is the maximum filetime among child files instead of disk size
new_children
.iter()
.map(|c| c.size)
.chain(std::iter::once(x.size))
.max()
.unwrap_or(0)
} else {
// If by_filetime is None, directory 'size' is the sum of disk sizes or file counts of child files
x.size + new_children.iter().map(|c| c.size).sum::<u64>()
};

Some(Node {
name: x.name,
size: x.size + new_children.iter().map(|c| c.size).sum::<u64>(),
size: actual_size,
children: new_children,
inode_device: x.inode_device,
depth: x.depth,
Expand Down Expand Up @@ -280,28 +291,61 @@ mod tests {
}
}

#[cfg(test)]
fn create_walker<'a>(use_apparent_size: bool) -> WalkData<'a> {
use crate::PIndicator;
let indicator = PIndicator::build_me();
WalkData {
ignore_directories: HashSet::new(),
filter_regex: &[],
invert_filter_regex: &[],
allowed_filesystems: HashSet::new(),
filter_modified_time: Some((Operater::GreaterThan, 0)),
filter_accessed_time: Some((Operater::GreaterThan, 0)),
filter_changed_time: Some((Operater::GreaterThan, 0)),
use_apparent_size,
by_filecount: false,
by_filetime: &None,
ignore_hidden: false,
follow_links: false,
progress_data: indicator.data.clone(),
errors: Arc::new(Mutex::new(RuntimeErrors::default())),
}
}

#[test]
#[allow(clippy::redundant_clone)]
fn test_should_ignore_file() {
let mut inodes = HashSet::new();
let n = create_node();
let walkdata = create_walker(false);

// First time we insert the node
assert_eq!(clean_inodes(n.clone(), &mut inodes, false), Some(n.clone()));
assert_eq!(
clean_inodes(n.clone(), &mut inodes, &walkdata),
Some(n.clone())
);

// Second time is a duplicate - we ignore it
assert_eq!(clean_inodes(n.clone(), &mut inodes, false), None);
assert_eq!(clean_inodes(n.clone(), &mut inodes, &walkdata), None);
}

#[test]
#[allow(clippy::redundant_clone)]
fn test_should_not_ignore_files_if_using_apparent_size() {
let mut inodes = HashSet::new();
let n = create_node();
let walkdata = create_walker(true);

// If using apparent size we include Nodes, even if duplicate inodes
assert_eq!(clean_inodes(n.clone(), &mut inodes, true), Some(n.clone()));
assert_eq!(clean_inodes(n.clone(), &mut inodes, true), Some(n.clone()));
assert_eq!(
clean_inodes(n.clone(), &mut inodes, &walkdata),
Some(n.clone())
);
assert_eq!(
clean_inodes(n.clone(), &mut inodes, &walkdata),
Some(n.clone())
);
}

#[test]
Expand Down
Loading

0 comments on commit f48fcc7

Please sign in to comment.