From 804240164b0fbae78f6b78c347c4d2c391674628 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Sun, 5 Jun 2022 15:16:48 +0200 Subject: [PATCH 01/12] expand: allow specifier only with last value --- src/uu/expand/src/expand.rs | 21 +++++++++++++++++++++ tests/by-util/test_expand.rs | 18 ++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/src/uu/expand/src/expand.rs b/src/uu/expand/src/expand.rs index d81d2d313d0..eb65d63abec 100644 --- a/src/uu/expand/src/expand.rs +++ b/src/uu/expand/src/expand.rs @@ -41,6 +41,7 @@ static DEFAULT_TABSTOP: usize = 8; /// The mode to use when replacing tabs beyond the last one specified in /// the `--tabs` argument. +#[derive(PartialEq)] enum RemainingMode { None, Slash, @@ -65,6 +66,7 @@ fn is_space_or_comma(c: char) -> bool { enum ParseError { InvalidCharacter(String), SpecifierNotAtStartOfNumber(String, String), + SpecifierOnlyAllowedWithLastValue(String), TabSizeCannotBeZero, TabSizeTooLarge(String), TabSizesMustBeAscending, @@ -85,6 +87,11 @@ impl fmt::Display for ParseError { specifier.quote(), s.quote(), ), + Self::SpecifierOnlyAllowedWithLastValue(specifier) => write!( + f, + "{} specifier only allowed with the last value", + specifier.quote() + ), Self::TabSizeCannotBeZero => write!(f, "tab size cannot be 0"), Self::TabSizeTooLarge(s) => write!(f, "tab stop is too large {}", s.quote()), Self::TabSizesMustBeAscending => write!(f, "tab sizes must be ascending"), @@ -112,6 +119,7 @@ fn tabstops_parse(s: &str) -> Result<(RemainingMode, Vec), ParseError> { let mut nums = vec![]; let mut remaining_mode = RemainingMode::None; + let mut is_specifier_already_used = false; for word in s.split(is_space_or_comma) { let bytes = word.as_bytes(); for i in 0..bytes.len() { @@ -139,6 +147,19 @@ fn tabstops_parse(s: &str) -> Result<(RemainingMode, Vec), ParseError> { } } + if is_specifier_already_used { + let specifier = if remaining_mode == RemainingMode::Slash { + "/".to_string() + } else { + "+".to_string() + }; + return Err(ParseError::SpecifierOnlyAllowedWithLastValue( + specifier, + )); + } else if remaining_mode != RemainingMode::None { + is_specifier_already_used = true; + } + // Append this tab stop to the list of all tabstops. nums.push(num); break; diff --git a/tests/by-util/test_expand.rs b/tests/by-util/test_expand.rs index 289743ce8dd..e981dab2a09 100644 --- a/tests/by-util/test_expand.rs +++ b/tests/by-util/test_expand.rs @@ -226,6 +226,24 @@ fn test_tabs_with_specifier_not_at_start() { run_cmd("--tabs=1+2", "+", "+2"); } +#[test] +fn test_tabs_with_specifier_only_allowed_with_last_value() { + fn run_cmd(arg: &str, specifier: &str) { + let expected_msg = format!( + "{} specifier only allowed with the last value", + specifier.quote() + ); + new_ucmd!().arg(arg).fails().stderr_contains(expected_msg); + } + run_cmd("--tabs=/1,2,3", "/"); + run_cmd("--tabs=1,/2,3", "/"); + new_ucmd!().arg("--tabs=1,2,/3").succeeds(); + + run_cmd("--tabs=+1,2,3", "+"); + run_cmd("--tabs=1,+2,3", "+"); + new_ucmd!().arg("--tabs=1,2,+3").succeeds(); +} + #[test] fn test_tabs_with_invalid_chars() { new_ucmd!() From 5a42c06b25d6b9848a9831349a7fb4730ce0e2eb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 Jun 2022 19:18:28 +0000 Subject: [PATCH 02/12] build(deps): bump once_cell from 1.11.0 to 1.12.0 Bumps [once_cell](https://github.com/matklad/once_cell) from 1.11.0 to 1.12.0. - [Release notes](https://github.com/matklad/once_cell/releases) - [Changelog](https://github.com/matklad/once_cell/blob/master/CHANGELOG.md) - [Commits](https://github.com/matklad/once_cell/compare/v1.11.0...v1.12.0) --- updated-dependencies: - dependency-name: once_cell dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- src/uu/ls/Cargo.toml | 2 +- src/uucore/Cargo.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 10e68455bad..af17098cafd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1219,9 +1219,9 @@ checksum = "b8f8bdf33df195859076e54ab11ee78a1b208382d3a26ec40d142ffc1ecc49ef" [[package]] name = "once_cell" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b10983b38c53aebdf33f542c6275b0f58a238129d00c4ae0e6fb59738d783ca" +checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" [[package]] name = "onig" diff --git a/src/uu/ls/Cargo.toml b/src/uu/ls/Cargo.toml index 5be5e993c77..49254cf8de7 100644 --- a/src/uu/ls/Cargo.toml +++ b/src/uu/ls/Cargo.toml @@ -24,7 +24,7 @@ termsize = "0.1.6" glob = "0.3.0" lscolors = { version = "0.10.0", features = ["ansi_term"] } uucore = { version = ">=0.0.8", package = "uucore", path = "../../uucore", features = ["entries", "fs"] } -once_cell = "1.10.0" +once_cell = "1.12.0" atty = "0.2" selinux = { version="0.2", optional = true } diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index e382193778b..961bcbb203d 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -32,7 +32,7 @@ data-encoding = { version="2.1", optional=true } data-encoding-macro = { version="0.1.12", optional=true } z85 = { version="3.0.5", optional=true } libc = { version="0.2.126", optional=true } -once_cell = "1.10.0" +once_cell = "1.12.0" os_display = "0.1.3" [target.'cfg(unix)'.dependencies] From 8b1236c081c19c8d826658a91a51083cd9b2b385 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 6 Jun 2022 21:21:19 +0200 Subject: [PATCH 03/12] github: continue the other coverage jobs if one fails --- .github/workflows/CICD.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index c58b725c428..8620120ff1e 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ -972,7 +972,7 @@ jobs: needs: build runs-on: ${{ matrix.job.os }} strategy: - fail-fast: true + fail-fast: false matrix: job: - { os: ubuntu-latest , features: unix } From 1ba0bfc67a67288aa21736705841292fae030a4c Mon Sep 17 00:00:00 2001 From: Junji Wei Date: Thu, 9 Jun 2022 14:48:05 +0800 Subject: [PATCH 04/12] mktemp: respect POSIXLY_CORRECT env var when parsing args Signed-off-by: Junji Wei --- src/uu/mktemp/src/mktemp.rs | 25 +++++++++++++++++++++++-- tests/by-util/test_mktemp.rs | 22 ++++++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/src/uu/mktemp/src/mktemp.rs b/src/uu/mktemp/src/mktemp.rs index 6a848b7ad19..c3525651295 100644 --- a/src/uu/mktemp/src/mktemp.rs +++ b/src/uu/mktemp/src/mktemp.rs @@ -53,9 +53,14 @@ enum MkTempError { /// The template suffix contains a path separator (e.g. `"XXXa/b"`). SuffixContainsDirSeparator(String), InvalidTemplate(String), + TooManyTemplates, } -impl UError for MkTempError {} +impl UError for MkTempError { + fn usage(&self) -> bool { + matches!(self, Self::TooManyTemplates) + } +} impl Error for MkTempError {} @@ -85,6 +90,9 @@ impl Display for MkTempError { "invalid template, {}; with --tmpdir, it may not be absolute", s.quote() ), + TooManyTemplates => { + write!(f, "too many templates") + } } } } @@ -308,11 +316,24 @@ impl Params { #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - let matches = uu_app().try_get_matches_from(args)?; + let args = args.collect_str_lossy().accept_any(); + + let matches = uu_app().try_get_matches_from(&args)?; // Parse command-line options into a format suitable for the // application logic. let options = Options::from(&matches); + + if env::var("POSIXLY_CORRECT").is_ok() { + // If POSIXLY_CORRECT was set, template MUST be the last argument. + if is_tmpdir_argument_actually_the_template(&matches) || matches.is_present(ARG_TEMPLATE) { + // Template argument was provided, check if was the last one. + if args.last().unwrap() != &options.template { + return Err(Box::new(MkTempError::TooManyTemplates)); + } + } + } + let dry_run = options.dry_run; let suppress_file_err = options.quiet; let make_dir = options.directory; diff --git a/tests/by-util/test_mktemp.rs b/tests/by-util/test_mktemp.rs index 79ff9271d6a..9e4a2742cd8 100644 --- a/tests/by-util/test_mktemp.rs +++ b/tests/by-util/test_mktemp.rs @@ -641,3 +641,25 @@ fn test_suffix_empty_template() { .fails() .stderr_is("mktemp: with --suffix, template '' must end in X\n"); } + +#[test] +fn test_mktemp_with_posixly_correct() { + let scene = TestScenario::new(util_name!()); + + scene + .ucmd() + .env("POSIXLY_CORRECT", "1") + .args(&["aXXXX", "--suffix=b"]) + .fails() + .stderr_is(&format!( + "mktemp: too many templates\nTry '{} {} --help' for more information.\n", + scene.bin_path.to_string_lossy(), + scene.util_name + )); + + scene + .ucmd() + .env("POSIXLY_CORRECT", "1") + .args(&["--suffix=b", "aXXXX"]) + .succeeds(); +} From fa64407cb059db17030ee0bbecf07a6a273a608a Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Thu, 9 Jun 2022 22:14:43 +0200 Subject: [PATCH 05/12] cut: fix argument parsing for the delimiter This fixes the argument parsing for the delimiter for the two special cases `-d=` and `-d ''`. --- src/uu/cut/src/cut.rs | 9 +++++---- tests/by-util/test_cut.rs | 20 +++++++++++++++++++- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/uu/cut/src/cut.rs b/src/uu/cut/src/cut.rs index b809b765de5..bf83b2cd514 100644 --- a/src/uu/cut/src/cut.rs +++ b/src/uu/cut/src/cut.rs @@ -402,6 +402,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { .collect_str(InvalidEncodingHandling::Ignore) .accept_any(); + let delimiter_is_equal = args.contains(&"-d=".to_string()); // special case let matches = uu_app().get_matches_from(args); let complement = matches.is_present(options::COMPLEMENT); @@ -460,11 +461,11 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { // GNU's `cut` supports `-d=` to set the delimiter to `=`. // Clap parsing is limited in this situation, see: // https://github.com/uutils/coreutils/issues/2424#issuecomment-863825242 - // Since clap parsing handles `-d=` as delimiter explicitly set to "" and - // an empty delimiter is not accepted by GNU's `cut` (and makes no sense), - // we can use this as basis for a simple workaround: - if delim.is_empty() { + if delimiter_is_equal { delim = "="; + } else if delim == "''" { + // treat `''` as empty delimiter + delim = ""; } if delim.chars().count() > 1 { Err("invalid input: The '--delimiter' ('-d') option expects empty or 1 character long, but was provided a value 2 characters or longer".into()) diff --git a/tests/by-util/test_cut.rs b/tests/by-util/test_cut.rs index 0fe222b060a..457ba7096d6 100644 --- a/tests/by-util/test_cut.rs +++ b/tests/by-util/test_cut.rs @@ -172,10 +172,28 @@ fn test_directory_and_no_such_file() { } #[test] -fn test_equal_as_delimiter() { +fn test_equal_as_delimiter1() { new_ucmd!() .args(&["-f", "2", "-d="]) .pipe_in("--dir=./out/lib") .succeeds() .stdout_only("./out/lib\n"); } + +#[test] +fn test_equal_as_delimiter2() { + new_ucmd!() + .args(&["-f2", "--delimiter="]) + .pipe_in("a=b\n") + .succeeds() + .stdout_only("a=b\n"); +} + +#[test] +fn test_equal_as_delimiter3() { + new_ucmd!() + .args(&["-f", "1,2", "-d", "''", "--output-delimiter=Z"]) + .pipe_in("ab\0cd\n") + .succeeds() + .stdout_only_bytes("abZcd\n"); +} From 6412232e2bdf9b9a077665d44b7d7d275d44431f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 10 Jun 2022 06:39:32 +0000 Subject: [PATCH 06/12] build(deps): bump exacl from 0.8.0 to 0.9.0 Bumps [exacl](https://github.com/byllyfish/exacl) from 0.8.0 to 0.9.0. - [Release notes](https://github.com/byllyfish/exacl/releases) - [Changelog](https://github.com/byllyfish/exacl/blob/main/CHANGELOG.md) - [Commits](https://github.com/byllyfish/exacl/compare/v0.8.0...v0.9.0) --- updated-dependencies: - dependency-name: exacl dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- Cargo.lock | 8 ++++---- src/uu/cp/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b191c36efba..56023548789 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -775,9 +775,9 @@ dependencies = [ [[package]] name = "exacl" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d5d9a2fa7d72579802c22bb97c37953cf1007f21f7ac2247d150c4c2d40c2ab" +checksum = "129c7b60e19ea8393c47b2110f8e3cea800530fd962380ef110d1fef6591faee" dependencies = [ "bitflags", "log", @@ -3141,9 +3141,9 @@ dependencies = [ [[package]] name = "uuid" -version = "0.8.2" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" +checksum = "c6d5d669b51467dcf7b2f1a796ce0f955f05f01cafda6c19d6e95f730df29238" [[package]] name = "vec_map" diff --git a/src/uu/cp/Cargo.toml b/src/uu/cp/Cargo.toml index c350e158eec..43a8c35da44 100644 --- a/src/uu/cp/Cargo.toml +++ b/src/uu/cp/Cargo.toml @@ -35,7 +35,7 @@ winapi = { version="0.3", features=["fileapi"] } [target.'cfg(unix)'.dependencies] xattr="0.2.3" -exacl= { version = "0.8.0", optional=true } +exacl= { version = "0.9.0", optional=true } [[bin]] name = "cp" From d8f73c3f244699eedf4ac8232768bdcf3c7535c7 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Thu, 9 Jun 2022 09:40:25 +0200 Subject: [PATCH 07/12] unexpand: set value name of arg --- src/uu/unexpand/src/unexpand.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/uu/unexpand/src/unexpand.rs b/src/uu/unexpand/src/unexpand.rs index 0ebfea6134d..1d22a067f4f 100644 --- a/src/uu/unexpand/src/unexpand.rs +++ b/src/uu/unexpand/src/unexpand.rs @@ -134,6 +134,7 @@ pub fn uu_app<'a>() -> Command<'a> { .long(options::TABS) .help("use comma separated LIST of tab positions or have tabs N characters apart instead of 8 (enables -a)") .takes_value(true) + .value_name("N, LIST") ) .arg( Arg::new(options::NO_UTF8) From 0c0e4dbda4f8594e2fe4dc32db66bebf24381a9d Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Thu, 9 Jun 2022 08:18:50 +0200 Subject: [PATCH 08/12] unexpand: return Result instead of calling crash! --- src/uu/unexpand/src/unexpand.rs | 63 ++++++++++++++++++++++++--------- tests/by-util/test_unexpand.rs | 28 +++++++++++++++ 2 files changed, 74 insertions(+), 17 deletions(-) diff --git a/src/uu/unexpand/src/unexpand.rs b/src/uu/unexpand/src/unexpand.rs index 0ebfea6134d..0a143a80847 100644 --- a/src/uu/unexpand/src/unexpand.rs +++ b/src/uu/unexpand/src/unexpand.rs @@ -12,12 +12,14 @@ #[macro_use] extern crate uucore; use clap::{crate_version, Arg, Command}; +use std::error::Error; +use std::fmt; use std::fs::File; use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Stdout, Write}; use std::str::from_utf8; use unicode_width::UnicodeWidthChar; use uucore::display::Quotable; -use uucore::error::{FromIo, UResult}; +use uucore::error::{FromIo, UError, UResult}; use uucore::{format_usage, InvalidEncodingHandling}; static NAME: &str = "unexpand"; @@ -27,28 +29,55 @@ static SUMMARY: &str = "Convert blanks in each FILE to tabs, writing to standard const DEFAULT_TABSTOP: usize = 8; -fn tabstops_parse(s: &str) -> Vec { +#[derive(Debug)] +enum ParseError { + InvalidCharacter(String), + TabSizeCannotBeZero, + TabSizesMustBeAscending, +} + +impl Error for ParseError {} +impl UError for ParseError {} + +impl fmt::Display for ParseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::InvalidCharacter(s) => { + write!(f, "tab size contains invalid character(s): {}", s.quote()) + } + Self::TabSizeCannotBeZero => write!(f, "tab size cannot be 0"), + Self::TabSizesMustBeAscending => write!(f, "tab sizes must be ascending"), + } + } +} + +fn tabstops_parse(s: &str) -> Result, ParseError> { let words = s.split(','); - let nums = words - .map(|sn| { - sn.parse() - .unwrap_or_else(|_| crash!(1, "{}\n", "tab size contains invalid character(s)")) - }) - .collect::>(); + let mut nums = Vec::new(); + + for word in words { + if let Ok(num) = word.parse() { + nums.push(num); + } else { + return Err(ParseError::InvalidCharacter( + word.trim_start_matches(char::is_numeric).to_string(), + )); + } + } if nums.iter().any(|&n| n == 0) { - crash!(1, "{}\n", "tab size cannot be 0"); + return Err(ParseError::TabSizeCannotBeZero); } if let (false, _) = nums .iter() - .fold((true, 0), |(acc, last), &n| (acc && last <= n, n)) + .fold((true, 0), |(acc, last), &n| (acc && last < n, n)) { - crash!(1, "{}\n", "tab sizes must be ascending"); + return Err(ParseError::TabSizesMustBeAscending); } - nums + Ok(nums) } mod options { @@ -67,10 +96,10 @@ struct Options { } impl Options { - fn new(matches: &clap::ArgMatches) -> Self { + fn new(matches: &clap::ArgMatches) -> Result { let tabstops = match matches.value_of(options::TABS) { None => vec![DEFAULT_TABSTOP], - Some(s) => tabstops_parse(s), + Some(s) => tabstops_parse(s)?, }; let aflag = (matches.is_present(options::ALL) || matches.is_present(options::TABS)) @@ -82,12 +111,12 @@ impl Options { None => vec!["-".to_owned()], }; - Self { + Ok(Self { files, tabstops, aflag, uflag, - } + }) } } @@ -99,7 +128,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let matches = uu_app().get_matches_from(args); - unexpand(&Options::new(&matches)).map_err_context(String::new) + unexpand(&Options::new(&matches)?).map_err_context(String::new) } pub fn uu_app<'a>() -> Command<'a> { diff --git a/tests/by-util/test_unexpand.rs b/tests/by-util/test_unexpand.rs index 692599361a8..ccb217393e0 100644 --- a/tests/by-util/test_unexpand.rs +++ b/tests/by-util/test_unexpand.rs @@ -155,3 +155,31 @@ fn unexpand_read_from_two_file() { .run() .success(); } + +#[test] +fn test_tabs_cannot_be_zero() { + new_ucmd!() + .arg("--tabs=0") + .fails() + .stderr_contains("tab size cannot be 0"); +} + +#[test] +fn test_tabs_must_be_ascending() { + new_ucmd!() + .arg("--tabs=1,1") + .fails() + .stderr_contains("tab sizes must be ascending"); +} + +#[test] +fn test_tabs_with_invalid_chars() { + new_ucmd!() + .arg("--tabs=x") + .fails() + .stderr_contains("tab size contains invalid character(s): 'x'"); + new_ucmd!() + .arg("--tabs=1x2") + .fails() + .stderr_contains("tab size contains invalid character(s): 'x2'"); +} From 4cc058a78903252cdf1eec3c11e735f158b176f7 Mon Sep 17 00:00:00 2001 From: Patrick Jackson Date: Tue, 7 Jun 2022 10:45:19 -0700 Subject: [PATCH 09/12] dd: reuse buffer for the most common cases --- src/uu/dd/src/dd.rs | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs index 115da8bccfd..46c35d0ecec 100644 --- a/src/uu/dd/src/dd.rs +++ b/src/uu/dd/src/dd.rs @@ -414,6 +414,10 @@ where let (prog_tx, rx) = mpsc::channel(); thread::spawn(gen_prog_updater(rx, i.print_level)); + // Create a common buffer with a capacity of the block size. + // This is the max size needed. + let mut buf = vec![BUF_INIT_BYTE; bsize]; + // The main read/write loop. // // Each iteration reads blocks from the input and writes @@ -427,7 +431,7 @@ where // best buffer size for reading based on the number of // blocks already read and the number of blocks remaining. let loop_bsize = calc_loop_bsize(&i.count, &rstat, &wstat, i.ibs, bsize); - let (rstat_update, buf) = read_helper(&mut i, loop_bsize)?; + let rstat_update = read_helper(&mut i, &mut buf, loop_bsize)?; if rstat_update.is_empty() { break; } @@ -600,7 +604,11 @@ impl Write for Output { } /// Read helper performs read operations common to all dd reads, and dispatches the buffer to relevant helper functions as dictated by the operations requested by the user. -fn read_helper(i: &mut Input, bsize: usize) -> std::io::Result<(ReadStat, Vec)> { +fn read_helper( + i: &mut Input, + mut buf: &mut Vec, + bsize: usize, +) -> std::io::Result { // Local Helper Fns ------------------------------------------------- fn perform_swab(buf: &mut [u8]) { for base in (1..buf.len()).step_by(2) { @@ -609,14 +617,16 @@ fn read_helper(i: &mut Input, bsize: usize) -> std::io::Result<(Read } // ------------------------------------------------------------------ // Read - let mut buf = vec![BUF_INIT_BYTE; bsize]; + // Resize the buffer to the bsize. Any garbage data in the buffer is overwritten or truncated, so there is no need to fill with BUF_INIT_BYTE first. + buf.resize(bsize, BUF_INIT_BYTE); + let mut rstat = match i.cflags.sync { Some(ch) => i.fill_blocks(&mut buf, ch)?, _ => i.fill_consecutive(&mut buf)?, }; // Return early if no data if rstat.reads_complete == 0 && rstat.reads_partial == 0 { - return Ok((rstat, buf)); + return Ok(rstat); } // Perform any conv=x[,x...] options @@ -626,10 +636,10 @@ fn read_helper(i: &mut Input, bsize: usize) -> std::io::Result<(Read match i.cflags.mode { Some(ref mode) => { - let buf = conv_block_unblock_helper(buf, mode, &mut rstat); - Ok((rstat, buf)) + *buf = conv_block_unblock_helper(buf.clone(), mode, &mut rstat); + Ok(rstat) } - None => Ok((rstat, buf)), + None => Ok(rstat), } } From a186adbff1119adfeba54e6ab1aa8751e0635fb9 Mon Sep 17 00:00:00 2001 From: Patrick Jackson Date: Thu, 9 Jun 2022 09:27:42 -0700 Subject: [PATCH 10/12] dd: fixing clippy warnings. --- src/uu/dd/src/dd.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs index 46c35d0ecec..e47f923eec6 100644 --- a/src/uu/dd/src/dd.rs +++ b/src/uu/dd/src/dd.rs @@ -606,7 +606,7 @@ impl Write for Output { /// Read helper performs read operations common to all dd reads, and dispatches the buffer to relevant helper functions as dictated by the operations requested by the user. fn read_helper( i: &mut Input, - mut buf: &mut Vec, + buf: &mut Vec, bsize: usize, ) -> std::io::Result { // Local Helper Fns ------------------------------------------------- @@ -621,8 +621,8 @@ fn read_helper( buf.resize(bsize, BUF_INIT_BYTE); let mut rstat = match i.cflags.sync { - Some(ch) => i.fill_blocks(&mut buf, ch)?, - _ => i.fill_consecutive(&mut buf)?, + Some(ch) => i.fill_blocks(buf, ch)?, + _ => i.fill_consecutive(buf)?, }; // Return early if no data if rstat.reads_complete == 0 && rstat.reads_partial == 0 { @@ -631,7 +631,7 @@ fn read_helper( // Perform any conv=x[,x...] options if i.cflags.swab { - perform_swab(&mut buf); + perform_swab(buf); } match i.cflags.mode { From 881f0c3d0604e1c2098e7c4e2d05677046b0d8d3 Mon Sep 17 00:00:00 2001 From: Patrick Jackson Date: Thu, 9 Jun 2022 11:01:30 -0700 Subject: [PATCH 11/12] dd: add BENCHMARKING instructions --- src/uu/dd/BENCHMARKING.md | 62 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 src/uu/dd/BENCHMARKING.md diff --git a/src/uu/dd/BENCHMARKING.md b/src/uu/dd/BENCHMARKING.md new file mode 100644 index 00000000000..57a19bf5c23 --- /dev/null +++ b/src/uu/dd/BENCHMARKING.md @@ -0,0 +1,62 @@ +# Benchmarking dd + +`dd` is a utility used for copying and converting files. It is often used for +writing directly to devices, such as when writing an `.iso` file directly to a +drive. + +## Understanding dd + +At the core, `dd` has a simple loop of operation. It reads in `blocksize` bytes +from an input, optionally performs a conversion on the bytes, and then writes +`blocksize` bytes to an output file. + +In typical usage, the performance of `dd` is dominated by the speed at which it +can read or write to the filesystem. For those scenarios it is best to optimize +the blocksize for the performance of the devices being read/written to. Devices +typically have an optimal block size that they work best at, so for maximum +performance `dd` should be using a block size, or multiple of the block size, +that the underlying devices prefer. + +For benchmarking `dd` itself we will use fast special files provided by the +operating system that work out of RAM, `/dev/zero` and `/dev/null`. This reduces +the time taken reading/writing files to a minimum and maximises the percentage +time we spend in the `dd` tool itself, but care still needs to be taken to +understand where we are benchmarking the `dd` tool and where we are just +benchmarking memory performance. + +The main parameter to vary for a `dd` benchmark is the blocksize, but benchmarks +testing the conversions that are supported by `dd` could also be interesting. + +`dd` has a convenient `count` argument, that will copy `count` blocks of data +from the input to the output, which is useful for benchmarking. + +## Blocksize Benchmarks + +When measuring the impact of blocksize on the throughput, we want to avoid +testing the startup time of `dd`. `dd` itself will give a report on the +throughput speed once complete, but it's probably better to use an external +tool, such as `hyperfine` to measure the performance. + +Benchmarks should be sized so that they run for a handful of seconds at a +minimum to avoid measuring the startup time unnecessarily. The total time will +be roughly equivalent to the total bytes copied (`blocksize` x `count`). + +Some useful invocations for testing would be the following: + +``` +hyperfine "./target/release/dd bs=4k count=1000000 < /dev/zero > /dev/null" +hyperfine "./target/release/dd bs=1M count=20000 < /dev/zero > /dev/null" +hyperfine "./target/release/dd bs=1G count=10 < /dev/zero > /dev/null" +``` + +Choosing what to benchmark depends greatly on what you want to measure. +Typically you would choose a small blocksize for measuring the performance of +`dd`, as that would maximize the overhead introduced by the `dd` tool. `dd` +typically does some set amount of work per block which only depends on the size +of the block if conversions are used. + +As an example, https://github.com/uutils/coreutils/pull/3600 made a change to +reuse the same buffer between block copies, avoiding the need to reallocate a +new block of memory for each copy. The impact of that change mostly had an +impact on large block size copies because those are the circumstances where the +memory performance dominated the total performance. From fccab8a6910245f6e20c012dae3a200cb9432435 Mon Sep 17 00:00:00 2001 From: Jack Grigg Date: Mon, 31 Jan 2022 13:47:09 +0000 Subject: [PATCH 12/12] hashsum: Refactor `uu_app` to isolate non-"GNU Coreutils" options Several binaries have been added to `hashsum` that have never been part of GNU Coreutils: - `sha3*sum` (uutils/coreutils#869) - `shake*sum` (uutils/coreutils#987) - `b3sum` (uutils/coreutils#3108 and uutils/coreutils#3164) In particular, the `--bits` option, and the `--no-names` option added in uutils/coreutils#3361, are not valid for any GNU Coreutils `*sum` binary (as of Coreutils 9.0). This commit refactors the argument parsing so that `--bits` and `--no-names` become invalid options for the binaries intended to match the GNU Coreutils API, instead of being ignored options. It also refactors the custom binary name handling to distinguish between binaries intended to match the GNU Coreutils API, and binaries that don't have that constraint. Part of uutils/coreutils#2930. --- build.rs | 12 +++-- src/uu/hashsum/src/hashsum.rs | 89 +++++++++++++++++++---------------- 2 files changed, 56 insertions(+), 45 deletions(-) diff --git a/build.rs b/build.rs index 3df8d891d58..734dfc350a8 100644 --- a/build.rs +++ b/build.rs @@ -104,21 +104,25 @@ pub fn main() { ); let map_value = format!("({krate}::uumain, {krate}::uu_app_common)", krate = krate); + let map_value_bits = + format!("({krate}::uumain, {krate}::uu_app_bits)", krate = krate); + let map_value_b3sum = + format!("({krate}::uumain, {krate}::uu_app_b3sum)", krate = krate); phf_map.entry("md5sum", &map_value); phf_map.entry("sha1sum", &map_value); phf_map.entry("sha224sum", &map_value); phf_map.entry("sha256sum", &map_value); phf_map.entry("sha384sum", &map_value); phf_map.entry("sha512sum", &map_value); - phf_map.entry("sha3sum", &map_value); + phf_map.entry("sha3sum", &map_value_bits); phf_map.entry("sha3-224sum", &map_value); phf_map.entry("sha3-256sum", &map_value); phf_map.entry("sha3-384sum", &map_value); phf_map.entry("sha3-512sum", &map_value); - phf_map.entry("shake128sum", &map_value); - phf_map.entry("shake256sum", &map_value); + phf_map.entry("shake128sum", &map_value_bits); + phf_map.entry("shake256sum", &map_value_bits); phf_map.entry("b2sum", &map_value); - phf_map.entry("b3sum", &map_value); + phf_map.entry("b3sum", &map_value_b3sum); tf.write_all( format!( "#[path=\"{dir}/test_{krate}.rs\"]\nmod test_{krate};\n", diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index 872cb5c21d1..5cb37a3b30b 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -54,27 +54,6 @@ struct Options { output_bits: usize, } -fn is_custom_binary(program: &str) -> bool { - matches!( - program, - "md5sum" - | "sha1sum" - | "sha224sum" - | "sha256sum" - | "sha384sum" - | "sha512sum" - | "sha3sum" - | "sha3-224sum" - | "sha3-256sum" - | "sha3-384sum" - | "sha3-512sum" - | "shake128sum" - | "shake256sum" - | "b2sum" - | "b3sum" - ) -} - #[allow(clippy::cognitive_complexity)] fn detect_algo( program: &str, @@ -373,11 +352,6 @@ pub fn uu_app_common<'a>() -> Command<'a> { .long("tag") .help("create a BSD-style checksum"), ) - .arg( - Arg::new("no-names") - .long("no-names") - .help("Omits filenames in the output (option not present in GNU/Coreutils)"), - ) .arg( Arg::new("text") .short('t') @@ -408,16 +382,6 @@ pub fn uu_app_common<'a>() -> Command<'a> { .long("warn") .help("warn about improperly formatted checksum lines"), ) - // Needed for variable-length output sums (e.g. SHAKE) - .arg( - Arg::new("bits") - .long("bits") - .help("set the size of the output (only for SHAKE)") - .takes_value(true) - .value_name("BITS") - // XXX: should we actually use validators? they're not particularly efficient - .validator(is_valid_bit_num), - ) .arg( Arg::new("FILE") .index(1) @@ -428,8 +392,37 @@ pub fn uu_app_common<'a>() -> Command<'a> { ) } +pub fn uu_app_b3sum<'a>() -> Command<'a> { + uu_app_b3sum_opts(uu_app_common()) +} + +fn uu_app_b3sum_opts(command: Command) -> Command { + command.arg( + Arg::new("no-names") + .long("no-names") + .help("Omits filenames in the output (option not present in GNU/Coreutils)"), + ) +} + +pub fn uu_app_bits<'a>() -> Command<'a> { + uu_app_opt_bits(uu_app_common()) +} + +fn uu_app_opt_bits(command: Command) -> Command { + // Needed for variable-length output sums (e.g. SHAKE) + command.arg( + Arg::new("bits") + .long("bits") + .help("set the size of the output (only for SHAKE)") + .takes_value(true) + .value_name("BITS") + // XXX: should we actually use validators? they're not particularly efficient + .validator(is_valid_bit_num), + ) +} + pub fn uu_app_custom<'a>() -> Command<'a> { - let mut command = uu_app_common(); + let mut command = uu_app_b3sum_opts(uu_app_opt_bits(uu_app_common())); let algorithms = &[ ("md5", "work with MD5"), ("sha1", "work with SHA1"), @@ -463,10 +456,24 @@ pub fn uu_app_custom<'a>() -> Command<'a> { // hashsum is handled differently in build.rs, therefore this is not the same // as in other utilities. fn uu_app<'a>(binary_name: &str) -> Command<'a> { - if !is_custom_binary(binary_name) { - uu_app_custom() - } else { - uu_app_common() + match binary_name { + // These all support the same options. + "md5sum" | "sha1sum" | "sha224sum" | "sha256sum" | "sha384sum" | "sha512sum" => { + uu_app_common() + } + // b2sum supports the md5sum options plus -l/--length. + "b2sum" => uu_app_common(), // TODO: Implement -l/--length + // These have never been part of GNU Coreutils, but can function with the same + // options as md5sum. + "sha3-224sum" | "sha3-256sum" | "sha3-384sum" | "sha3-512sum" => uu_app_common(), + // These have never been part of GNU Coreutils, and require an additional --bits + // option to specify their output size. + "sha3sum" | "shake128sum" | "shake256sum" => uu_app_bits(), + // b3sum has never been part of GNU Coreutils, and has a --no-names option in + // addition to the b2sum options. + "b3sum" => uu_app_b3sum(), + // We're probably just being called as `hashsum`, so give them everything. + _ => uu_app_custom(), } }