Skip to content

Commit

Permalink
split: better handle numeric and hex suffixes, short and long, with a…
Browse files Browse the repository at this point in the history
…nd without values (#5198)

* split: better handle numeric and hex suffixes, short and long, with and without values
Fixes #5171

* refactoring with overrides_with_all() in args definitions

* fixed comments

* updated help on suffixes to match GNU

* comments

* refactor to remove value_parser()

* split: refactor suffix processing + updated tests

* split: minor formatting
  • Loading branch information
zhitkoff authored Aug 28, 2023
1 parent e4ea64a commit 1eae064
Show file tree
Hide file tree
Showing 2 changed files with 263 additions and 32 deletions.
105 changes: 76 additions & 29 deletions src/uu/split/src/split.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,15 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

// spell-checker:ignore nbbbb ncccc
// spell-checker:ignore nbbbb ncccc hexdigit

mod filenames;
mod number;
mod platform;

use crate::filenames::FilenameIterator;
use crate::filenames::SuffixType;
use clap::ArgAction;
use clap::{crate_version, parser::ValueSource, Arg, ArgMatches, Command};
use clap::{crate_version, parser::ValueSource, Arg, ArgAction, ArgMatches, Command};
use std::env;
use std::fmt;
use std::fs::{metadata, File};
Expand All @@ -32,7 +31,9 @@ static OPT_ADDITIONAL_SUFFIX: &str = "additional-suffix";
static OPT_FILTER: &str = "filter";
static OPT_NUMBER: &str = "number";
static OPT_NUMERIC_SUFFIXES: &str = "numeric-suffixes";
static OPT_NUMERIC_SUFFIXES_SHORT: &str = "-d";
static OPT_HEX_SUFFIXES: &str = "hex-suffixes";
static OPT_HEX_SUFFIXES_SHORT: &str = "-x";
static OPT_SUFFIX_LENGTH: &str = "suffix-length";
static OPT_DEFAULT_SUFFIX_LENGTH: &str = "0";
static OPT_VERBOSE: &str = "verbose";
Expand Down Expand Up @@ -122,28 +123,64 @@ pub fn uu_app() -> Command {
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(OPT_NUMERIC_SUFFIXES)
Arg::new(OPT_NUMERIC_SUFFIXES_SHORT)
.short('d')
.action(clap::ArgAction::SetTrue)
.overrides_with_all([
OPT_NUMERIC_SUFFIXES,
OPT_NUMERIC_SUFFIXES_SHORT,
OPT_HEX_SUFFIXES,
OPT_HEX_SUFFIXES_SHORT
])
.help("use numeric suffixes starting at 0, not alphabetic"),
)
.arg(
Arg::new(OPT_NUMERIC_SUFFIXES)
.long(OPT_NUMERIC_SUFFIXES)
.require_equals(true)
.default_missing_value("0")
.num_args(0..=1)
.help("use numeric suffixes instead of alphabetic"),
.overrides_with_all([
OPT_NUMERIC_SUFFIXES,
OPT_NUMERIC_SUFFIXES_SHORT,
OPT_HEX_SUFFIXES,
OPT_HEX_SUFFIXES_SHORT
])
.help("same as -d, but allow setting the start value"),
)
.arg(
Arg::new(OPT_SUFFIX_LENGTH)
.short('a')
.long(OPT_SUFFIX_LENGTH)
.value_name("N")
.default_value(OPT_DEFAULT_SUFFIX_LENGTH)
.help("use suffixes of fixed length N. 0 implies dynamic length."),
Arg::new(OPT_HEX_SUFFIXES_SHORT)
.short('x')
.action(clap::ArgAction::SetTrue)
.overrides_with_all([
OPT_NUMERIC_SUFFIXES,
OPT_NUMERIC_SUFFIXES_SHORT,
OPT_HEX_SUFFIXES,
OPT_HEX_SUFFIXES_SHORT
])
.help("use hex suffixes starting at 0, not alphabetic"),
)
.arg(
Arg::new(OPT_HEX_SUFFIXES)
.short('x')
.long(OPT_HEX_SUFFIXES)
.default_missing_value("0")
.require_equals(true)
.num_args(0..=1)
.help("use hex suffixes instead of alphabetic"),
.overrides_with_all([
OPT_NUMERIC_SUFFIXES,
OPT_NUMERIC_SUFFIXES_SHORT,
OPT_HEX_SUFFIXES,
OPT_HEX_SUFFIXES_SHORT
])
.help("same as -x, but allow setting the start value"),
)
.arg(
Arg::new(OPT_SUFFIX_LENGTH)
.short('a')
.long(OPT_SUFFIX_LENGTH)
.value_name("N")
.default_value(OPT_DEFAULT_SUFFIX_LENGTH)
.help("use suffixes of fixed length N. 0 implies dynamic length."),
)
.arg(
Arg::new(OPT_VERBOSE)
Expand Down Expand Up @@ -409,22 +446,32 @@ impl Strategy {

/// Parse the suffix type from the command-line arguments.
fn suffix_type_from(matches: &ArgMatches) -> Result<(SuffixType, usize), SettingsError> {
if matches.value_source(OPT_NUMERIC_SUFFIXES) == Some(ValueSource::CommandLine) {
let suffix_start = matches.get_one::<String>(OPT_NUMERIC_SUFFIXES);
let suffix_start = suffix_start.ok_or(SettingsError::SuffixNotParsable(String::new()))?;
let suffix_start = suffix_start
.parse()
.map_err(|_| SettingsError::SuffixNotParsable(suffix_start.to_string()))?;
Ok((SuffixType::Decimal, suffix_start))
} else if matches.value_source(OPT_HEX_SUFFIXES) == Some(ValueSource::CommandLine) {
let suffix_start = matches.get_one::<String>(OPT_HEX_SUFFIXES);
let suffix_start = suffix_start.ok_or(SettingsError::SuffixNotParsable(String::new()))?;
let suffix_start = usize::from_str_radix(suffix_start, 16)
.map_err(|_| SettingsError::SuffixNotParsable(suffix_start.to_string()))?;
Ok((SuffixType::Hexadecimal, suffix_start))
} else {
// no numeric/hex suffix
Ok((SuffixType::Alphabetic, 0))
// Check if the user is specifying one or more than one suffix
// Any combination of suffixes is allowed
// Since all suffixes are setup with 'overrides_with_all()' against themselves and each other,
// last one wins, all others are ignored
match (
matches.get_one::<String>(OPT_NUMERIC_SUFFIXES),
matches.get_one::<String>(OPT_HEX_SUFFIXES),
matches.get_flag(OPT_NUMERIC_SUFFIXES_SHORT),
matches.get_flag(OPT_HEX_SUFFIXES_SHORT),
) {
(Some(v), _, _, _) => {
let suffix_start = v;
let suffix_start = suffix_start
.parse::<usize>()
.map_err(|_| SettingsError::SuffixNotParsable(suffix_start.to_string()))?;
Ok((SuffixType::Decimal, suffix_start))
}
(_, Some(v), _, _) => {
let suffix_start = v;
let suffix_start = usize::from_str_radix(suffix_start, 16)
.map_err(|_| SettingsError::SuffixNotParsable(suffix_start.to_string()))?;
Ok((SuffixType::Hexadecimal, suffix_start))
}
(_, _, true, _) => Ok((SuffixType::Decimal, 0)), // short numeric suffix '-d', default start 0
(_, _, _, true) => Ok((SuffixType::Hexadecimal, 0)), // short hex suffix '-x', default start 0
_ => Ok((SuffixType::Alphabetic, 0)), // no numeric/hex suffix, using default alphabetic
}
}

Expand Down
190 changes: 187 additions & 3 deletions tests/by-util/test_split.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes onehundredlines nbbbb
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen

use crate::common::util::{AtPath, TestScenario};
use rand::{thread_rng, Rng, SeedableRng};
Expand Down Expand Up @@ -715,7 +715,7 @@ fn test_multiple_of_input_chunk() {
#[test]
fn test_numeric_suffix() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-n", "4", "--numeric-suffixes", "9", "threebytes.txt"])
ucmd.args(&["-n", "4", "--numeric-suffixes=9", "threebytes.txt"])
.succeeds()
.no_stdout()
.no_stderr();
Expand All @@ -728,7 +728,7 @@ fn test_numeric_suffix() {
#[test]
fn test_hex_suffix() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-n", "4", "--hex-suffixes", "9", "threebytes.txt"])
ucmd.args(&["-n", "4", "--hex-suffixes=9", "threebytes.txt"])
.succeeds()
.no_stdout()
.no_stderr();
Expand All @@ -738,6 +738,190 @@ fn test_hex_suffix() {
assert_eq!(at.read("x0c"), "");
}

#[test]
fn test_numeric_suffix_no_equal() {
new_ucmd!()
.args(&["-n", "4", "--numeric-suffixes", "9", "threebytes.txt"])
.fails()
.stderr_contains("split: cannot open '9' for reading: No such file or directory");
}

#[test]
fn test_hex_suffix_no_equal() {
new_ucmd!()
.args(&["-n", "4", "--hex-suffixes", "9", "threebytes.txt"])
.fails()
.stderr_contains("split: cannot open '9' for reading: No such file or directory");
}

/// Test for short numeric suffix not having any value
#[test]
fn test_short_numeric_suffix_no_value() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-l", "9", "-d", "onehundredlines.txt"])
.succeeds()
.no_stdout()
.no_stderr();
assert_eq!(at.read("x00"), "00\n01\n02\n03\n04\n05\n06\n07\n08\n");
assert_eq!(at.read("x01"), "09\n10\n11\n12\n13\n14\n15\n16\n17\n");
assert_eq!(at.read("x02"), "18\n19\n20\n21\n22\n23\n24\n25\n26\n");
assert_eq!(at.read("x03"), "27\n28\n29\n30\n31\n32\n33\n34\n35\n");
assert_eq!(at.read("x04"), "36\n37\n38\n39\n40\n41\n42\n43\n44\n");
assert_eq!(at.read("x05"), "45\n46\n47\n48\n49\n50\n51\n52\n53\n");
assert_eq!(at.read("x06"), "54\n55\n56\n57\n58\n59\n60\n61\n62\n");
assert_eq!(at.read("x07"), "63\n64\n65\n66\n67\n68\n69\n70\n71\n");
assert_eq!(at.read("x08"), "72\n73\n74\n75\n76\n77\n78\n79\n80\n");
assert_eq!(at.read("x09"), "81\n82\n83\n84\n85\n86\n87\n88\n89\n");
assert_eq!(at.read("x10"), "90\n91\n92\n93\n94\n95\n96\n97\n98\n");
assert_eq!(at.read("x11"), "99\n");
}

/// Test for long numeric suffix not having any value
#[test]
fn test_numeric_suffix_no_value() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-l", "9", "--numeric-suffixes", "onehundredlines.txt"])
.succeeds()
.no_stdout()
.no_stderr();
assert_eq!(at.read("x00"), "00\n01\n02\n03\n04\n05\n06\n07\n08\n");
assert_eq!(at.read("x01"), "09\n10\n11\n12\n13\n14\n15\n16\n17\n");
assert_eq!(at.read("x02"), "18\n19\n20\n21\n22\n23\n24\n25\n26\n");
assert_eq!(at.read("x03"), "27\n28\n29\n30\n31\n32\n33\n34\n35\n");
assert_eq!(at.read("x04"), "36\n37\n38\n39\n40\n41\n42\n43\n44\n");
assert_eq!(at.read("x05"), "45\n46\n47\n48\n49\n50\n51\n52\n53\n");
assert_eq!(at.read("x06"), "54\n55\n56\n57\n58\n59\n60\n61\n62\n");
assert_eq!(at.read("x07"), "63\n64\n65\n66\n67\n68\n69\n70\n71\n");
assert_eq!(at.read("x08"), "72\n73\n74\n75\n76\n77\n78\n79\n80\n");
assert_eq!(at.read("x09"), "81\n82\n83\n84\n85\n86\n87\n88\n89\n");
assert_eq!(at.read("x10"), "90\n91\n92\n93\n94\n95\n96\n97\n98\n");
assert_eq!(at.read("x11"), "99\n");
}

/// Test for short hex suffix not having any value
#[test]
fn test_short_hex_suffix_no_value() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-l", "9", "-x", "onehundredlines.txt"])
.succeeds()
.no_stdout()
.no_stderr();
assert_eq!(at.read("x00"), "00\n01\n02\n03\n04\n05\n06\n07\n08\n");
assert_eq!(at.read("x01"), "09\n10\n11\n12\n13\n14\n15\n16\n17\n");
assert_eq!(at.read("x02"), "18\n19\n20\n21\n22\n23\n24\n25\n26\n");
assert_eq!(at.read("x03"), "27\n28\n29\n30\n31\n32\n33\n34\n35\n");
assert_eq!(at.read("x04"), "36\n37\n38\n39\n40\n41\n42\n43\n44\n");
assert_eq!(at.read("x05"), "45\n46\n47\n48\n49\n50\n51\n52\n53\n");
assert_eq!(at.read("x06"), "54\n55\n56\n57\n58\n59\n60\n61\n62\n");
assert_eq!(at.read("x07"), "63\n64\n65\n66\n67\n68\n69\n70\n71\n");
assert_eq!(at.read("x08"), "72\n73\n74\n75\n76\n77\n78\n79\n80\n");
assert_eq!(at.read("x09"), "81\n82\n83\n84\n85\n86\n87\n88\n89\n");
assert_eq!(at.read("x0a"), "90\n91\n92\n93\n94\n95\n96\n97\n98\n");
assert_eq!(at.read("x0b"), "99\n");
}

/// Test for long hex suffix not having any value
#[test]
fn test_hex_suffix_no_value() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-l", "9", "--hex-suffixes", "onehundredlines.txt"])
.succeeds()
.no_stdout()
.no_stderr();
assert_eq!(at.read("x00"), "00\n01\n02\n03\n04\n05\n06\n07\n08\n");
assert_eq!(at.read("x01"), "09\n10\n11\n12\n13\n14\n15\n16\n17\n");
assert_eq!(at.read("x02"), "18\n19\n20\n21\n22\n23\n24\n25\n26\n");
assert_eq!(at.read("x03"), "27\n28\n29\n30\n31\n32\n33\n34\n35\n");
assert_eq!(at.read("x04"), "36\n37\n38\n39\n40\n41\n42\n43\n44\n");
assert_eq!(at.read("x05"), "45\n46\n47\n48\n49\n50\n51\n52\n53\n");
assert_eq!(at.read("x06"), "54\n55\n56\n57\n58\n59\n60\n61\n62\n");
assert_eq!(at.read("x07"), "63\n64\n65\n66\n67\n68\n69\n70\n71\n");
assert_eq!(at.read("x08"), "72\n73\n74\n75\n76\n77\n78\n79\n80\n");
assert_eq!(at.read("x09"), "81\n82\n83\n84\n85\n86\n87\n88\n89\n");
assert_eq!(at.read("x0a"), "90\n91\n92\n93\n94\n95\n96\n97\n98\n");
assert_eq!(at.read("x0b"), "99\n");
}

/// Test for short numeric suffix having value provided after space - should fail
#[test]
fn test_short_numeric_suffix_with_value_spaced() {
new_ucmd!()
.args(&["-n", "4", "-d", "9", "threebytes.txt"])
.fails()
.stderr_contains("split: cannot open '9' for reading: No such file or directory");
}

/// Test for short numeric suffix having value provided after space - should fail
#[test]
fn test_short_hex_suffix_with_value_spaced() {
new_ucmd!()
.args(&["-n", "4", "-x", "9", "threebytes.txt"])
.fails()
.stderr_contains("split: cannot open '9' for reading: No such file or directory");
}

/// Test for some combined short options
#[test]
fn test_short_combination() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-dxen", "4", "threebytes.txt"])
.succeeds()
.no_stdout()
.no_stderr();
assert_eq!(at.read("x00"), "a");
assert_eq!(at.read("x01"), "b");
assert_eq!(at.read("x02"), "c");
assert_eq!(at.file_exists("x03"), false);
}

/// Test for the last effective suffix, ignoring all others - numeric long last
/// Any combination of short and long (as well as duplicates) should be allowed
#[test]
fn test_effective_suffix_numeric_last() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&[
"-n",
"4",
"--numeric-suffixes=7",
"--hex-suffixes=4",
"-d",
"-x",
"--numeric-suffixes=9",
"threebytes.txt",
])
.succeeds()
.no_stdout()
.no_stderr();
assert_eq!(at.read("x09"), "a");
assert_eq!(at.read("x10"), "b");
assert_eq!(at.read("x11"), "c");
assert_eq!(at.read("x12"), "");
}

/// Test for the last effective suffix, ignoring all others - hex long last
/// Any combination of short and long (as well as duplicates) should be allowed
#[test]
fn test_effective_suffix_hex_last() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&[
"-n",
"4",
"--hex-suffixes=7",
"--numeric-suffixes=4",
"-x",
"-d",
"--hex-suffixes=9",
"threebytes.txt",
])
.succeeds()
.no_stdout()
.no_stderr();
assert_eq!(at.read("x09"), "a");
assert_eq!(at.read("x0a"), "b");
assert_eq!(at.read("x0b"), "c");
assert_eq!(at.read("x0c"), "");
}

#[test]
fn test_round_robin() {
let (at, mut ucmd) = at_and_ucmd!();
Expand Down

0 comments on commit 1eae064

Please sign in to comment.