Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

split: better handle numeric and hex suffixes, short and long, with and without values #5198

Merged
merged 9 commits into from
Aug 28, 2023
105 changes: 76 additions & 29 deletions src/uu/split/src/split.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,15 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

// spell-checker:ignore nbbbb ncccc
// spell-checker:ignore nbbbb ncccc hexdigit

mod filenames;
mod number;
mod platform;

use crate::filenames::FilenameIterator;
use crate::filenames::SuffixType;
use clap::ArgAction;
use clap::{crate_version, parser::ValueSource, Arg, ArgMatches, Command};
use clap::{crate_version, parser::ValueSource, Arg, ArgAction, ArgMatches, Command};
use std::env;
use std::fmt;
use std::fs::{metadata, File};
Expand All @@ -32,7 +31,9 @@ static OPT_ADDITIONAL_SUFFIX: &str = "additional-suffix";
static OPT_FILTER: &str = "filter";
static OPT_NUMBER: &str = "number";
static OPT_NUMERIC_SUFFIXES: &str = "numeric-suffixes";
static OPT_NUMERIC_SUFFIXES_SHORT: &str = "-d";
static OPT_HEX_SUFFIXES: &str = "hex-suffixes";
static OPT_HEX_SUFFIXES_SHORT: &str = "-x";
static OPT_SUFFIX_LENGTH: &str = "suffix-length";
static OPT_DEFAULT_SUFFIX_LENGTH: &str = "0";
static OPT_VERBOSE: &str = "verbose";
Expand Down Expand Up @@ -122,28 +123,64 @@ pub fn uu_app() -> Command {
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(OPT_NUMERIC_SUFFIXES)
Arg::new(OPT_NUMERIC_SUFFIXES_SHORT)
.short('d')
.action(clap::ArgAction::SetTrue)
.overrides_with_all([
OPT_NUMERIC_SUFFIXES,
OPT_NUMERIC_SUFFIXES_SHORT,
OPT_HEX_SUFFIXES,
OPT_HEX_SUFFIXES_SHORT
])
.help("use numeric suffixes starting at 0, not alphabetic"),
)
.arg(
Arg::new(OPT_NUMERIC_SUFFIXES)
.long(OPT_NUMERIC_SUFFIXES)
.require_equals(true)
.default_missing_value("0")
.num_args(0..=1)
.help("use numeric suffixes instead of alphabetic"),
.overrides_with_all([
OPT_NUMERIC_SUFFIXES,
OPT_NUMERIC_SUFFIXES_SHORT,
OPT_HEX_SUFFIXES,
OPT_HEX_SUFFIXES_SHORT
])
.help("same as -d, but allow setting the start value"),
)
.arg(
Arg::new(OPT_SUFFIX_LENGTH)
.short('a')
.long(OPT_SUFFIX_LENGTH)
.value_name("N")
.default_value(OPT_DEFAULT_SUFFIX_LENGTH)
.help("use suffixes of fixed length N. 0 implies dynamic length."),
Arg::new(OPT_HEX_SUFFIXES_SHORT)
.short('x')
.action(clap::ArgAction::SetTrue)
.overrides_with_all([
OPT_NUMERIC_SUFFIXES,
OPT_NUMERIC_SUFFIXES_SHORT,
OPT_HEX_SUFFIXES,
OPT_HEX_SUFFIXES_SHORT
])
.help("use hex suffixes starting at 0, not alphabetic"),
)
.arg(
Arg::new(OPT_HEX_SUFFIXES)
.short('x')
.long(OPT_HEX_SUFFIXES)
.default_missing_value("0")
.require_equals(true)
.num_args(0..=1)
.help("use hex suffixes instead of alphabetic"),
.overrides_with_all([
OPT_NUMERIC_SUFFIXES,
OPT_NUMERIC_SUFFIXES_SHORT,
OPT_HEX_SUFFIXES,
OPT_HEX_SUFFIXES_SHORT
])
.help("same as -x, but allow setting the start value"),
)
.arg(
Arg::new(OPT_SUFFIX_LENGTH)
.short('a')
.long(OPT_SUFFIX_LENGTH)
.value_name("N")
.default_value(OPT_DEFAULT_SUFFIX_LENGTH)
.help("use suffixes of fixed length N. 0 implies dynamic length."),
)
.arg(
Arg::new(OPT_VERBOSE)
Expand Down Expand Up @@ -409,22 +446,32 @@ impl Strategy {

/// Parse the suffix type from the command-line arguments.
fn suffix_type_from(matches: &ArgMatches) -> Result<(SuffixType, usize), SettingsError> {
if matches.value_source(OPT_NUMERIC_SUFFIXES) == Some(ValueSource::CommandLine) {
let suffix_start = matches.get_one::<String>(OPT_NUMERIC_SUFFIXES);
let suffix_start = suffix_start.ok_or(SettingsError::SuffixNotParsable(String::new()))?;
let suffix_start = suffix_start
.parse()
.map_err(|_| SettingsError::SuffixNotParsable(suffix_start.to_string()))?;
Ok((SuffixType::Decimal, suffix_start))
} else if matches.value_source(OPT_HEX_SUFFIXES) == Some(ValueSource::CommandLine) {
let suffix_start = matches.get_one::<String>(OPT_HEX_SUFFIXES);
let suffix_start = suffix_start.ok_or(SettingsError::SuffixNotParsable(String::new()))?;
let suffix_start = usize::from_str_radix(suffix_start, 16)
.map_err(|_| SettingsError::SuffixNotParsable(suffix_start.to_string()))?;
Ok((SuffixType::Hexadecimal, suffix_start))
} else {
// no numeric/hex suffix
Ok((SuffixType::Alphabetic, 0))
// Check if the user is specifying one or more than one suffix
// Any combination of suffixes is allowed
// Since all suffixes are setup with 'overrides_with_all()' against themselves and each other,
// last one wins, all others are ignored
match (
matches.get_one::<String>(OPT_NUMERIC_SUFFIXES),
matches.get_one::<String>(OPT_HEX_SUFFIXES),
matches.get_flag(OPT_NUMERIC_SUFFIXES_SHORT),
matches.get_flag(OPT_HEX_SUFFIXES_SHORT),
) {
(Some(v), _, _, _) => {
let suffix_start = v;
let suffix_start = suffix_start
.parse::<usize>()
.map_err(|_| SettingsError::SuffixNotParsable(suffix_start.to_string()))?;
Ok((SuffixType::Decimal, suffix_start))
}
(_, Some(v), _, _) => {
let suffix_start = v;
let suffix_start = usize::from_str_radix(suffix_start, 16)
.map_err(|_| SettingsError::SuffixNotParsable(suffix_start.to_string()))?;
Ok((SuffixType::Hexadecimal, suffix_start))
}
(_, _, true, _) => Ok((SuffixType::Decimal, 0)), // short numeric suffix '-d', default start 0
(_, _, _, true) => Ok((SuffixType::Hexadecimal, 0)), // short hex suffix '-x', default start 0
_ => Ok((SuffixType::Alphabetic, 0)), // no numeric/hex suffix, using default alphabetic
}
}

Expand Down
190 changes: 187 additions & 3 deletions tests/by-util/test_split.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes onehundredlines nbbbb
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen

use crate::common::util::{AtPath, TestScenario};
use rand::{thread_rng, Rng, SeedableRng};
Expand Down Expand Up @@ -715,7 +715,7 @@ fn test_multiple_of_input_chunk() {
#[test]
fn test_numeric_suffix() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-n", "4", "--numeric-suffixes", "9", "threebytes.txt"])
ucmd.args(&["-n", "4", "--numeric-suffixes=9", "threebytes.txt"])
.succeeds()
.no_stdout()
.no_stderr();
Expand All @@ -728,7 +728,7 @@ fn test_numeric_suffix() {
#[test]
fn test_hex_suffix() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-n", "4", "--hex-suffixes", "9", "threebytes.txt"])
ucmd.args(&["-n", "4", "--hex-suffixes=9", "threebytes.txt"])
.succeeds()
.no_stdout()
.no_stderr();
Expand All @@ -738,6 +738,190 @@ fn test_hex_suffix() {
assert_eq!(at.read("x0c"), "");
}

#[test]
fn test_numeric_suffix_no_equal() {
new_ucmd!()
.args(&["-n", "4", "--numeric-suffixes", "9", "threebytes.txt"])
.fails()
.stderr_contains("split: cannot open '9' for reading: No such file or directory");
}

#[test]
fn test_hex_suffix_no_equal() {
new_ucmd!()
.args(&["-n", "4", "--hex-suffixes", "9", "threebytes.txt"])
.fails()
.stderr_contains("split: cannot open '9' for reading: No such file or directory");
}

/// Test for short numeric suffix not having any value
#[test]
fn test_short_numeric_suffix_no_value() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-l", "9", "-d", "onehundredlines.txt"])
.succeeds()
.no_stdout()
.no_stderr();
assert_eq!(at.read("x00"), "00\n01\n02\n03\n04\n05\n06\n07\n08\n");
assert_eq!(at.read("x01"), "09\n10\n11\n12\n13\n14\n15\n16\n17\n");
assert_eq!(at.read("x02"), "18\n19\n20\n21\n22\n23\n24\n25\n26\n");
assert_eq!(at.read("x03"), "27\n28\n29\n30\n31\n32\n33\n34\n35\n");
assert_eq!(at.read("x04"), "36\n37\n38\n39\n40\n41\n42\n43\n44\n");
assert_eq!(at.read("x05"), "45\n46\n47\n48\n49\n50\n51\n52\n53\n");
assert_eq!(at.read("x06"), "54\n55\n56\n57\n58\n59\n60\n61\n62\n");
assert_eq!(at.read("x07"), "63\n64\n65\n66\n67\n68\n69\n70\n71\n");
assert_eq!(at.read("x08"), "72\n73\n74\n75\n76\n77\n78\n79\n80\n");
assert_eq!(at.read("x09"), "81\n82\n83\n84\n85\n86\n87\n88\n89\n");
assert_eq!(at.read("x10"), "90\n91\n92\n93\n94\n95\n96\n97\n98\n");
assert_eq!(at.read("x11"), "99\n");
}

/// Test for long numeric suffix not having any value
#[test]
fn test_numeric_suffix_no_value() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-l", "9", "--numeric-suffixes", "onehundredlines.txt"])
.succeeds()
.no_stdout()
.no_stderr();
assert_eq!(at.read("x00"), "00\n01\n02\n03\n04\n05\n06\n07\n08\n");
assert_eq!(at.read("x01"), "09\n10\n11\n12\n13\n14\n15\n16\n17\n");
assert_eq!(at.read("x02"), "18\n19\n20\n21\n22\n23\n24\n25\n26\n");
assert_eq!(at.read("x03"), "27\n28\n29\n30\n31\n32\n33\n34\n35\n");
assert_eq!(at.read("x04"), "36\n37\n38\n39\n40\n41\n42\n43\n44\n");
assert_eq!(at.read("x05"), "45\n46\n47\n48\n49\n50\n51\n52\n53\n");
assert_eq!(at.read("x06"), "54\n55\n56\n57\n58\n59\n60\n61\n62\n");
assert_eq!(at.read("x07"), "63\n64\n65\n66\n67\n68\n69\n70\n71\n");
assert_eq!(at.read("x08"), "72\n73\n74\n75\n76\n77\n78\n79\n80\n");
assert_eq!(at.read("x09"), "81\n82\n83\n84\n85\n86\n87\n88\n89\n");
assert_eq!(at.read("x10"), "90\n91\n92\n93\n94\n95\n96\n97\n98\n");
assert_eq!(at.read("x11"), "99\n");
}

/// Test for short hex suffix not having any value
#[test]
fn test_short_hex_suffix_no_value() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-l", "9", "-x", "onehundredlines.txt"])
.succeeds()
.no_stdout()
.no_stderr();
assert_eq!(at.read("x00"), "00\n01\n02\n03\n04\n05\n06\n07\n08\n");
assert_eq!(at.read("x01"), "09\n10\n11\n12\n13\n14\n15\n16\n17\n");
assert_eq!(at.read("x02"), "18\n19\n20\n21\n22\n23\n24\n25\n26\n");
assert_eq!(at.read("x03"), "27\n28\n29\n30\n31\n32\n33\n34\n35\n");
assert_eq!(at.read("x04"), "36\n37\n38\n39\n40\n41\n42\n43\n44\n");
assert_eq!(at.read("x05"), "45\n46\n47\n48\n49\n50\n51\n52\n53\n");
assert_eq!(at.read("x06"), "54\n55\n56\n57\n58\n59\n60\n61\n62\n");
assert_eq!(at.read("x07"), "63\n64\n65\n66\n67\n68\n69\n70\n71\n");
assert_eq!(at.read("x08"), "72\n73\n74\n75\n76\n77\n78\n79\n80\n");
assert_eq!(at.read("x09"), "81\n82\n83\n84\n85\n86\n87\n88\n89\n");
assert_eq!(at.read("x0a"), "90\n91\n92\n93\n94\n95\n96\n97\n98\n");
assert_eq!(at.read("x0b"), "99\n");
}

/// Test for long hex suffix not having any value
#[test]
fn test_hex_suffix_no_value() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-l", "9", "--hex-suffixes", "onehundredlines.txt"])
.succeeds()
.no_stdout()
.no_stderr();
assert_eq!(at.read("x00"), "00\n01\n02\n03\n04\n05\n06\n07\n08\n");
assert_eq!(at.read("x01"), "09\n10\n11\n12\n13\n14\n15\n16\n17\n");
assert_eq!(at.read("x02"), "18\n19\n20\n21\n22\n23\n24\n25\n26\n");
assert_eq!(at.read("x03"), "27\n28\n29\n30\n31\n32\n33\n34\n35\n");
assert_eq!(at.read("x04"), "36\n37\n38\n39\n40\n41\n42\n43\n44\n");
assert_eq!(at.read("x05"), "45\n46\n47\n48\n49\n50\n51\n52\n53\n");
assert_eq!(at.read("x06"), "54\n55\n56\n57\n58\n59\n60\n61\n62\n");
assert_eq!(at.read("x07"), "63\n64\n65\n66\n67\n68\n69\n70\n71\n");
assert_eq!(at.read("x08"), "72\n73\n74\n75\n76\n77\n78\n79\n80\n");
assert_eq!(at.read("x09"), "81\n82\n83\n84\n85\n86\n87\n88\n89\n");
assert_eq!(at.read("x0a"), "90\n91\n92\n93\n94\n95\n96\n97\n98\n");
assert_eq!(at.read("x0b"), "99\n");
}

/// Test for short numeric suffix having value provided after space - should fail
#[test]
fn test_short_numeric_suffix_with_value_spaced() {
new_ucmd!()
.args(&["-n", "4", "-d", "9", "threebytes.txt"])
.fails()
.stderr_contains("split: cannot open '9' for reading: No such file or directory");
}

/// Test for short numeric suffix having value provided after space - should fail
#[test]
fn test_short_hex_suffix_with_value_spaced() {
new_ucmd!()
.args(&["-n", "4", "-x", "9", "threebytes.txt"])
.fails()
.stderr_contains("split: cannot open '9' for reading: No such file or directory");
}

/// Test for some combined short options
#[test]
fn test_short_combination() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-dxen", "4", "threebytes.txt"])
.succeeds()
.no_stdout()
.no_stderr();
assert_eq!(at.read("x00"), "a");
assert_eq!(at.read("x01"), "b");
assert_eq!(at.read("x02"), "c");
assert_eq!(at.file_exists("x03"), false);
}

/// Test for the last effective suffix, ignoring all others - numeric long last
/// Any combination of short and long (as well as duplicates) should be allowed
#[test]
fn test_effective_suffix_numeric_last() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&[
"-n",
"4",
"--numeric-suffixes=7",
"--hex-suffixes=4",
"-d",
"-x",
"--numeric-suffixes=9",
"threebytes.txt",
])
.succeeds()
.no_stdout()
.no_stderr();
assert_eq!(at.read("x09"), "a");
assert_eq!(at.read("x10"), "b");
assert_eq!(at.read("x11"), "c");
assert_eq!(at.read("x12"), "");
}

/// Test for the last effective suffix, ignoring all others - hex long last
/// Any combination of short and long (as well as duplicates) should be allowed
#[test]
fn test_effective_suffix_hex_last() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&[
"-n",
"4",
"--hex-suffixes=7",
"--numeric-suffixes=4",
"-x",
"-d",
"--hex-suffixes=9",
"threebytes.txt",
])
.succeeds()
.no_stdout()
.no_stderr();
assert_eq!(at.read("x09"), "a");
assert_eq!(at.read("x0a"), "b");
assert_eq!(at.read("x0b"), "c");
assert_eq!(at.read("x0c"), "");
}

#[test]
fn test_round_robin() {
let (at, mut ucmd) = at_and_ucmd!();
Expand Down