split: support a start value for --numeric-suffixes and --hex-suffixes

FilenameIterator::new gains a `suffix_start` argument and now returns a
UResult: a start value that does not fit in a fixed suffix length is
rejected with "numerical suffix start value is too large for the suffix
length". DynamicWidthNumber keeps a single `current` counter and derives
its digits on demand. suffix_type_from parses the start value as decimal
(--numeric-suffixes) or base 16 (--hex-suffixes).

diff --git a/src/uu/split/src/filenames.rs b/src/uu/split/src/filenames.rs
index 6f68caeb41f..95ae5d40f33 100644
--- a/src/uu/split/src/filenames.rs
+++ b/src/uu/split/src/filenames.rs
@@ -28,6 +28,7 @@
 use crate::number::DynamicWidthNumber;
 use crate::number::FixedWidthNumber;
 use crate::number::Number;
+use uucore::error::{UResult, USimpleError};
 
 /// The format to use for suffixes in the filename for each output chunk.
 #[derive(Clone, Copy)]
@@ -119,19 +120,28 @@ impl<'a> FilenameIterator<'a> {
         additional_suffix: &'a str,
         suffix_length: usize,
         suffix_type: SuffixType,
-    ) -> FilenameIterator<'a> {
+        suffix_start: usize,
+    ) -> UResult<FilenameIterator<'a>> {
         let radix = suffix_type.radix();
         let number = if suffix_length == 0 {
-            Number::DynamicWidth(DynamicWidthNumber::new(radix))
+            Number::DynamicWidth(DynamicWidthNumber::new(radix, suffix_start))
         } else {
-            Number::FixedWidth(FixedWidthNumber::new(radix, suffix_length))
+            Number::FixedWidth(
+                FixedWidthNumber::new(radix, suffix_length, suffix_start).map_err(|_| {
+                    USimpleError::new(
+                        1,
+                        "numerical suffix start value is too large for the suffix length",
+                    )
+                })?,
+            )
         };
-        FilenameIterator {
+
+        Ok(FilenameIterator {
             prefix,
             additional_suffix,
             number,
             first_iteration: true,
-        }
+        })
     }
 }
 
@@ -161,36 +171,36 @@ mod tests {
 
     #[test]
     fn test_filename_iterator_alphabetic_fixed_width() {
-        let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Alphabetic);
+        let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Alphabetic, 0).unwrap();
         assert_eq!(it.next().unwrap(), "chunk_aa.txt");
         assert_eq!(it.next().unwrap(), "chunk_ab.txt");
         assert_eq!(it.next().unwrap(), "chunk_ac.txt");
 
-        let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Alphabetic);
+        let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Alphabetic, 0).unwrap();
         assert_eq!(it.nth(26 * 26 - 1).unwrap(), "chunk_zz.txt");
         assert_eq!(it.next(), None);
     }
 
     #[test]
     fn test_filename_iterator_numeric_fixed_width() {
-        let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Decimal);
+        let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Decimal, 0).unwrap();
         assert_eq!(it.next().unwrap(), "chunk_00.txt");
         assert_eq!(it.next().unwrap(), "chunk_01.txt");
         assert_eq!(it.next().unwrap(), "chunk_02.txt");
 
-        let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Decimal);
+        let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Decimal, 0).unwrap();
         assert_eq!(it.nth(10 * 10 - 1).unwrap(), "chunk_99.txt");
         assert_eq!(it.next(), None);
     }
 
     #[test]
     fn test_filename_iterator_alphabetic_dynamic_width() {
-        let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Alphabetic);
+        let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Alphabetic, 0).unwrap();
         assert_eq!(it.next().unwrap(), "chunk_aa.txt");
         assert_eq!(it.next().unwrap(), "chunk_ab.txt");
         assert_eq!(it.next().unwrap(), "chunk_ac.txt");
 
-        let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Alphabetic);
+        let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Alphabetic, 0).unwrap();
         assert_eq!(it.nth(26 * 25 - 1).unwrap(), "chunk_yz.txt");
         assert_eq!(it.next().unwrap(), "chunk_zaaa.txt");
         assert_eq!(it.next().unwrap(), "chunk_zaab.txt");
@@ -198,14 +208,49 @@ mod tests {
 
     #[test]
     fn test_filename_iterator_numeric_dynamic_width() {
-        let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Decimal);
+        let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Decimal, 0).unwrap();
         assert_eq!(it.next().unwrap(), "chunk_00.txt");
         assert_eq!(it.next().unwrap(), "chunk_01.txt");
         assert_eq!(it.next().unwrap(), "chunk_02.txt");
 
-        let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Decimal);
+        let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Decimal, 0).unwrap();
         assert_eq!(it.nth(10 * 9 - 1).unwrap(), "chunk_89.txt");
         assert_eq!(it.next().unwrap(), "chunk_9000.txt");
         assert_eq!(it.next().unwrap(), "chunk_9001.txt");
     }
+
+    #[test]
+    fn test_filename_iterator_numeric_suffix_decimal() {
+        let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Decimal, 5).unwrap();
+        assert_eq!(it.next().unwrap(), "chunk_05.txt");
+        assert_eq!(it.next().unwrap(), "chunk_06.txt");
+        assert_eq!(it.next().unwrap(), "chunk_07.txt");
+    }
+
+    #[test]
+    fn test_filename_iterator_numeric_suffix_hex() {
+        let mut it =
+            FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Hexadecimal, 9).unwrap();
+        assert_eq!(it.next().unwrap(), "chunk_09.txt");
+        assert_eq!(it.next().unwrap(), "chunk_0a.txt");
+        assert_eq!(it.next().unwrap(), "chunk_0b.txt");
+    }
+
+    #[test]
+    fn test_filename_iterator_numeric_suffix_err() {
+        let mut it = FilenameIterator::new("chunk_", ".txt", 3, SuffixType::Decimal, 999).unwrap();
+        assert_eq!(it.next().unwrap(), "chunk_999.txt");
+        assert!(it.next().is_none());
+
+        let it = FilenameIterator::new("chunk_", ".txt", 3, SuffixType::Decimal, 1000);
+        assert!(it.is_err());
+
+        let mut it =
+            FilenameIterator::new("chunk_", ".txt", 3, SuffixType::Hexadecimal, 0xfff).unwrap();
+        assert_eq!(it.next().unwrap(), "chunk_fff.txt");
+        assert!(it.next().is_none());
+
+        let it = FilenameIterator::new("chunk_", ".txt", 3, SuffixType::Hexadecimal, 0x1000);
+        assert!(it.is_err());
+    }
 }
diff --git a/src/uu/split/src/number.rs b/src/uu/split/src/number.rs
index c7557271db6..4605d6fb0d7 100644
--- a/src/uu/split/src/number.rs
+++ b/src/uu/split/src/number.rs
@@ -100,10 +100,10 @@ impl Number {
     /// differently and we only intend to use these numbers for display
     /// purposes and not for mathematical purposes.
     #[allow(dead_code)]
-    fn digits(&self) -> &Vec<u8> {
+    fn digits(&self) -> Vec<u8> {
         match self {
-            Self::FixedWidth(number) => &number.digits,
-            Self::DynamicWidth(number) => &number.digits,
+            Self::FixedWidth(number) => number.digits.clone(),
+            Self::DynamicWidth(number) => number.digits(),
         }
     }
 
@@ -175,7 +175,7 @@ impl Display for Number {
 ///
 /// # Displaying
 ///
-/// This number is only displayable if `radix` is 10, 26, or 26. If
+/// This number is only displayable if `radix` is 10, 16, or 26. If
 /// `radix` is 10 or 16, then the digits are concatenated and
 /// displayed as a fixed-width decimal or hexadecimal number,
 /// respectively. If `radix` is 26, then each digit is translated to
@@ -189,10 +189,21 @@ pub struct FixedWidthNumber {
 
 impl FixedWidthNumber {
     /// Instantiate a number of the given radix and width.
-    pub fn new(radix: u8, width: usize) -> Self {
-        Self {
-            radix,
-            digits: vec![0; width],
+    pub fn new(radix: u8, width: usize, mut suffix_start: usize) -> Result<Self, Overflow> {
+        let mut digits = vec![0_u8; width];
+
+        for i in (0..digits.len()).rev() {
+            let remainder = (suffix_start % (radix as usize)) as u8;
+            suffix_start /= radix as usize;
+            digits[i] = remainder;
+            if suffix_start == 0 {
+                break;
+            }
+        }
+        if suffix_start != 0 {
+            Err(Overflow)
+        } else {
+            Ok(Self { radix, digits })
         }
     }
 
@@ -229,25 +240,12 @@ impl FixedWidthNumber {
 
 impl Display for FixedWidthNumber {
     fn fmt(&self, f: &mut Formatter) -> fmt::Result {
-        match self.radix {
-            10 => {
-                let digits: String = self.digits.iter().map(|d| (b'0' + d) as char).collect();
-                write!(f, "{}", digits)
-            }
-            16 => {
-                let digits: String = self
-                    .digits
-                    .iter()
-                    .map(|d| (if *d < 10 { b'0' + d } else { b'a' + (d - 10) }) as char)
-                    .collect();
-                write!(f, "{}", digits)
-            }
-            26 => {
-                let digits: String = self.digits.iter().map(|d| (b'a' + d) as char).collect();
-                write!(f, "{}", digits)
-            }
-            _ => Err(fmt::Error),
-        }
+        let digits: String = self
+            .digits
+            .iter()
+            .map(|d| map_digit(self.radix, *d))
+            .collect();
+        write!(f, "{}", digits)
     }
 }
 
@@ -293,105 +291,74 @@ impl Display for FixedWidthNumber {
 #[derive(Clone)]
 pub struct DynamicWidthNumber {
     radix: u8,
-    digits: Vec<u8>,
+    current: usize,
 }
 
 impl DynamicWidthNumber {
-    /// Instantiate a number of the given radix, starting with width 2.
-    ///
-    /// This associated function returns a new instance of the struct
-    /// with the given radix and a width of two digits, both 0.
-    pub fn new(radix: u8) -> Self {
+    pub fn new(radix: u8, suffix_start: usize) -> Self {
         Self {
             radix,
-            digits: vec![0, 0],
-        }
-    }
-
-    /// Set all digits to zero.
-    fn reset(&mut self) {
-        for i in 0..self.digits.len() {
-            self.digits[i] = 0;
+            current: suffix_start,
         }
     }
 
-    /// Increment this number.
-    ///
-    /// This method adds one to this number. The first time that the
-    /// most significant digit would achieve its highest possible
-    /// value (that is, `radix - 1`), then all the digits get reset to
-    /// 0 and the number of digits increases by one.
-    ///
-    /// This method never returns an error.
     fn increment(&mut self) -> Result<(), Overflow> {
-        for i in (0..self.digits.len()).rev() {
-            // Increment the current digit.
-            self.digits[i] += 1;
+        self.current += 1;
+        Ok(())
+    }
 
-            // If the digit overflows, then set it to 0 and continue
-            // to the next iteration to increment the next most
-            // significant digit. Otherwise, terminate the loop, since
-            // there will be no further changes to any higher order
-            // digits.
-            if self.digits[i] == self.radix {
-                self.digits[i] = 0;
-            } else {
-                break;
-            }
+    fn digits(&self) -> Vec<u8> {
+        let radix = self.radix as usize;
+        let mut remaining = self.current;
+        let mut sub_value = (radix - 1) * radix;
+        let mut num_fill_chars = 2;
+
+        // Convert the number into "num_fill_chars" and "remaining"
+        while remaining >= sub_value {
+            remaining -= sub_value;
+            sub_value *= radix;
+            num_fill_chars += 1;
         }
 
-        // If the most significant digit is at its maximum value, then
-        // add another digit and reset all digits zero.
-        if self.digits[0] == self.radix - 1 {
-            self.digits.push(0);
-            self.reset();
+        // Convert the "remainder" to digits
+        let mut digits = Vec::new();
+        while remaining > 0 {
+            digits.push((remaining % radix) as u8);
+            remaining /= radix;
         }
-        Ok(())
+        // Left pad the vec
+        digits.resize(num_fill_chars, 0);
+        digits.reverse();
+        digits
     }
 }
 
-impl Display for DynamicWidthNumber {
-    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
-        match self.radix {
-            10 => {
-                let num_fill_chars = self.digits.len() - 2;
-                let digits: String = self.digits.iter().map(|d| (b'0' + d) as char).collect();
-                write!(
-                    f,
-                    "{empty:9<num_fill_chars$}{digits}",
-                    empty = "",
-                    num_fill_chars = num_fill_chars,
-                    digits = digits,
-                )
-            }
-            16 => {
-                let num_fill_chars = self.digits.len() - 2;
-                let digits: String = self
-                    .digits
-                    .iter()
-                    .map(|d| (if *d < 10 { b'0' + d } else { b'a' + (d - 10) }) as char)
-                    .collect();
-                write!(
-                    f,
-                    "{empty:f<num_fill_chars$}{digits}",
-                    empty = "",
-                    num_fill_chars = num_fill_chars,
-                    digits = digits,
-                )
-            }
-            26 => {
-                let num_fill_chars = self.digits.len() - 2;
-                let digits: String = self.digits.iter().map(|d| (b'a' + d) as char).collect();
-                write!(
-                    f,
-                    "{empty:z<num_fill_chars$}{digits}",
-                    empty = "",
-                    num_fill_chars = num_fill_chars,
-                    digits = digits,
-                )
+fn map_digit(radix: u8, d: u8) -> char {
+    (match radix {
+        10 => b'0' + d,
+        16 => {
+            if d < 10 {
+                b'0' + d
+            } else {
+                b'a' + (d - 10)
             }
-            _ => Err(fmt::Error),
         }
+        26 => b'a' + d,
+        _ => 0,
+    }) as char
+}
+
+impl Display for DynamicWidthNumber {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        let digits: String = self
+            .digits()
+            .iter()
+            .map(|d| map_digit(self.radix, *d))
+            .collect();
+        let fill: String = (0..digits.len() - 2)
+            .map(|_| map_digit(self.radix, self.radix - 1))
+            .collect();
+        write!(f, "{fill}{digits}")
     }
 }
 
@@ -404,35 +371,35 @@ mod tests {
 
     #[test]
     fn test_dynamic_width_number_increment() {
-        let mut n = Number::DynamicWidth(DynamicWidthNumber::new(3));
-        assert_eq!(n.digits(), &vec![0, 0]);
+        let mut n = Number::DynamicWidth(DynamicWidthNumber::new(3, 0));
+        assert_eq!(n.digits(), vec![0, 0]);
 
         n.increment().unwrap();
-        assert_eq!(n.digits(), &vec![0, 1]);
+        assert_eq!(n.digits(), vec![0, 1]);
 
         n.increment().unwrap();
-        assert_eq!(n.digits(), &vec![0, 2]);
+        assert_eq!(n.digits(), vec![0, 2]);
 
         n.increment().unwrap();
-        assert_eq!(n.digits(), &vec![1, 0]);
+        assert_eq!(n.digits(), vec![1, 0]);
 
         n.increment().unwrap();
-        assert_eq!(n.digits(), &vec![1, 1]);
+        assert_eq!(n.digits(), vec![1, 1]);
 
         n.increment().unwrap();
-        assert_eq!(n.digits(), &vec![1, 2]);
+        assert_eq!(n.digits(), vec![1, 2]);
 
         n.increment().unwrap();
-        assert_eq!(n.digits(), &vec![0, 0, 0]);
+        assert_eq!(n.digits(), vec![0, 0, 0]);
 
         n.increment().unwrap();
-        assert_eq!(n.digits(), &vec![0, 0, 1]);
+        assert_eq!(n.digits(), vec![0, 0, 1]);
     }
 
     #[test]
     fn test_dynamic_width_number_display_alphabetic() {
         fn num(n: usize) -> Number {
-            let mut number = Number::DynamicWidth(DynamicWidthNumber::new(26));
+            let mut number = Number::DynamicWidth(DynamicWidthNumber::new(26, 0));
             for _ in 0..n {
                 number.increment().unwrap();
             }
@@ -456,7 +423,7 @@ mod tests {
     #[test]
     fn test_dynamic_width_number_display_numeric_decimal() {
         fn num(n: usize) -> Number {
-            let mut number = Number::DynamicWidth(DynamicWidthNumber::new(10));
+            let mut number = Number::DynamicWidth(DynamicWidthNumber::new(10, 0));
             for _ in 0..n {
                 number.increment().unwrap();
             }
@@ -477,7 +444,7 @@ mod tests {
     #[test]
     fn test_dynamic_width_number_display_numeric_hexadecimal() {
         fn num(n: usize) -> Number {
-            let mut number = Number::DynamicWidth(DynamicWidthNumber::new(16));
+            let mut number = Number::DynamicWidth(DynamicWidthNumber::new(16, 0));
             for _ in 0..n {
                 number.increment().unwrap();
             }
@@ -500,31 +467,31 @@ mod tests {
 
     #[test]
     fn test_fixed_width_number_increment() {
-        let mut n = Number::FixedWidth(FixedWidthNumber::new(3, 2));
-        assert_eq!(n.digits(), &vec![0, 0]);
+        let mut n = Number::FixedWidth(FixedWidthNumber::new(3, 2, 0).unwrap());
+        assert_eq!(n.digits(), vec![0, 0]);
         n.increment().unwrap();
-        assert_eq!(n.digits(), &vec![0, 1]);
+        assert_eq!(n.digits(), vec![0, 1]);
         n.increment().unwrap();
-        assert_eq!(n.digits(), &vec![0, 2]);
+        assert_eq!(n.digits(), vec![0, 2]);
         n.increment().unwrap();
-        assert_eq!(n.digits(), &vec![1, 0]);
+        assert_eq!(n.digits(), vec![1, 0]);
         n.increment().unwrap();
-        assert_eq!(n.digits(), &vec![1, 1]);
+        assert_eq!(n.digits(), vec![1, 1]);
         n.increment().unwrap();
-        assert_eq!(n.digits(), &vec![1, 2]);
+        assert_eq!(n.digits(), vec![1, 2]);
         n.increment().unwrap();
-        assert_eq!(n.digits(), &vec![2, 0]);
+        assert_eq!(n.digits(), vec![2, 0]);
         n.increment().unwrap();
-        assert_eq!(n.digits(), &vec![2, 1]);
+        assert_eq!(n.digits(), vec![2, 1]);
         n.increment().unwrap();
-        assert_eq!(n.digits(), &vec![2, 2]);
+        assert_eq!(n.digits(), vec![2, 2]);
         assert!(n.increment().is_err());
     }
 
     #[test]
     fn test_fixed_width_number_display_alphabetic() {
         fn num(n: usize) -> Result<Number, Overflow> {
-            let mut number = Number::FixedWidth(FixedWidthNumber::new(26, 2));
+            let mut number = Number::FixedWidth(FixedWidthNumber::new(26, 2, 0).unwrap());
             for _ in 0..n {
                 number.increment()?;
             }
@@ -549,7 +516,7 @@ mod tests {
     #[test]
     fn test_fixed_width_number_display_numeric_decimal() {
         fn num(n: usize) -> Result<Number, Overflow> {
-            let mut number = Number::FixedWidth(FixedWidthNumber::new(10, 2));
+            let mut number = Number::FixedWidth(FixedWidthNumber::new(10, 2, 0).unwrap());
             for _ in 0..n {
                 number.increment()?;
             }
@@ -568,7 +535,7 @@ mod tests {
     #[test]
     fn test_fixed_width_number_display_numeric_hexadecimal() {
         fn num(n: usize) -> Result<Number, Overflow> {
-            let mut number = Number::FixedWidth(FixedWidthNumber::new(16, 2));
+            let mut number = Number::FixedWidth(FixedWidthNumber::new(16, 2, 0).unwrap());
             for _ in 0..n {
                 number.increment()?;
             }
@@ -583,4 +550,32 @@ mod tests {
         assert_eq!(format!("{}", num(16 * 16 - 1).unwrap()), "ff");
         assert!(num(16 * 16).is_err());
     }
+
+    #[test]
+    fn test_fixed_width_number_start_suffix() {
+        fn num(n: usize) -> Result<Number, Overflow> {
+            let mut number = Number::FixedWidth(FixedWidthNumber::new(16, 2, 0x14)?);
+            for _ in 0..n {
+                number.increment()?;
+            }
+            Ok(number)
+        }
+
+        assert_eq!(format!("{}", num(0).unwrap()), "14");
+        assert_eq!(format!("{}", num(0xf).unwrap()), "23");
+    }
+
+    #[test]
+    fn test_dynamic_width_number_start_suffix() {
+        fn num(n: usize) -> Result<Number, Overflow> {
+            let mut number = Number::DynamicWidth(DynamicWidthNumber::new(10, 8));
+            for _ in 0..n {
+                number.increment()?;
+            }
+            Ok(number)
+        }
+
+        assert_eq!(format!("{}", num(0).unwrap()), "08");
+        assert_eq!(format!("{}", num(8).unwrap()), "16");
+    }
 }
diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs
index 1224c83ec9d..2406ba8b718 100644
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@@ -144,7 +144,7 @@ pub fn uu_app<'a>() -> Command<'a> {
                 .takes_value(true)
                 .value_name("N")
                 .default_value(OPT_DEFAULT_SUFFIX_LENGTH)
-                .help("use suffixes of length N (default 2)"),
+                .help("use suffixes of fixed length N. 0 implies dynamic length."),
         )
         .arg(
             Arg::new(OPT_HEX_SUFFIXES)
@@ -400,13 +400,23 @@ impl Strategy {
 }
 
 /// Parse the suffix type from the command-line arguments.
-fn suffix_type_from(matches: &ArgMatches) -> SuffixType {
+fn suffix_type_from(matches: &ArgMatches) -> Result<(SuffixType, usize), SettingsError> {
     if matches.value_source(OPT_NUMERIC_SUFFIXES) == Some(ValueSource::CommandLine) {
-        SuffixType::Decimal
+        let suffix_start = matches.value_of(OPT_NUMERIC_SUFFIXES);
+        let suffix_start = suffix_start.ok_or(SettingsError::SuffixNotParsable(String::new()))?;
+        let suffix_start = suffix_start
+            .parse()
+            .map_err(|_| SettingsError::SuffixNotParsable(suffix_start.to_string()))?;
+        Ok((SuffixType::Decimal, suffix_start))
     } else if matches.value_source(OPT_HEX_SUFFIXES) == Some(ValueSource::CommandLine) {
-        SuffixType::Hexadecimal
+        let suffix_start = matches.value_of(OPT_HEX_SUFFIXES);
+        let suffix_start = suffix_start.ok_or(SettingsError::SuffixNotParsable(String::new()))?;
+        let suffix_start = usize::from_str_radix(suffix_start, 16)
+            .map_err(|_| SettingsError::SuffixNotParsable(suffix_start.to_string()))?;
+        Ok((SuffixType::Hexadecimal, suffix_start))
     } else {
-        SuffixType::Alphabetic
+        // no numeric/hex suffix
+        Ok((SuffixType::Alphabetic, 0))
     }
 }
 
@@ -418,6 +428,7 @@ struct Settings {
     prefix: String,
     suffix_type: SuffixType,
     suffix_length: usize,
+    suffix_start: usize,
     additional_suffix: String,
     input: String,
     /// When supplied, a shell command to output to instead of xaa, xab …
@@ -497,7 +508,7 @@ impl Settings {
             return Err(SettingsError::SuffixContainsSeparator(additional_suffix));
         }
         let strategy = Strategy::from(matches).map_err(SettingsError::Strategy)?;
-        let suffix_type = suffix_type_from(matches);
+        let (suffix_type, suffix_start) = suffix_type_from(matches)?;
         let suffix_length_str = matches.get_one::<String>(OPT_SUFFIX_LENGTH).unwrap();
         let suffix_length: usize = suffix_length_str
             .parse()
@@ -517,6 +528,7 @@ impl Settings {
                 .parse()
                 .map_err(|_| SettingsError::SuffixNotParsable(suffix_length_str.to_string()))?,
             suffix_type,
+            suffix_start,
             additional_suffix,
             verbose: matches.value_source("verbose") == Some(ValueSource::CommandLine),
             strategy,
@@ -589,7 +601,8 @@ impl<'a> ByteChunkWriter<'a> {
             &settings.additional_suffix,
             settings.suffix_length,
             settings.suffix_type,
-        );
+            settings.suffix_start,
+        )?;
         let filename = filename_iterator
             .next()
             .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
@@ -717,7 +730,8 @@ impl<'a> LineChunkWriter<'a> {
             &settings.additional_suffix,
             settings.suffix_length,
             settings.suffix_type,
-        );
+            settings.suffix_start,
+        )?;
         let filename = filename_iterator
             .next()
             .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
@@ -825,7 +839,8 @@ impl<'a> LineBytesChunkWriter<'a> {
             &settings.additional_suffix,
             settings.suffix_length,
             settings.suffix_type,
-        );
+            settings.suffix_start,
+        )?;
         let filename = filename_iterator
             .next()
             .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
@@ -1022,7 +1037,8 @@ where
         &settings.additional_suffix,
         settings.suffix_length,
         settings.suffix_type,
-    );
+        settings.suffix_start,
+    )?;
 
     // Create one writer for each chunk. This will create each
     // of the underlying files (if not in `--filter` mode).
@@ -1098,7 +1114,8 @@ where
         &settings.additional_suffix,
         settings.suffix_length,
         settings.suffix_type,
-    );
+        settings.suffix_start,
+    )?;
 
     // Create one writer for each chunk. This will create each
     // of the underlying files (if not in `--filter` mode).
diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs
index c57e6bd6ec7..2527f4562db 100644
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@@ -701,3 +701,29 @@ fn test_multiple_of_input_chunk() {
     }
     assert_eq!(glob.collate(), at.read_bytes(name));
 }
+
+#[test]
+fn test_numeric_suffix() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["-n", "4", "--numeric-suffixes", "9", "threebytes.txt"])
+        .succeeds()
+        .no_stdout()
+        .no_stderr();
+    assert_eq!(at.read("x09"), "a");
+    assert_eq!(at.read("x10"), "b");
+    assert_eq!(at.read("x11"), "c");
+    assert_eq!(at.read("x12"), "");
+}
+
+#[test]
+fn test_hex_suffix() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    ucmd.args(&["-n", "4", "--hex-suffixes", "9", "threebytes.txt"])
+        .succeeds()
+        .no_stdout()
+        .no_stderr();
+    assert_eq!(at.read("x09"), "a");
+    assert_eq!(at.read("x0a"), "b");
+    assert_eq!(at.read("x0b"), "c");
+    assert_eq!(at.read("x0c"), "");
+}