diff --git a/src/uu/split/src/filenames.rs b/src/uu/split/src/filenames.rs index 6f68caeb41f..95ae5d40f33 100644 --- a/src/uu/split/src/filenames.rs +++ b/src/uu/split/src/filenames.rs @@ -28,6 +28,7 @@ use crate::number::DynamicWidthNumber; use crate::number::FixedWidthNumber; use crate::number::Number; +use uucore::error::{UResult, USimpleError}; /// The format to use for suffixes in the filename for each output chunk. #[derive(Clone, Copy)] @@ -119,19 +120,28 @@ impl<'a> FilenameIterator<'a> { additional_suffix: &'a str, suffix_length: usize, suffix_type: SuffixType, - ) -> FilenameIterator<'a> { + suffix_start: usize, + ) -> UResult> { let radix = suffix_type.radix(); let number = if suffix_length == 0 { - Number::DynamicWidth(DynamicWidthNumber::new(radix)) + Number::DynamicWidth(DynamicWidthNumber::new(radix, suffix_start)) } else { - Number::FixedWidth(FixedWidthNumber::new(radix, suffix_length)) + Number::FixedWidth( + FixedWidthNumber::new(radix, suffix_length, suffix_start).map_err(|_| { + USimpleError::new( + 1, + "numerical suffix start value is too large for the suffix length", + ) + })?, + ) }; - FilenameIterator { + + Ok(FilenameIterator { prefix, additional_suffix, number, first_iteration: true, - } + }) } } @@ -161,36 +171,36 @@ mod tests { #[test] fn test_filename_iterator_alphabetic_fixed_width() { - let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Alphabetic); + let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Alphabetic, 0).unwrap(); assert_eq!(it.next().unwrap(), "chunk_aa.txt"); assert_eq!(it.next().unwrap(), "chunk_ab.txt"); assert_eq!(it.next().unwrap(), "chunk_ac.txt"); - let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Alphabetic); + let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Alphabetic, 0).unwrap(); assert_eq!(it.nth(26 * 26 - 1).unwrap(), "chunk_zz.txt"); assert_eq!(it.next(), None); } #[test] fn test_filename_iterator_numeric_fixed_width() { - let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Decimal); + let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Decimal, 0).unwrap(); assert_eq!(it.next().unwrap(), "chunk_00.txt"); assert_eq!(it.next().unwrap(), "chunk_01.txt"); assert_eq!(it.next().unwrap(), "chunk_02.txt"); - let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Decimal); + let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Decimal, 0).unwrap(); assert_eq!(it.nth(10 * 10 - 1).unwrap(), "chunk_99.txt"); assert_eq!(it.next(), None); } #[test] fn test_filename_iterator_alphabetic_dynamic_width() { - let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Alphabetic); + let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Alphabetic, 0).unwrap(); assert_eq!(it.next().unwrap(), "chunk_aa.txt"); assert_eq!(it.next().unwrap(), "chunk_ab.txt"); assert_eq!(it.next().unwrap(), "chunk_ac.txt"); - let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Alphabetic); + let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Alphabetic, 0).unwrap(); assert_eq!(it.nth(26 * 25 - 1).unwrap(), "chunk_yz.txt"); assert_eq!(it.next().unwrap(), "chunk_zaaa.txt"); assert_eq!(it.next().unwrap(), "chunk_zaab.txt"); @@ -198,14 +208,49 @@ mod tests { #[test] fn test_filename_iterator_numeric_dynamic_width() { - let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Decimal); + let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Decimal, 0).unwrap(); assert_eq!(it.next().unwrap(), "chunk_00.txt"); assert_eq!(it.next().unwrap(), "chunk_01.txt"); assert_eq!(it.next().unwrap(), "chunk_02.txt"); - let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Decimal); + let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Decimal, 0).unwrap(); assert_eq!(it.nth(10 * 9 - 1).unwrap(), "chunk_89.txt"); assert_eq!(it.next().unwrap(), "chunk_9000.txt"); assert_eq!(it.next().unwrap(), "chunk_9001.txt"); } + + #[test] + fn test_filename_iterator_numeric_suffix_decimal() { + let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Decimal, 5).unwrap(); + assert_eq!(it.next().unwrap(), "chunk_05.txt"); + assert_eq!(it.next().unwrap(), "chunk_06.txt"); + assert_eq!(it.next().unwrap(), "chunk_07.txt"); + } + + #[test] + fn test_filename_iterator_numeric_suffix_hex() { + let mut it = + FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Hexadecimal, 9).unwrap(); + assert_eq!(it.next().unwrap(), "chunk_09.txt"); + assert_eq!(it.next().unwrap(), "chunk_0a.txt"); + assert_eq!(it.next().unwrap(), "chunk_0b.txt"); + } + + #[test] + fn test_filename_iterator_numeric_suffix_err() { + let mut it = FilenameIterator::new("chunk_", ".txt", 3, SuffixType::Decimal, 999).unwrap(); + assert_eq!(it.next().unwrap(), "chunk_999.txt"); + assert!(it.next().is_none()); + + let it = FilenameIterator::new("chunk_", ".txt", 3, SuffixType::Decimal, 1000); + assert!(it.is_err()); + + let mut it = + FilenameIterator::new("chunk_", ".txt", 3, SuffixType::Hexadecimal, 0xfff).unwrap(); + assert_eq!(it.next().unwrap(), "chunk_fff.txt"); + assert!(it.next().is_none()); + + let it = FilenameIterator::new("chunk_", ".txt", 3, SuffixType::Hexadecimal, 0x1000); + assert!(it.is_err()); + } } diff --git a/src/uu/split/src/number.rs b/src/uu/split/src/number.rs index c7557271db6..c29aa89fe67 100644 --- a/src/uu/split/src/number.rs +++ b/src/uu/split/src/number.rs @@ -65,12 +65,6 @@ impl Error for Overflow {} /// 1 * 3^2 + 0 * 3^1 + 2 * 3^0 = 9 + 0 + 2 = 11 /// ``` /// -/// For the [`DynamicWidthNumber`], the digits are not unique in the -/// sense that repeatedly incrementing the number will eventually -/// yield `vec![0, 0]`, `vec![0, 0, 0], `vec![0, 0, 0, 0]`, etc. -/// That's okay because each of these numbers will be displayed -/// differently and we only intend to use these numbers for display -/// purposes and not for mathematical purposes. #[derive(Clone)] pub enum Number { /// A fixed-width representation of a number. @@ -151,11 +145,31 @@ impl Number { impl Display for Number { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { - Self::FixedWidth(number) => number.fmt(f), - Self::DynamicWidth(number) => number.fmt(f), + Self::FixedWidth(number) => fmt_internal(number.radix, &number.digits, f), + Self::DynamicWidth(number) => fmt_internal(number.radix, &number.digits, f), } } } +fn fmt_internal(radix: u8, digits: &[u8], f: &mut Formatter) -> fmt::Result { + match radix { + 10 => { + let digits: String = digits.iter().map(|d| (b'0' + d) as char).collect(); + write!(f, "{}", digits) + } + 16 => { + let digits: String = digits + .iter() + .map(|d| (if *d < 10 { b'0' + d } else { b'a' + (d - 10) }) as char) + .collect(); + write!(f, "{}", digits) + } + 26 => { + let digits: String = digits.iter().map(|d| (b'a' + d) as char).collect(); + write!(f, "{}", digits) + } + _ => Err(fmt::Error), + } +} /// A positional notation representation of a fixed-width number. /// @@ -175,7 +189,7 @@ impl Display for Number { /// /// # Displaying /// -/// This number is only displayable if `radix` is 10, 26, or 26. If +/// This number is only displayable if `radix` is 10, 16, or 26. If /// `radix` is 10 or 16, then the digits are concatenated and /// displayed as a fixed-width decimal or hexadecimal number, /// respectively. If `radix` is 26, then each digit is translated to @@ -189,10 +203,21 @@ pub struct FixedWidthNumber { impl FixedWidthNumber { /// Instantiate a number of the given radix and width. - pub fn new(radix: u8, width: usize) -> Self { - Self { - radix, - digits: vec![0; width], + pub fn new(radix: u8, width: usize, mut suffix_start: usize) -> Result { + let mut digits = vec![0_u8; width]; + + for i in (0..digits.len()).rev() { + let remainder = (suffix_start % (radix as usize)) as u8; + suffix_start /= radix as usize; + digits[i] = remainder; + if suffix_start == 0 { + break; + } + } + if suffix_start != 0 { + Err(Overflow) + } else { + Ok(Self { radix, digits }) } } @@ -227,30 +252,6 @@ impl FixedWidthNumber { } } -impl Display for FixedWidthNumber { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - match self.radix { - 10 => { - let digits: String = self.digits.iter().map(|d| (b'0' + d) as char).collect(); - write!(f, "{}", digits) - } - 16 => { - let digits: String = self - .digits - .iter() - .map(|d| (if *d < 10 { b'0' + d } else { b'a' + (d - 10) }) as char) - .collect(); - write!(f, "{}", digits) - } - 26 => { - let digits: String = self.digits.iter().map(|d| (b'a' + d) as char).collect(); - write!(f, "{}", digits) - } - _ => Err(fmt::Error), - } - } -} - /// A positional notation representation of a number of dynamically growing width. /// /// The digits are represented as a [`Vec`] with the most @@ -301,11 +302,20 @@ impl DynamicWidthNumber { /// /// This associated function returns a new instance of the struct /// with the given radix and a width of two digits, both 0. - pub fn new(radix: u8) -> Self { - Self { - radix, - digits: vec![0, 0], + pub fn new(radix: u8, mut suffix_start: usize) -> Self { + let mut digits = vec![]; + + while suffix_start > 0 { + let remainder = (suffix_start % (radix as usize)) as u8; + suffix_start /= radix as usize; + digits.push(remainder); } + while digits.len() < 2 { + digits.push(0); + } + digits.reverse(); + + Self { radix, digits } } /// Set all digits to zero. @@ -333,7 +343,7 @@ impl DynamicWidthNumber { // significant digit. Otherwise, terminate the loop, since // there will be no further changes to any higher order // digits. - if self.digits[i] == self.radix { + if i > 0 && self.digits[i] == self.radix { self.digits[i] = 0; } else { break; @@ -341,60 +351,16 @@ impl DynamicWidthNumber { } // If the most significant digit is at its maximum value, then - // add another digit and reset all digits zero. - if self.digits[0] == self.radix - 1 { + // add another digit and reset all digits to zero. + if self.digits[0] == self.radix { self.digits.push(0); self.reset(); + self.digits[0] = 1; } Ok(()) } } -impl Display for DynamicWidthNumber { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - match self.radix { - 10 => { - let num_fill_chars = self.digits.len() - 2; - let digits: String = self.digits.iter().map(|d| (b'0' + d) as char).collect(); - write!( - f, - "{empty:9 { - let num_fill_chars = self.digits.len() - 2; - let digits: String = self - .digits - .iter() - .map(|d| (if *d < 10 { b'0' + d } else { b'a' + (d - 10) }) as char) - .collect(); - write!( - f, - "{empty:f { - let num_fill_chars = self.digits.len() - 2; - let digits: String = self.digits.iter().map(|d| (b'a' + d) as char).collect(); - write!( - f, - "{empty:z Err(fmt::Error), - } - } -} - #[cfg(test)] mod tests { use crate::number::DynamicWidthNumber; @@ -404,7 +370,7 @@ mod tests { #[test] fn test_dynamic_width_number_increment() { - let mut n = Number::DynamicWidth(DynamicWidthNumber::new(3)); + let mut n = Number::DynamicWidth(DynamicWidthNumber::new(3, 0)); assert_eq!(n.digits(), &vec![0, 0]); n.increment().unwrap(); @@ -432,7 +398,7 @@ mod tests { #[test] fn test_dynamic_width_number_display_alphabetic() { fn num(n: usize) -> Number { - let mut number = Number::DynamicWidth(DynamicWidthNumber::new(26)); + let mut number = Number::DynamicWidth(DynamicWidthNumber::new(26, 0)); for _ in 0..n { number.increment().unwrap(); } @@ -456,7 +422,7 @@ mod tests { #[test] fn test_dynamic_width_number_display_numeric_decimal() { fn num(n: usize) -> Number { - let mut number = Number::DynamicWidth(DynamicWidthNumber::new(10)); + let mut number = Number::DynamicWidth(DynamicWidthNumber::new(10, 0)); for _ in 0..n { number.increment().unwrap(); } @@ -477,7 +443,7 @@ mod tests { #[test] fn test_dynamic_width_number_display_numeric_hexadecimal() { fn num(n: usize) -> Number { - let mut number = Number::DynamicWidth(DynamicWidthNumber::new(16)); + let mut number = Number::DynamicWidth(DynamicWidthNumber::new(16, 0)); for _ in 0..n { number.increment().unwrap(); } @@ -500,7 +466,7 @@ mod tests { #[test] fn test_fixed_width_number_increment() { - let mut n = Number::FixedWidth(FixedWidthNumber::new(3, 2)); + let mut n = Number::FixedWidth(FixedWidthNumber::new(3, 2, 0).unwrap()); assert_eq!(n.digits(), &vec![0, 0]); n.increment().unwrap(); assert_eq!(n.digits(), &vec![0, 1]); @@ -524,7 +490,7 @@ mod tests { #[test] fn test_fixed_width_number_display_alphabetic() { fn num(n: usize) -> Result { - let mut number = Number::FixedWidth(FixedWidthNumber::new(26, 2)); + let mut number = Number::FixedWidth(FixedWidthNumber::new(26, 2, 0).unwrap()); for _ in 0..n { number.increment()?; } @@ -549,7 +515,7 @@ mod tests { #[test] fn test_fixed_width_number_display_numeric_decimal() { fn num(n: usize) -> Result { - let mut number = Number::FixedWidth(FixedWidthNumber::new(10, 2)); + let mut number = Number::FixedWidth(FixedWidthNumber::new(10, 2, 0).unwrap()); for _ in 0..n { number.increment()?; } @@ -568,7 +534,7 @@ mod tests { #[test] fn test_fixed_width_number_display_numeric_hexadecimal() { fn num(n: usize) -> Result { - let mut number = Number::FixedWidth(FixedWidthNumber::new(16, 2)); + let mut number = Number::FixedWidth(FixedWidthNumber::new(16, 2, 0).unwrap()); for _ in 0..n { number.increment()?; } @@ -583,4 +549,32 @@ mod tests { assert_eq!(format!("{}", num(16 * 16 - 1).unwrap()), "ff"); assert!(num(16 * 16).is_err()); } + + #[test] + fn test_fixed_width_number_start_suffix() { + fn num(n: usize) -> Result { + let mut number = Number::FixedWidth(FixedWidthNumber::new(16, 2, 0x14)?); + for _ in 0..n { + number.increment()?; + } + Ok(number) + } + + assert_eq!(format!("{}", num(0).unwrap()), "14"); + assert_eq!(format!("{}", num(0xf).unwrap()), "23"); + } + + #[test] + fn test_dynamic_width_number_start_suffix() { + fn num(n: usize) -> Result { + let mut number = Number::DynamicWidth(DynamicWidthNumber::new(16, 0x14)); + for _ in 0..n { + number.increment()?; + } + Ok(number) + } + + assert_eq!(format!("{}", num(0).unwrap()), "14"); + assert_eq!(format!("{}", num(0xff).unwrap()), "113"); + } } diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 1224c83ec9d..4beacc4810c 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -400,13 +400,23 @@ impl Strategy { } /// Parse the suffix type from the command-line arguments. -fn suffix_type_from(matches: &ArgMatches) -> SuffixType { +fn suffix_type_from(matches: &ArgMatches) -> Result<(SuffixType, usize), SettingsError> { if matches.value_source(OPT_NUMERIC_SUFFIXES) == Some(ValueSource::CommandLine) { - SuffixType::Decimal + let suffix_start = matches.value_of(OPT_NUMERIC_SUFFIXES); + let suffix_start = suffix_start.ok_or(SettingsError::SuffixNotParsable(String::new()))?; + let suffix_start = suffix_start + .parse() + .map_err(|_| SettingsError::SuffixNotParsable(suffix_start.to_string()))?; + Ok((SuffixType::Decimal, suffix_start)) } else if matches.value_source(OPT_HEX_SUFFIXES) == Some(ValueSource::CommandLine) { - SuffixType::Hexadecimal + let suffix_start = matches.value_of(OPT_HEX_SUFFIXES); + let suffix_start = suffix_start.ok_or(SettingsError::SuffixNotParsable(String::new()))?; + let suffix_start = usize::from_str_radix(suffix_start, 16) + .map_err(|_| SettingsError::SuffixNotParsable(suffix_start.to_string()))?; + Ok((SuffixType::Hexadecimal, suffix_start)) } else { - SuffixType::Alphabetic + // no numeric/hex suffix + Ok((SuffixType::Alphabetic, 0)) } } @@ -418,6 +428,7 @@ struct Settings { prefix: String, suffix_type: SuffixType, suffix_length: usize, + suffix_start: usize, additional_suffix: String, input: String, /// When supplied, a shell command to output to instead of xaa, xab … @@ -497,7 +508,7 @@ impl Settings { return Err(SettingsError::SuffixContainsSeparator(additional_suffix)); } let strategy = Strategy::from(matches).map_err(SettingsError::Strategy)?; - let suffix_type = suffix_type_from(matches); + let (suffix_type, suffix_start) = suffix_type_from(matches)?; let suffix_length_str = matches.get_one::(OPT_SUFFIX_LENGTH).unwrap(); let suffix_length: usize = suffix_length_str .parse() @@ -517,6 +528,7 @@ impl Settings { .parse() .map_err(|_| SettingsError::SuffixNotParsable(suffix_length_str.to_string()))?, suffix_type, + suffix_start, additional_suffix, verbose: matches.value_source("verbose") == Some(ValueSource::CommandLine), strategy, @@ -589,7 +601,8 @@ impl<'a> ByteChunkWriter<'a> { &settings.additional_suffix, settings.suffix_length, settings.suffix_type, - ); + settings.suffix_start, + )?; let filename = filename_iterator .next() .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; @@ -717,7 +730,8 @@ impl<'a> LineChunkWriter<'a> { &settings.additional_suffix, settings.suffix_length, settings.suffix_type, - ); + settings.suffix_start, + )?; let filename = filename_iterator .next() .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; @@ -825,7 +839,8 @@ impl<'a> LineBytesChunkWriter<'a> { &settings.additional_suffix, settings.suffix_length, settings.suffix_type, - ); + settings.suffix_start, + )?; let filename = filename_iterator .next() .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; @@ -1022,7 +1037,8 @@ where &settings.additional_suffix, settings.suffix_length, settings.suffix_type, - ); + settings.suffix_start, + )?; // Create one writer for each chunk. This will create each // of the underlying files (if not in `--filter` mode). @@ -1098,7 +1114,8 @@ where &settings.additional_suffix, settings.suffix_length, settings.suffix_type, - ); + settings.suffix_start, + )?; // Create one writer for each chunk. This will create each // of the underlying files (if not in `--filter` mode). diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index c57e6bd6ec7..2527f4562db 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -701,3 +701,29 @@ fn test_multiple_of_input_chunk() { } assert_eq!(glob.collate(), at.read_bytes(name)); } + +#[test] +fn test_numeric_suffix() { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["-n", "4", "--numeric-suffixes", "9", "threebytes.txt"]) + .succeeds() + .no_stdout() + .no_stderr(); + assert_eq!(at.read("x09"), "a"); + assert_eq!(at.read("x10"), "b"); + assert_eq!(at.read("x11"), "c"); + assert_eq!(at.read("x12"), ""); +} + +#[test] +fn test_hex_suffix() { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["-n", "4", "--hex-suffixes", "9", "threebytes.txt"]) + .succeeds() + .no_stdout() + .no_stderr(); + assert_eq!(at.read("x09"), "a"); + assert_eq!(at.read("x0a"), "b"); + assert_eq!(at.read("x0b"), "c"); + assert_eq!(at.read("x0c"), ""); +}