diff --git a/src/uu/split/src/filenames.rs b/src/uu/split/src/filenames.rs index 36488e7e41..3e2db36063 100644 --- a/src/uu/split/src/filenames.rs +++ b/src/uu/split/src/filenames.rs @@ -2,529 +2,182 @@ // * // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. -// spell-checker:ignore zaaa zaab zzaaaa zzzaaaaa +// spell-checker:ignore zaaa zaab //! Compute filenames from a given index. //! -//! The [`FilenameFactory`] can be used to convert a chunk index given -//! as a [`usize`] to a filename for that chunk. +//! The [`FilenameIterator`] yields filenames for use with ``split``. //! //! # Examples //! //! Create filenames of the form `chunk_??.txt`: //! //! ```rust,ignore -//! use crate::filenames::FilenameFactory; +//! use crate::filenames::FilenameIterator; //! //! let prefix = "chunk_".to_string(); //! let suffix = ".txt".to_string(); //! let width = 2; //! let use_numeric_suffix = false; -//! let factory = FilenameFactory::new(prefix, suffix, width, use_numeric_suffix); +//! let it = FilenameIterator::new(prefix, suffix, width, use_numeric_suffix); //! -//! assert_eq!(factory.make(0).unwrap(), "chunk_aa.txt"); -//! assert_eq!(factory.make(10).unwrap(), "chunk_ak.txt"); -//! assert_eq!(factory.make(28).unwrap(), "chunk_bc.txt"); +//! assert_eq!(it.next().unwrap(), "chunk_aa.txt"); +//! assert_eq!(it.next().unwrap(), "chunk_ab.txt"); +//! assert_eq!(it.next().unwrap(), "chunk_ac.txt"); //! ``` - -/// Base 10 logarithm. -fn log10(n: usize) -> usize { - (n as f64).log10() as usize -} - -/// Base 26 logarithm. -fn log26(n: usize) -> usize { - (n as f64).log(26.0) as usize -} - -/// Convert a radix 10 number to a radix 26 number of the given width. -/// -/// `n` is the radix 10 (that is, decimal) number to transform. This -/// function returns a [`Vec`] of unsigned integers representing the -/// digits, with the most significant digit first and the least -/// significant digit last. 
The returned `Vec` is always of length -/// `width`. -/// -/// If the number `n` is too large to represent within `width` digits, -/// then this function returns `None`. -/// -/// # Examples -/// -/// ```rust,ignore -/// use crate::filenames::to_radix_26; -/// -/// assert_eq!(to_radix_26(20, 2), Some(vec![0, 20])); -/// assert_eq!(to_radix_26(26, 2), Some(vec![1, 0])); -/// assert_eq!(to_radix_26(30, 2), Some(vec![1, 4])); -/// ``` -fn to_radix_26(mut n: usize, width: usize) -> Option<Vec<u8>> { - if width == 0 { - return None; - } - // Use the division algorithm to repeatedly compute the quotient - // and remainder of the number after division by the radix 26. The - // successive quotients are the digits in radix 26, from most - // significant to least significant. - let mut result = vec![]; - for w in (0..width).rev() { - let divisor = 26_usize.pow(w as u32); - let (quotient, remainder) = (n / divisor, n % divisor); - n = remainder; - // If the quotient is equal to or greater than the radix, that - // means the number `n` requires a greater width to be able to - // represent it in radix 26. - if quotient >= 26 { - return None; - } - result.push(quotient as u8); - } - Some(result) -} - -/// Convert a number between 0 and 25 into a lowercase ASCII character. -/// -/// # Examples -/// -/// ```rust,ignore -/// use crate::filenames::to_ascii_char; -/// -/// assert_eq!(to_ascii_char(&0), Some('a')); -/// assert_eq!(to_ascii_char(&25), Some('z')); -/// assert_eq!(to_ascii_char(&26), None); -/// ``` -fn to_ascii_char(n: &u8) -> Option<char> { - // TODO In Rust v1.52.0 or later, use `char::from_digit`: - // https://doc.rust-lang.org/std/primitive.char.html#method.from_digit - // - // char::from_digit(*n as u32 + 10, 36) - // - // In that call, radix 36 is used because the characters in radix - // 36 are [0-9a-z]. We want to exclude the first ten of those - // characters, so we add 10 to the number before conversion. 
- // - // Until that function is available, just add `n` to `b'a'` and - // cast to `char`. - if *n < 26 { - Some((b'a' + n) as char) - } else { - None - } -} - -/// Fixed width alphabetic string representation of index `i`. -/// -/// If `i` is greater than or equal to the number of lowercase ASCII -/// strings that can be represented in the given `width`, then this -/// function returns `None`. -/// -/// # Examples -/// -/// ```rust,ignore -/// use crate::filenames::str_prefix_fixed_width; -/// -/// assert_eq!(str_prefix_fixed_width(0, 2).as_deref(), "aa"); -/// assert_eq!(str_prefix_fixed_width(675, 2).as_deref(), "zz"); -/// assert_eq!(str_prefix_fixed_width(676, 2), None); -/// ``` -fn str_prefix_fixed_width(i: usize, width: usize) -> Option<String> { - to_radix_26(i, width)?.iter().map(to_ascii_char).collect() -} - -/// Dynamically sized alphabetic string representation of index `i`. -/// -/// The size of the returned string starts at two then grows by 2 if -/// `i` is sufficiently large. -/// -/// # Examples -/// -/// ```rust,ignore -/// use crate::filenames::str_prefix; -/// -/// assert_eq!(str_prefix(0), "aa"); -/// assert_eq!(str_prefix(649), "yz"); -/// assert_eq!(str_prefix(650), "zaaa"); -/// assert_eq!(str_prefix(651), "zaab"); -/// ``` -fn str_prefix(i: usize) -> Option<String> { - // This number tells us the order of magnitude of `i`, with a - // slight adjustment. - // - // We shift by 26 so that - // - // * if `i` is in the interval [0, 26^2 - 26), then `d` is 1, - // * if `i` is in the interval [26^2 - 26, 26^3 - 26), then `d` is 2, - // * if `i` is in the interval [26^3 - 26, 26^4 - 26), then `d` is 3, - // - // and so on. This will allow us to compute how many leading "z" - // characters need to appear in the string and how many characters - // to format to the right of those. - let d = log26(i + 26); - - // This is the number of leading "z" characters. 
- // - // For values of `i` less than 26^2 - 26, the returned string is - // just the radix 26 representation of that number with a width of - // two (using the lowercase ASCII characters as the digits). - // - // * if `i` is 26^2 - 26, then the returned string is "zaa", - // * if `i` is 26^3 - 26, then the returned string is "zzaaaa", - // * if `i` is 26^4 - 26, then the returned string is "zzzaaaaa", - // - // and so on. As you can see, the number of leading "z"s there is - // linearly increasing by 1 for each order of magnitude. - let num_fill_chars = d - 1; - - // This is the number of characters after the leading "z" characters. - let width = d + 1; - - // This is the radix 10 number to render in radix 26, to the right - // of the leading "z"s. - let number = (i + 26) - 26_usize.pow(d as u32); - - // This is the radix 26 number to render after the leading "z"s, - // collected in a `String`. - // - // For example, if `i` is 789, then `number` is 789 + 26 - 676, - // which equals 139. In radix 26 and assuming a `width` of 3, this - // number is - // - // [0, 5, 9] - // - // with the most significant digit on the left and the least - // significant digit on the right. After translating to ASCII - // lowercase letters, this becomes "afj". - let digits = str_prefix_fixed_width(number, width)?; - - // `empty` is just the empty string, to be displayed with a width - // of `num_fill_chars` and with blank spaces filled with the - // character "z". - // - // `digits` is as described in the previous comment. - Some(format!( - "{empty:z Option { - let max = 10_usize.pow(width as u32); - if i >= max { - None - } else { - Some(format!("{i:0width$}", i = i, width = width)) - } -} - -/// Dynamically sized numeric string representation of index `i`. -/// -/// The size of the returned string starts at two then grows by 2 if -/// `i` is sufficiently large. 
-/// -/// # Examples -/// -/// ```rust,ignore -/// use crate::filenames::num_prefix; -/// -/// assert_eq!(num_prefix(89), "89"); -/// assert_eq!(num_prefix(90), "9000"); -/// assert_eq!(num_prefix(91), "9001"); -/// ``` -fn num_prefix(i: usize) -> String { - // This number tells us the order of magnitude of `i`, with a - // slight adjustment. - // - // We shift by 10 so that - // - // * if `i` is in the interval [0, 90), then `d` is 1, - // * if `i` is in the interval [90, 990), then `d` is 2, - // * if `i` is in the interval [990, 9990), then `d` is 3, - // - // and so on. This will allow us to compute how many leading "9" - // characters need to appear in the string and how many digits to - // format to the right of those. - let d = log10(i + 10); - - // This is the number of leading "9" characters. - // - // For values of `i` less than 90, the returned string is just - // that number padded by a 0 to ensure the width is 2, but - // - // * if `i` is 90, then the returned string is "900", - // * if `i` is 990, then the returned string is "990000", - // * if `i` is 9990, then the returned string is "99900000", - // - // and so on. As you can see, the number of leading 9s there is - // linearly increasing by 1 for each order of magnitude. - let num_fill_chars = d - 1; - - // This is the number of characters after the leading "9" characters. - let width = d + 1; - - // This is the number to render after the leading "9"s. - // - // For example, if `i` is 5732, then the returned string is - // "994742". After the two "9" characters is the number 4742, - // which equals 5732 + 10 - 1000. - let number = (i + 10) - 10_usize.pow(d as u32); - - // `empty` is just the empty string, to be displayed with a width - // of `num_fill_chars` and with blank spaces filled with the - // character "9". - // - // `number` is the next remaining part of the number to render; - // for small numbers we pad with 0 and enforce a minimum width. 
- format!( - "{empty:9 { - prefix: &'a str, +pub struct FilenameIterator<'a> { additional_suffix: &'a str, - suffix_length: usize, - use_numeric_suffix: bool, + prefix: &'a str, + number: Number, + first_iteration: bool, } -impl<'a> FilenameFactory<'a> { - /// Create a new instance of this struct. - /// - /// For an explanation of the parameters, see the struct documentation. +impl<'a> FilenameIterator<'a> { pub fn new( prefix: &'a str, additional_suffix: &'a str, suffix_length: usize, use_numeric_suffix: bool, - ) -> FilenameFactory<'a> { - FilenameFactory { + ) -> FilenameIterator<'a> { + let radix = if use_numeric_suffix { 10 } else { 26 }; + let number = if suffix_length == 0 { + Number::DynamicWidth(DynamicWidthNumber::new(radix)) + } else { + Number::FixedWidth(FixedWidthNumber::new(radix, suffix_length)) + }; + FilenameIterator { prefix, additional_suffix, - suffix_length, - use_numeric_suffix, + number, + first_iteration: true, } } +} - /// Construct the filename for the specified element of the output collection of files. - /// - /// For an explanation of the parameters, see the struct documentation. - /// - /// If `suffix_length` has been set to a positive integer and `i` - /// is greater than or equal to the number of strings that can be - /// represented within that length, then this returns `None`. 
For - /// example: - /// - /// ```rust,ignore - /// use crate::filenames::FilenameFactory; - /// - /// let prefix = ""; - /// let suffix = ""; - /// let width = 1; - /// let use_numeric_suffix = true; - /// let factory = FilenameFactory::new(prefix, suffix, width, use_numeric_suffix); - /// - /// assert_eq!(factory.make(10), None); - /// ``` - pub fn make(&self, i: usize) -> Option<String> { - let suffix = match (self.use_numeric_suffix, self.suffix_length) { - (true, 0) => Some(num_prefix(i)), - (false, 0) => str_prefix(i), - (true, width) => num_prefix_fixed_width(i, width), - (false, width) => str_prefix_fixed_width(i, width), - }?; +impl<'a> Iterator for FilenameIterator<'a> { + type Item = String; + + fn next(&mut self) -> Option<String> { + if self.first_iteration { + self.first_iteration = false; + } else { + self.number.increment().ok()?; + } + // The first and third parts are just taken directly from the + // struct parameters unchanged. Some(format!( "{}{}{}", - self.prefix, suffix, self.additional_suffix + self.prefix, self.number, self.additional_suffix )) } } #[cfg(test)] mod tests { - use crate::filenames::num_prefix; - use crate::filenames::num_prefix_fixed_width; - use crate::filenames::str_prefix; - use crate::filenames::str_prefix_fixed_width; - use crate::filenames::to_ascii_char; - use crate::filenames::to_radix_26; - use crate::filenames::FilenameFactory; - #[test] - fn test_to_ascii_char() { - assert_eq!(to_ascii_char(&0), Some('a')); - assert_eq!(to_ascii_char(&5), Some('f')); - assert_eq!(to_ascii_char(&25), Some('z')); - assert_eq!(to_ascii_char(&26), None); - } + use crate::filenames::FilenameIterator; #[test] - fn test_to_radix_26_exceed_width() { - assert_eq!(to_radix_26(1, 0), None); - assert_eq!(to_radix_26(26, 1), None); - assert_eq!(to_radix_26(26 * 26, 2), None); - } + fn test_filename_iterator_alphabetic_fixed_width() { + let mut it = FilenameIterator::new("chunk_", ".txt", 2, false); + assert_eq!(it.next().unwrap(), "chunk_aa.txt"); + 
assert_eq!(it.next().unwrap(), "chunk_ab.txt"); + assert_eq!(it.next().unwrap(), "chunk_ac.txt"); - #[test] - fn test_to_radix_26_width_one() { - assert_eq!(to_radix_26(0, 1), Some(vec![0])); - assert_eq!(to_radix_26(10, 1), Some(vec![10])); - assert_eq!(to_radix_26(20, 1), Some(vec![20])); - assert_eq!(to_radix_26(25, 1), Some(vec![25])); + let mut it = FilenameIterator::new("chunk_", ".txt", 2, false); + assert_eq!(it.nth(26 * 26 - 1).unwrap(), "chunk_zz.txt"); + assert_eq!(it.next(), None); } #[test] - fn test_to_radix_26_width_two() { - assert_eq!(to_radix_26(0, 2), Some(vec![0, 0])); - assert_eq!(to_radix_26(10, 2), Some(vec![0, 10])); - assert_eq!(to_radix_26(20, 2), Some(vec![0, 20])); - assert_eq!(to_radix_26(25, 2), Some(vec![0, 25])); + fn test_filename_iterator_numeric_fixed_width() { + let mut it = FilenameIterator::new("chunk_", ".txt", 2, true); + assert_eq!(it.next().unwrap(), "chunk_00.txt"); + assert_eq!(it.next().unwrap(), "chunk_01.txt"); + assert_eq!(it.next().unwrap(), "chunk_02.txt"); - assert_eq!(to_radix_26(26, 2), Some(vec![1, 0])); - assert_eq!(to_radix_26(30, 2), Some(vec![1, 4])); - - assert_eq!(to_radix_26(26 * 2, 2), Some(vec![2, 0])); - assert_eq!(to_radix_26(26 * 26 - 1, 2), Some(vec![25, 25])); + let mut it = FilenameIterator::new("chunk_", ".txt", 2, true); + assert_eq!(it.nth(10 * 10 - 1).unwrap(), "chunk_99.txt"); + assert_eq!(it.next(), None); } #[test] - fn test_str_prefix_dynamic_width() { - assert_eq!(str_prefix(0).as_deref(), Some("aa")); - assert_eq!(str_prefix(1).as_deref(), Some("ab")); - assert_eq!(str_prefix(2).as_deref(), Some("ac")); - assert_eq!(str_prefix(25).as_deref(), Some("az")); - - assert_eq!(str_prefix(26).as_deref(), Some("ba")); - assert_eq!(str_prefix(27).as_deref(), Some("bb")); - assert_eq!(str_prefix(28).as_deref(), Some("bc")); - assert_eq!(str_prefix(51).as_deref(), Some("bz")); - - assert_eq!(str_prefix(52).as_deref(), Some("ca")); + fn test_filename_iterator_alphabetic_dynamic_width() { + let mut it 
= FilenameIterator::new("chunk_", ".txt", 0, false); + assert_eq!(it.next().unwrap(), "chunk_aa.txt"); + assert_eq!(it.next().unwrap(), "chunk_ab.txt"); + assert_eq!(it.next().unwrap(), "chunk_ac.txt"); - assert_eq!(str_prefix(26 * 25 - 1).as_deref(), Some("yz")); - assert_eq!(str_prefix(26 * 25).as_deref(), Some("zaaa")); - assert_eq!(str_prefix(26 * 25 + 1).as_deref(), Some("zaab")); - } - - #[test] - fn test_num_prefix_dynamic_width() { - assert_eq!(num_prefix(0), "00"); - assert_eq!(num_prefix(9), "09"); - assert_eq!(num_prefix(17), "17"); - assert_eq!(num_prefix(89), "89"); - assert_eq!(num_prefix(90), "9000"); - assert_eq!(num_prefix(91), "9001"); - assert_eq!(num_prefix(989), "9899"); - assert_eq!(num_prefix(990), "990000"); - } - - #[test] - fn test_str_prefix_fixed_width() { - assert_eq!(str_prefix_fixed_width(0, 2).as_deref(), Some("aa")); - assert_eq!(str_prefix_fixed_width(1, 2).as_deref(), Some("ab")); - assert_eq!(str_prefix_fixed_width(26, 2).as_deref(), Some("ba")); - assert_eq!( - str_prefix_fixed_width(26 * 26 - 1, 2).as_deref(), - Some("zz") - ); - assert_eq!(str_prefix_fixed_width(26 * 26, 2).as_deref(), None); - } - - #[test] - fn test_num_prefix_fixed_width() { - assert_eq!(num_prefix_fixed_width(0, 2).as_deref(), Some("00")); - assert_eq!(num_prefix_fixed_width(1, 2).as_deref(), Some("01")); - assert_eq!(num_prefix_fixed_width(99, 2).as_deref(), Some("99")); - assert_eq!(num_prefix_fixed_width(100, 2).as_deref(), None); - } - - #[test] - fn test_alphabetic_suffix() { - let factory = FilenameFactory::new("123", "789", 3, false); - assert_eq!(factory.make(0).unwrap(), "123aaa789"); - assert_eq!(factory.make(1).unwrap(), "123aab789"); - assert_eq!(factory.make(28).unwrap(), "123abc789"); + let mut it = FilenameIterator::new("chunk_", ".txt", 0, false); + assert_eq!(it.nth(26 * 25 - 1).unwrap(), "chunk_yz.txt"); + assert_eq!(it.next().unwrap(), "chunk_zaaa.txt"); + assert_eq!(it.next().unwrap(), "chunk_zaab.txt"); } #[test] - fn 
test_numeric_suffix() { - let factory = FilenameFactory::new("abc", "xyz", 3, true); - assert_eq!(factory.make(0).unwrap(), "abc000xyz"); - assert_eq!(factory.make(1).unwrap(), "abc001xyz"); - assert_eq!(factory.make(123).unwrap(), "abc123xyz"); + fn test_filename_iterator_numeric_dynamic_width() { + let mut it = FilenameIterator::new("chunk_", ".txt", 0, true); + assert_eq!(it.next().unwrap(), "chunk_00.txt"); + assert_eq!(it.next().unwrap(), "chunk_01.txt"); + assert_eq!(it.next().unwrap(), "chunk_02.txt"); + + let mut it = FilenameIterator::new("chunk_", ".txt", 0, true); + assert_eq!(it.nth(10 * 9 - 1).unwrap(), "chunk_89.txt"); + assert_eq!(it.next().unwrap(), "chunk_9000.txt"); + assert_eq!(it.next().unwrap(), "chunk_9001.txt"); } } diff --git a/src/uu/split/src/number.rs b/src/uu/split/src/number.rs new file mode 100644 index 0000000000..b2c4027164 --- /dev/null +++ b/src/uu/split/src/number.rs @@ -0,0 +1,513 @@ +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. +// spell-checker:ignore zaaa zaab +//! A number in arbitrary radix expressed in a positional notation. +//! +//! Use the [`Number`] enum to represent an arbitrary number in an +//! arbitrary radix. A number can be incremented and can be +//! displayed. See the [`Number`] documentation for more information. +//! +//! See the Wikipedia articles on [radix] and [positional notation] +//! for more background information on those topics. +//! +//! [radix]: https://en.wikipedia.org/wiki/Radix +//! [positional notation]: https://en.wikipedia.org/wiki/Positional_notation +use std::error::Error; +use std::fmt::{self, Display, Formatter}; + +/// An overflow due to incrementing a number beyond its representable limit. 
+#[derive(Debug)] +pub struct Overflow; + +impl fmt::Display for Overflow { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "Overflow") + } +} + +impl Error for Overflow {} + +/// A number in arbitrary radix expressed in a positional notation. +/// +/// Use the [`Number`] enum to represent an arbitrary number in an +/// arbitrary radix. A number can be incremented with +/// [`Number::increment`]. The [`FixedWidthNumber`] overflows when +/// attempting to increment it beyond the maximum number that can be +/// represented in the specified width. The [`DynamicWidthNumber`] +/// follows a non-standard incrementing procedure that is used +/// specifically for the `split` program. See the +/// [`DynamicWidthNumber`] documentation for more information. +/// +/// Numbers of radix 10 are displayable and rendered as decimal +/// numbers (for example, "00" or "917"). Numbers of radix 26 are +/// displayable and rendered as lowercase ASCII alphabetic characters +/// (for example, "aa" or "zax"). Numbers of other radices cannot be +/// displayed. The display of a [`DynamicWidthNumber`] includes a +/// prefix whose length depends on the width of the number. See the +/// [`DynamicWidthNumber`] documentation for more information. +/// +/// The digits of a number are accessible via the [`Number::digits`] +/// method. The digits are represented as a [`Vec<u8>`] with the most +/// significant digit on the left and the least significant digit on +/// the right. Each digit is a nonnegative integer less than the +/// radix. For example, if the radix is 3, then `vec![1, 0, 2]` +/// represents the decimal number 11: +/// +/// ```ignore +/// 1 * 3^2 + 0 * 3^1 + 2 * 3^0 = 9 + 0 + 2 = 11 +/// ``` +/// +/// For the [`DynamicWidthNumber`], the digits are not unique in the +/// sense that repeatedly incrementing the number will eventually +/// yield `vec![0, 0]`, `vec![0, 0, 0]`, `vec![0, 0, 0, 0]`, etc. 
+/// That's okay because each of these numbers will be displayed +/// differently and we only intend to use these numbers for display +/// purposes and not for mathematical purposes. +#[derive(Clone)] +pub enum Number { + /// A fixed-width representation of a number. + FixedWidth(FixedWidthNumber), + + /// A representation of a number with a dynamically growing width. + DynamicWidth(DynamicWidthNumber), +} + +impl Number { + /// The digits of this number in decreasing order of significance. + /// + /// The digits are represented as a [`Vec<u8>`] with the most + /// significant digit on the left and the least significant digit + /// on the right. Each digit is a nonnegative integer less than + /// the radix. For example, if the radix is 3, then `vec![1, 0, + /// 2]` represents the decimal number 11: + /// + /// ```ignore + /// 1 * 3^2 + 0 * 3^1 + 2 * 3^0 = 9 + 0 + 2 = 11 + /// ``` + /// + /// For the [`DynamicWidthNumber`], the digits are not unique in the + /// sense that repeatedly incrementing the number will eventually + /// yield `vec![0, 0]`, `vec![0, 0, 0]`, `vec![0, 0, 0, 0]`, etc. + /// That's okay because each of these numbers will be displayed + /// differently and we only intend to use these numbers for display + /// purposes and not for mathematical purposes. + #[allow(dead_code)] + fn digits(&self) -> &Vec<u8> { + match self { + Number::FixedWidth(number) => &number.digits, + Number::DynamicWidth(number) => &number.digits, + } + } + + /// Increment this number to its successor. + /// + /// If incrementing this number would result in an overflow beyond + /// the maximum representable number, then return + /// [`Err(Overflow)`]. The [`FixedWidthNumber`] overflows, but + /// [`DynamicWidthNumber`] does not. + /// + /// The [`DynamicWidthNumber`] follows a non-standard incrementing + /// procedure that is used specifically for the `split` program. + /// See the [`DynamicWidthNumber`] documentation for more + /// information. 
+ /// + /// # Errors + /// + /// This method returns [`Err(Overflow)`] when attempting to + /// increment beyond the largest representable number. + /// + /// # Examples + /// + /// Overflowing: + /// + /// ```rust,ignore + /// + /// use crate::number::FixedWidthNumber; + /// use crate::number::Number; + /// use crate::number::Overflow; + /// + /// // Radix 3, width of 1 digit. + /// let mut number = Number::FixedWidth(FixedWidthNumber::new(3, 1)); + /// number.increment().unwrap(); // from 0 to 1 + /// number.increment().unwrap(); // from 1 to 2 + /// assert!(number.increment().is_err()); + /// ``` + pub fn increment(&mut self) -> Result<(), Overflow> { + match self { + Number::FixedWidth(number) => number.increment(), + Number::DynamicWidth(number) => number.increment(), + } + } +} + +impl Display for Number { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self { + Number::FixedWidth(number) => number.fmt(f), + Number::DynamicWidth(number) => number.fmt(f), + } + } +} + +/// A positional notation representation of a fixed-width number. +/// +/// The digits are represented as a [`Vec<u8>`] with the most +/// significant digit on the left and the least significant digit on +/// the right. Each digit is a nonnegative integer less than the +/// radix. +/// +/// # Incrementing +/// +/// This number starts at `vec![0; width]`, representing the number 0 +/// with the specified number of digits. Incrementing this number +/// with [`Number::increment`] causes it to increase its value by 1 in +/// the usual sense. If the digits are `vec![radix - 1; width]`, then +/// an overflow would occur and the [`Number::increment`] method +/// returns an error. +/// +/// # Displaying +/// +/// This number is only displayable if `radix` is 10 or `radix` is +/// 26. If `radix` is 10, then the digits are concatenated and +/// displayed as a fixed-width decimal number. 
If `radix` is 26, then +/// each digit is translated to the corresponding lowercase ASCII +/// alphabetic character (that is, 'a', 'b', 'c', etc.) and +/// concatenated. +#[derive(Clone)] +pub struct FixedWidthNumber { + radix: u8, + digits: Vec<u8>, +} + +impl FixedWidthNumber { + /// Instantiate a number of the given radix and width. + pub fn new(radix: u8, width: usize) -> FixedWidthNumber { + FixedWidthNumber { + radix, + digits: vec![0; width], + } + } + + /// Increment this number. + /// + /// This method adds one to this number. If incrementing this + /// number would require more digits than are available with the + /// specified width, then this method returns [`Err(Overflow)`]. + fn increment(&mut self) -> Result<(), Overflow> { + for i in (0..self.digits.len()).rev() { + // Increment the current digit. + self.digits[i] += 1; + + // If the digit overflows, then set it to 0 and continue + // to the next iteration to increment the next most + // significant digit. Otherwise, terminate the loop, since + // there will be no further changes to any higher order + // digits. + if self.digits[i] == self.radix { + self.digits[i] = 0; + } else { + break; + } + } + + // Return an error on overflow, which is signified by all zeros. + if self.digits == vec![0; self.digits.len()] { + Err(Overflow) + } else { + Ok(()) + } + } +} + +impl Display for FixedWidthNumber { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self.radix { + 10 => { + let digits: String = self.digits.iter().map(|d| (b'0' + d) as char).collect(); + write!(f, "{}", digits) + } + 26 => { + let digits: String = self.digits.iter().map(|d| (b'a' + d) as char).collect(); + write!(f, "{}", digits) + } + _ => Err(fmt::Error), + } + } +} + +/// A positional notation representation of a number of dynamically growing width. +/// +/// The digits are represented as a [`Vec<u8>`] with the most +/// significant digit on the left and the least significant digit on +/// the right. 
Each digit is a nonnegative integer less than the +/// radix. +/// +/// # Incrementing +/// +/// This number starts at `vec![0, 0]`, representing the number 0 with +/// a width of 2 digits. Incrementing this number with +/// [`Number::increment`] causes it to increase its value by 1. When +/// incrementing the number would have caused it to change from +/// `vec![radix - 2, radix - 1]` to `vec![radix - 1, 0]`, it instead +/// increases its width by one and resets its value to 0. For example, +/// if the radix were 3, the digits were `vec![1, 2]`, and we called +/// [`Number::increment`], then the digits would become `vec![0, 0, +/// 0]`. In this way, the width grows by one each time the most +/// significant digit would have achieved its maximum value. +/// +/// This notion of "incrementing" here does not match the notion of +/// incrementing the *value* of the number, it is just an abstract way +/// of updating the representation of the number in a way that is only +/// useful for the purposes of the `split` program. +/// +/// # Displaying +/// +/// This number is only displayable if `radix` is 10 or `radix` is +/// 26. If `radix` is 10, then the digits are concatenated and +/// displayed as a fixed-width decimal number with a prefix of `n - 2` +/// instances of the character '9', where `n` is the number of digits. +/// If `radix` is 26, then each digit is translated to the +/// corresponding lowercase ASCII alphabetic character (that is, 'a', +/// 'b', 'c', etc.) and concatenated with a prefix of `n - 2` +/// instances of the character 'z'. +/// +/// This notion of displaying the number is specific to the `split` +/// program. +#[derive(Clone)] +pub struct DynamicWidthNumber { + radix: u8, + digits: Vec<u8>, +} + +impl DynamicWidthNumber { + /// Instantiate a number of the given radix, starting with width 2. + /// + /// This associated function returns a new instance of the struct + /// with the given radix and a width of two digits, both 0. 
+ pub fn new(radix: u8) -> DynamicWidthNumber { + DynamicWidthNumber { + radix, + digits: vec![0, 0], + } + } + + /// Set all digits to zero. + fn reset(&mut self) { + for i in 0..self.digits.len() { + self.digits[i] = 0; + } + } + + /// Increment this number. + /// + /// This method adds one to this number. The first time that the + /// most significant digit would achieve its highest possible + /// value (that is, `radix - 1`), then all the digits get reset to + /// 0 and the number of digits increases by one. + /// + /// This method never returns an error. + fn increment(&mut self) -> Result<(), Overflow> { + for i in (0..self.digits.len()).rev() { + // Increment the current digit. + self.digits[i] += 1; + + // If the digit overflows, then set it to 0 and continue + // to the next iteration to increment the next most + // significant digit. Otherwise, terminate the loop, since + // there will be no further changes to any higher order + // digits. + if self.digits[i] == self.radix { + self.digits[i] = 0; + } else { + break; + } + } + + // If the most significant digit is at its maximum value, then + // add another digit and reset all digits to zero. 
+ if self.digits[0] == self.radix - 1 { + self.digits.push(0); + self.reset(); + } + Ok(()) + } +} + +impl Display for DynamicWidthNumber { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self.radix { + 10 => { + let num_fill_chars = self.digits.len() - 2; + let digits: String = self.digits.iter().map(|d| (b'0' + d) as char).collect(); + write!( + f, + "{empty:9<num_fill_chars$}{digits}", + empty = "", + num_fill_chars = num_fill_chars, + digits = digits, + ) + } + 26 => { + let num_fill_chars = self.digits.len() - 2; + let digits: String = self.digits.iter().map(|d| (b'a' + d) as char).collect(); + write!( + f, + "{empty:z<num_fill_chars$}{digits}", + empty = "", + num_fill_chars = num_fill_chars, + digits = digits, + ) + } + _ => Err(fmt::Error), + } + } +} + +#[cfg(test)] +mod tests { + use crate::number::DynamicWidthNumber; + use crate::number::FixedWidthNumber; + use crate::number::Number; + use crate::number::Overflow; + + #[test] + fn test_dynamic_width_number_increment() { + let mut n = Number::DynamicWidth(DynamicWidthNumber::new(3)); + assert_eq!(n.digits(), &vec![0, 0]); + + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![0, 1]); + + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![0, 2]); + + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![1, 0]); + + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![1, 1]); + + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![1, 2]); + + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![0, 0, 0]); + + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![0, 0, 1]); + } + + #[test] + fn test_dynamic_width_number_display_alphabetic() { + fn num(n: usize) -> Number { + let mut number = Number::DynamicWidth(DynamicWidthNumber::new(26)); + for _ in 0..n { + number.increment().unwrap() + } + number + } + + assert_eq!(format!("{}", num(0)), "aa"); + assert_eq!(format!("{}", num(1)), "ab"); + assert_eq!(format!("{}", num(2)), "ac"); + assert_eq!(format!("{}", num(25)), "az"); + assert_eq!(format!("{}", num(26)), "ba"); + assert_eq!(format!("{}", num(27)), "bb"); + assert_eq!(format!("{}", num(28)), "bc"); + assert_eq!(format!("{}", num(26 + 25)), "bz"); + assert_eq!(format!("{}", num(26 + 26)), 
"ca"); + assert_eq!(format!("{}", num(26 * 25 - 1)), "yz"); + assert_eq!(format!("{}", num(26 * 25)), "zaaa"); + assert_eq!(format!("{}", num(26 * 25 + 1)), "zaab"); + } + + #[test] + fn test_dynamic_width_number_display_numeric() { + fn num(n: usize) -> Number { + let mut number = Number::DynamicWidth(DynamicWidthNumber::new(10)); + for _ in 0..n { + number.increment().unwrap() + } + number + } + + assert_eq!(format!("{}", num(0)), "00"); + assert_eq!(format!("{}", num(9)), "09"); + assert_eq!(format!("{}", num(17)), "17"); + assert_eq!(format!("{}", num(10 * 9 - 1)), "89"); + assert_eq!(format!("{}", num(10 * 9)), "9000"); + assert_eq!(format!("{}", num(10 * 9 + 1)), "9001"); + assert_eq!(format!("{}", num(10 * 99 - 1)), "9899"); + assert_eq!(format!("{}", num(10 * 99)), "990000"); + assert_eq!(format!("{}", num(10 * 99 + 1)), "990001"); + } + + #[test] + fn test_fixed_width_number_increment() { + let mut n = Number::FixedWidth(FixedWidthNumber::new(3, 2)); + assert_eq!(n.digits(), &vec![0, 0]); + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![0, 1]); + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![0, 2]); + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![1, 0]); + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![1, 1]); + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![1, 2]); + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![2, 0]); + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![2, 1]); + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![2, 2]); + assert!(n.increment().is_err()); + } + + #[test] + fn test_fixed_width_number_display_alphabetic() { + fn num(n: usize) -> Result<Number, Overflow> { + let mut number = Number::FixedWidth(FixedWidthNumber::new(26, 2)); + for _ in 0..n { + number.increment()?; + } + Ok(number) + } + + assert_eq!(format!("{}", num(0).unwrap()), "aa"); + assert_eq!(format!("{}", num(1).unwrap()), "ab"); + assert_eq!(format!("{}", num(2).unwrap()), "ac"); + assert_eq!(format!("{}", 
num(25).unwrap()), "az"); + assert_eq!(format!("{}", num(26).unwrap()), "ba"); + assert_eq!(format!("{}", num(27).unwrap()), "bb"); + assert_eq!(format!("{}", num(28).unwrap()), "bc"); + assert_eq!(format!("{}", num(26 + 25).unwrap()), "bz"); + assert_eq!(format!("{}", num(26 + 26).unwrap()), "ca"); + assert_eq!(format!("{}", num(26 * 25 - 1).unwrap()), "yz"); + assert_eq!(format!("{}", num(26 * 25).unwrap()), "za"); + assert_eq!(format!("{}", num(26 * 26 - 1).unwrap()), "zz"); + assert!(num(26 * 26).is_err()); + } + + #[test] + fn test_fixed_width_number_display_numeric() { + fn num(n: usize) -> Result { + let mut number = Number::FixedWidth(FixedWidthNumber::new(10, 2)); + for _ in 0..n { + number.increment()?; + } + Ok(number) + } + + assert_eq!(format!("{}", num(0).unwrap()), "00"); + assert_eq!(format!("{}", num(9).unwrap()), "09"); + assert_eq!(format!("{}", num(17).unwrap()), "17"); + assert_eq!(format!("{}", num(10 * 9 - 1).unwrap()), "89"); + assert_eq!(format!("{}", num(10 * 9).unwrap()), "90"); + assert_eq!(format!("{}", num(10 * 10 - 1).unwrap()), "99"); + assert!(num(10 * 10).is_err()); + } +} diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 239df62fb6..23eb247681 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -8,9 +8,10 @@ // spell-checker:ignore (ToDO) PREFIXaa mod filenames; +mod number; mod platform; -use crate::filenames::FilenameFactory; +use crate::filenames::FilenameIterator; use clap::{crate_version, App, AppSettings, Arg, ArgMatches}; use std::convert::TryFrom; use std::env; @@ -384,7 +385,7 @@ where let chunk_size = (num_bytes / (num_chunks as u64)) as usize; // This object is responsible for creating the filename for each chunk. - let filename_factory = FilenameFactory::new( + let mut filename_iterator = FilenameIterator::new( &settings.prefix, &settings.additional_suffix, settings.suffix_length, @@ -394,9 +395,9 @@ where // Create one writer for each chunk. 
This will create each // of the underlying files (if not in `--filter` mode). let mut writers = vec![]; - for i in 0..num_chunks { - let filename = filename_factory - .make(i) + for _ in 0..num_chunks { + let filename = filename_iterator + .next() .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; let writer = platform::instantiate_current_writer(&settings.filter, filename.as_str()); writers.push(writer); @@ -462,17 +463,16 @@ fn split(settings: &Settings) -> UResult<()> { }; // This object is responsible for creating the filename for each chunk. - let filename_factory = FilenameFactory::new( + let mut filename_iterator = FilenameIterator::new( &settings.prefix, &settings.additional_suffix, settings.suffix_length, settings.numeric_suffix, ); - let mut fileno = 0; loop { // Get a new part file set up, and construct `writer` for it. - let filename = filename_factory - .make(fileno) + let filename = filename_iterator + .next() .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; let mut writer = platform::instantiate_current_writer(&settings.filter, filename.as_str()); @@ -509,8 +509,6 @@ fn split(settings: &Settings) -> UResult<()> { if settings.verbose { println!("creating file {}", filename.quote()); } - - fileno += 1; } Ok(()) }