From a5b435da581da6a803e8e1e695c44684a90cada2 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Sun, 9 Jan 2022 13:39:55 -0500 Subject: [PATCH] split: use iterator to produce filenames Replace the `FilenameFactory` with `FilenameIterator` and calls to `FilenameFactory::make()` with calls to `FilenameIterator::next()`. We did not need the full generality of being able to produce the filename for an arbitrary chunk index. Instead we need only iterate over filenames one after another. This allows for a less mathematically dense algorithm that is easier to understand and maintain. Furthermore, it can be connected to some familiar concepts from the representation of numbers as a sequence of digits. This does not change the behavior of the `split` program, just the implementation of how filenames are produced. Co-authored-by: Terts Diepraam --- src/uu/split/src/filenames.rs | 551 +++++++--------------------------- src/uu/split/src/number.rs | 513 +++++++++++++++++++++++++++++++ src/uu/split/src/split.rs | 20 +- 3 files changed, 624 insertions(+), 460 deletions(-) create mode 100644 src/uu/split/src/number.rs diff --git a/src/uu/split/src/filenames.rs b/src/uu/split/src/filenames.rs index 36488e7e417..3e2db36063c 100644 --- a/src/uu/split/src/filenames.rs +++ b/src/uu/split/src/filenames.rs @@ -2,529 +2,182 @@ // * // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. -// spell-checker:ignore zaaa zaab zzaaaa zzzaaaaa +// spell-checker:ignore zaaa zaab //! Compute filenames from a given index. //! -//! The [`FilenameFactory`] can be used to convert a chunk index given -//! as a [`usize`] to a filename for that chunk. +//! The [`FilenameIterator`] yields filenames for use with ``split``. //! //! # Examples //! //! Create filenames of the form `chunk_??.txt`: //! //! ```rust,ignore -//! use crate::filenames::FilenameFactory; +//! use crate::filenames::FilenameIterator; //! //! 
let prefix = "chunk_".to_string(); //! let suffix = ".txt".to_string(); //! let width = 2; //! let use_numeric_suffix = false; -//! let factory = FilenameFactory::new(prefix, suffix, width, use_numeric_suffix); +//! let mut it = FilenameIterator::new(prefix, suffix, width, use_numeric_suffix); //! -//! assert_eq!(factory.make(0).unwrap(), "chunk_aa.txt"); -//! assert_eq!(factory.make(10).unwrap(), "chunk_ak.txt"); -//! assert_eq!(factory.make(28).unwrap(), "chunk_bc.txt"); +//! assert_eq!(it.next().unwrap(), "chunk_aa.txt"); +//! assert_eq!(it.next().unwrap(), "chunk_ab.txt"); +//! assert_eq!(it.next().unwrap(), "chunk_ac.txt"); //! ``` - -/// Base 10 logarithm. -fn log10(n: usize) -> usize { - (n as f64).log10() as usize -} - -/// Base 26 logarithm. -fn log26(n: usize) -> usize { - (n as f64).log(26.0) as usize -} - -/// Convert a radix 10 number to a radix 26 number of the given width. -/// -/// `n` is the radix 10 (that is, decimal) number to transform. This -/// function returns a [`Vec`] of unsigned integers representing the -/// digits, with the most significant digit first and the least -/// significant digit last. The returned `Vec` is always of length -/// `width`. -/// -/// If the number `n` is too large to represent within `width` digits, -/// then this function returns `None`. -/// -/// # Examples -/// -/// ```rust,ignore -/// use crate::filenames::to_radix_26; -/// -/// assert_eq!(to_radix_26(20, 2), Some(vec![0, 20])); -/// assert_eq!(to_radix_26(26, 2), Some(vec![1, 0])); -/// assert_eq!(to_radix_26(30, 2), Some(vec![1, 4])); -/// ``` -fn to_radix_26(mut n: usize, width: usize) -> Option> { - if width == 0 { - return None; - } - // Use the division algorithm to repeatedly compute the quotient - // and remainder of the number after division by the radix 26. The - // successive quotients are the digits in radix 26, from most - // significant to least significant. 
- let mut result = vec![]; - for w in (0..width).rev() { - let divisor = 26_usize.pow(w as u32); - let (quotient, remainder) = (n / divisor, n % divisor); - n = remainder; - // If the quotient is equal to or greater than the radix, that - // means the number `n` requires a greater width to be able to - // represent it in radix 26. - if quotient >= 26 { - return None; - } - result.push(quotient as u8); - } - Some(result) -} - -/// Convert a number between 0 and 25 into a lowercase ASCII character. -/// -/// # Examples -/// -/// ```rust,ignore -/// use crate::filenames::to_ascii_char; -/// -/// assert_eq!(to_ascii_char(&0), Some('a')); -/// assert_eq!(to_ascii_char(&25), Some('z')); -/// assert_eq!(to_ascii_char(&26), None); -/// ``` -fn to_ascii_char(n: &u8) -> Option { - // TODO In Rust v1.52.0 or later, use `char::from_digit`: - // https://doc.rust-lang.org/std/primitive.char.html#method.from_digit - // - // char::from_digit(*n as u32 + 10, 36) - // - // In that call, radix 36 is used because the characters in radix - // 36 are [0-9a-z]. We want to exclude the the first ten of those - // characters, so we add 10 to the number before conversion. - // - // Until that function is available, just add `n` to `b'a'` and - // cast to `char`. - if *n < 26 { - Some((b'a' + n) as char) - } else { - None - } -} - -/// Fixed width alphabetic string representation of index `i`. -/// -/// If `i` is greater than or equal to the number of lowercase ASCII -/// strings that can be represented in the given `width`, then this -/// function returns `None`. 
-/// -/// # Examples -/// -/// ```rust,ignore -/// use crate::filenames::str_prefix_fixed_width; -/// -/// assert_eq!(str_prefix_fixed_width(0, 2).as_deref(), "aa"); -/// assert_eq!(str_prefix_fixed_width(675, 2).as_deref(), "zz"); -/// assert_eq!(str_prefix_fixed_width(676, 2), None); -/// ``` -fn str_prefix_fixed_width(i: usize, width: usize) -> Option { - to_radix_26(i, width)?.iter().map(to_ascii_char).collect() -} - -/// Dynamically sized alphabetic string representation of index `i`. -/// -/// The size of the returned string starts at two then grows by 2 if -/// `i` is sufficiently large. -/// -/// # Examples -/// -/// ```rust,ignore -/// use crate::filenames::str_prefix; -/// -/// assert_eq!(str_prefix(0), "aa"); -/// assert_eq!(str_prefix(649), "yz"); -/// assert_eq!(str_prefix(650), "zaaa"); -/// assert_eq!(str_prefix(651), "zaab"); -/// ``` -fn str_prefix(i: usize) -> Option { - // This number tells us the order of magnitude of `i`, with a - // slight adjustment. - // - // We shift by 26 so that - // - // * if `i` is in the interval [0, 26^2 - 26), then `d` is 1, - // * if `i` is in the interval [26^2 - 26, 26^3 - 26), then `d` is 2, - // * if `i` is in the interval [26^3 - 26, 26^4 - 26), then `d` is 3, - // - // and so on. This will allow us to compute how many leading "z" - // characters need to appear in the string and how many characters - // to format to the right of those. - let d = log26(i + 26); - - // This is the number of leading "z" characters. - // - // For values of `i` less than 26^2 - 26, the returned string is - // just the radix 26 representation of that number with a width of - // two (using the lowercase ASCII characters as the digits). - // - // * if `i` is 26^2 - 26, then the returned string is "zaa", - // * if `i` is 26^3 - 26, then the returned string is "zzaaaa", - // * if `i` is 26^4 - 26, then the returned string is "zzzaaaaa", - // - // and so on. 
As you can see, the number of leading "z"s there is - // linearly increasing by 1 for each order of magnitude. - let num_fill_chars = d - 1; - - // This is the number of characters after the leading "z" characters. - let width = d + 1; - - // This is the radix 10 number to render in radix 26, to the right - // of the leading "z"s. - let number = (i + 26) - 26_usize.pow(d as u32); - - // This is the radix 26 number to render after the leading "z"s, - // collected in a `String`. - // - // For example, if `i` is 789, then `number` is 789 + 26 - 676, - // which equals 139. In radix 26 and assuming a `width` of 3, this - // number is - // - // [0, 5, 9] - // - // with the most significant digit on the left and the least - // significant digit on the right. After translating to ASCII - // lowercase letters, this becomes "afj". - let digits = str_prefix_fixed_width(number, width)?; - - // `empty` is just the empty string, to be displayed with a width - // of `num_fill_chars` and with blank spaces filled with the - // character "z". - // - // `digits` is as described in the previous comment. - Some(format!( - "{empty:z Option { - let max = 10_usize.pow(width as u32); - if i >= max { - None - } else { - Some(format!("{i:0width$}", i = i, width = width)) - } -} - -/// Dynamically sized numeric string representation of index `i`. -/// -/// The size of the returned string starts at two then grows by 2 if -/// `i` is sufficiently large. -/// -/// # Examples -/// -/// ```rust,ignore -/// use crate::filenames::num_prefix; -/// -/// assert_eq!(num_prefix(89), "89"); -/// assert_eq!(num_prefix(90), "9000"); -/// assert_eq!(num_prefix(91), "9001"); -/// ``` -fn num_prefix(i: usize) -> String { - // This number tells us the order of magnitude of `i`, with a - // slight adjustment. 
- // - // We shift by 10 so that - // - // * if `i` is in the interval [0, 90), then `d` is 1, - // * if `i` is in the interval [90, 990), then `d` is 2, - // * if `i` is in the interval [990, 9990), then `d` is 3, - // - // and so on. This will allow us to compute how many leading "9" - // characters need to appear in the string and how many digits to - // format to the right of those. - let d = log10(i + 10); - - // This is the number of leading "9" characters. - // - // For values of `i` less than 90, the returned string is just - // that number padded by a 0 to ensure the width is 2, but - // - // * if `i` is 90, then the returned string is "900", - // * if `i` is 990, then the returned string is "990000", - // * if `i` is 9990, then the returned string is "99900000", - // - // and so on. As you can see, the number of leading 9s there is - // linearly increasing by 1 for each order of magnitude. - let num_fill_chars = d - 1; - - // This is the number of characters after the leading "9" characters. - let width = d + 1; - - // This is the number to render after the leading "9"s. - // - // For example, if `i` is 5732, then the returned string is - // "994742". After the two "9" characters is the number 4742, - // which equals 5732 + 10 - 1000. - let number = (i + 10) - 10_usize.pow(d as u32); - - // `empty` is just the empty string, to be displayed with a width - // of `num_fill_chars` and with blank spaces filled with the - // character "9". - // - // `number` is the next remaining part of the number to render; - // for small numbers we pad with 0 and enforce a minimum width. - format!( - "{empty:9 { - prefix: &'a str, +pub struct FilenameIterator<'a> { additional_suffix: &'a str, - suffix_length: usize, - use_numeric_suffix: bool, + prefix: &'a str, + number: Number, + first_iteration: bool, } -impl<'a> FilenameFactory<'a> { - /// Create a new instance of this struct. - /// - /// For an explanation of the parameters, see the struct documentation. 
+impl<'a> FilenameIterator<'a> { pub fn new( prefix: &'a str, additional_suffix: &'a str, suffix_length: usize, use_numeric_suffix: bool, - ) -> FilenameFactory<'a> { - FilenameFactory { + ) -> FilenameIterator<'a> { + let radix = if use_numeric_suffix { 10 } else { 26 }; + let number = if suffix_length == 0 { + Number::DynamicWidth(DynamicWidthNumber::new(radix)) + } else { + Number::FixedWidth(FixedWidthNumber::new(radix, suffix_length)) + }; + FilenameIterator { prefix, additional_suffix, - suffix_length, - use_numeric_suffix, + number, + first_iteration: true, } } +} - /// Construct the filename for the specified element of the output collection of files. - /// - /// For an explanation of the parameters, see the struct documentation. - /// - /// If `suffix_length` has been set to a positive integer and `i` - /// is greater than or equal to the number of strings that can be - /// represented within that length, then this returns `None`. For - /// example: - /// - /// ```rust,ignore - /// use crate::filenames::FilenameFactory; - /// - /// let prefix = ""; - /// let suffix = ""; - /// let width = 1; - /// let use_numeric_suffix = true; - /// let factory = FilenameFactory::new(prefix, suffix, width, use_numeric_suffix); - /// - /// assert_eq!(factory.make(10), None); - /// ``` - pub fn make(&self, i: usize) -> Option { - let suffix = match (self.use_numeric_suffix, self.suffix_length) { - (true, 0) => Some(num_prefix(i)), - (false, 0) => str_prefix(i), - (true, width) => num_prefix_fixed_width(i, width), - (false, width) => str_prefix_fixed_width(i, width), - }?; +impl<'a> Iterator for FilenameIterator<'a> { + type Item = String; + + fn next(&mut self) -> Option { + if self.first_iteration { + self.first_iteration = false; + } else { + self.number.increment().ok()?; + } + // The first and third parts are just taken directly from the + // struct parameters unchanged. 
Some(format!( "{}{}{}", - self.prefix, suffix, self.additional_suffix + self.prefix, self.number, self.additional_suffix )) } } #[cfg(test)] mod tests { - use crate::filenames::num_prefix; - use crate::filenames::num_prefix_fixed_width; - use crate::filenames::str_prefix; - use crate::filenames::str_prefix_fixed_width; - use crate::filenames::to_ascii_char; - use crate::filenames::to_radix_26; - use crate::filenames::FilenameFactory; - #[test] - fn test_to_ascii_char() { - assert_eq!(to_ascii_char(&0), Some('a')); - assert_eq!(to_ascii_char(&5), Some('f')); - assert_eq!(to_ascii_char(&25), Some('z')); - assert_eq!(to_ascii_char(&26), None); - } + use crate::filenames::FilenameIterator; #[test] - fn test_to_radix_26_exceed_width() { - assert_eq!(to_radix_26(1, 0), None); - assert_eq!(to_radix_26(26, 1), None); - assert_eq!(to_radix_26(26 * 26, 2), None); - } + fn test_filename_iterator_alphabetic_fixed_width() { + let mut it = FilenameIterator::new("chunk_", ".txt", 2, false); + assert_eq!(it.next().unwrap(), "chunk_aa.txt"); + assert_eq!(it.next().unwrap(), "chunk_ab.txt"); + assert_eq!(it.next().unwrap(), "chunk_ac.txt"); - #[test] - fn test_to_radix_26_width_one() { - assert_eq!(to_radix_26(0, 1), Some(vec![0])); - assert_eq!(to_radix_26(10, 1), Some(vec![10])); - assert_eq!(to_radix_26(20, 1), Some(vec![20])); - assert_eq!(to_radix_26(25, 1), Some(vec![25])); + let mut it = FilenameIterator::new("chunk_", ".txt", 2, false); + assert_eq!(it.nth(26 * 26 - 1).unwrap(), "chunk_zz.txt"); + assert_eq!(it.next(), None); } #[test] - fn test_to_radix_26_width_two() { - assert_eq!(to_radix_26(0, 2), Some(vec![0, 0])); - assert_eq!(to_radix_26(10, 2), Some(vec![0, 10])); - assert_eq!(to_radix_26(20, 2), Some(vec![0, 20])); - assert_eq!(to_radix_26(25, 2), Some(vec![0, 25])); + fn test_filename_iterator_numeric_fixed_width() { + let mut it = FilenameIterator::new("chunk_", ".txt", 2, true); + assert_eq!(it.next().unwrap(), "chunk_00.txt"); + assert_eq!(it.next().unwrap(), 
"chunk_01.txt"); + assert_eq!(it.next().unwrap(), "chunk_02.txt"); - assert_eq!(to_radix_26(26, 2), Some(vec![1, 0])); - assert_eq!(to_radix_26(30, 2), Some(vec![1, 4])); - - assert_eq!(to_radix_26(26 * 2, 2), Some(vec![2, 0])); - assert_eq!(to_radix_26(26 * 26 - 1, 2), Some(vec![25, 25])); + let mut it = FilenameIterator::new("chunk_", ".txt", 2, true); + assert_eq!(it.nth(10 * 10 - 1).unwrap(), "chunk_99.txt"); + assert_eq!(it.next(), None); } #[test] - fn test_str_prefix_dynamic_width() { - assert_eq!(str_prefix(0).as_deref(), Some("aa")); - assert_eq!(str_prefix(1).as_deref(), Some("ab")); - assert_eq!(str_prefix(2).as_deref(), Some("ac")); - assert_eq!(str_prefix(25).as_deref(), Some("az")); - - assert_eq!(str_prefix(26).as_deref(), Some("ba")); - assert_eq!(str_prefix(27).as_deref(), Some("bb")); - assert_eq!(str_prefix(28).as_deref(), Some("bc")); - assert_eq!(str_prefix(51).as_deref(), Some("bz")); - - assert_eq!(str_prefix(52).as_deref(), Some("ca")); + fn test_filename_iterator_alphabetic_dynamic_width() { + let mut it = FilenameIterator::new("chunk_", ".txt", 0, false); + assert_eq!(it.next().unwrap(), "chunk_aa.txt"); + assert_eq!(it.next().unwrap(), "chunk_ab.txt"); + assert_eq!(it.next().unwrap(), "chunk_ac.txt"); - assert_eq!(str_prefix(26 * 25 - 1).as_deref(), Some("yz")); - assert_eq!(str_prefix(26 * 25).as_deref(), Some("zaaa")); - assert_eq!(str_prefix(26 * 25 + 1).as_deref(), Some("zaab")); - } - - #[test] - fn test_num_prefix_dynamic_width() { - assert_eq!(num_prefix(0), "00"); - assert_eq!(num_prefix(9), "09"); - assert_eq!(num_prefix(17), "17"); - assert_eq!(num_prefix(89), "89"); - assert_eq!(num_prefix(90), "9000"); - assert_eq!(num_prefix(91), "9001"); - assert_eq!(num_prefix(989), "9899"); - assert_eq!(num_prefix(990), "990000"); - } - - #[test] - fn test_str_prefix_fixed_width() { - assert_eq!(str_prefix_fixed_width(0, 2).as_deref(), Some("aa")); - assert_eq!(str_prefix_fixed_width(1, 2).as_deref(), Some("ab")); - 
assert_eq!(str_prefix_fixed_width(26, 2).as_deref(), Some("ba")); - assert_eq!( - str_prefix_fixed_width(26 * 26 - 1, 2).as_deref(), - Some("zz") - ); - assert_eq!(str_prefix_fixed_width(26 * 26, 2).as_deref(), None); - } - - #[test] - fn test_num_prefix_fixed_width() { - assert_eq!(num_prefix_fixed_width(0, 2).as_deref(), Some("00")); - assert_eq!(num_prefix_fixed_width(1, 2).as_deref(), Some("01")); - assert_eq!(num_prefix_fixed_width(99, 2).as_deref(), Some("99")); - assert_eq!(num_prefix_fixed_width(100, 2).as_deref(), None); - } - - #[test] - fn test_alphabetic_suffix() { - let factory = FilenameFactory::new("123", "789", 3, false); - assert_eq!(factory.make(0).unwrap(), "123aaa789"); - assert_eq!(factory.make(1).unwrap(), "123aab789"); - assert_eq!(factory.make(28).unwrap(), "123abc789"); + let mut it = FilenameIterator::new("chunk_", ".txt", 0, false); + assert_eq!(it.nth(26 * 25 - 1).unwrap(), "chunk_yz.txt"); + assert_eq!(it.next().unwrap(), "chunk_zaaa.txt"); + assert_eq!(it.next().unwrap(), "chunk_zaab.txt"); } #[test] - fn test_numeric_suffix() { - let factory = FilenameFactory::new("abc", "xyz", 3, true); - assert_eq!(factory.make(0).unwrap(), "abc000xyz"); - assert_eq!(factory.make(1).unwrap(), "abc001xyz"); - assert_eq!(factory.make(123).unwrap(), "abc123xyz"); + fn test_filename_iterator_numeric_dynamic_width() { + let mut it = FilenameIterator::new("chunk_", ".txt", 0, true); + assert_eq!(it.next().unwrap(), "chunk_00.txt"); + assert_eq!(it.next().unwrap(), "chunk_01.txt"); + assert_eq!(it.next().unwrap(), "chunk_02.txt"); + + let mut it = FilenameIterator::new("chunk_", ".txt", 0, true); + assert_eq!(it.nth(10 * 9 - 1).unwrap(), "chunk_89.txt"); + assert_eq!(it.next().unwrap(), "chunk_9000.txt"); + assert_eq!(it.next().unwrap(), "chunk_9001.txt"); } } diff --git a/src/uu/split/src/number.rs b/src/uu/split/src/number.rs new file mode 100644 index 00000000000..b2c40271641 --- /dev/null +++ b/src/uu/split/src/number.rs @@ -0,0 +1,513 @@ +// * This 
file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. +// spell-checker:ignore zaaa zaab +//! A number in arbitrary radix expressed in a positional notation. +//! +//! Use the [`Number`] enum to represent an arbitrary number in an +//! arbitrary radix. A number can be incremented and can be +//! displayed. See the [`Number`] documentation for more information. +//! +//! See the Wikipedia articles on [radix] and [positional notation] +//! for more background information on those topics. +//! +//! [radix]: https://en.wikipedia.org/wiki/Radix +//! [positional notation]: https://en.wikipedia.org/wiki/Positional_notation +use std::error::Error; +use std::fmt::{self, Display, Formatter}; + +/// An overflow due to incrementing a number beyond its representable limit. +#[derive(Debug)] +pub struct Overflow; + +impl fmt::Display for Overflow { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "Overflow") + } +} + +impl Error for Overflow {} + +/// A number in arbitrary radix expressed in a positional notation. +/// +/// Use the [`Number`] enum to represent an arbitrary number in an +/// arbitrary radix. A number can be incremented with +/// [`Number::increment`]. The [`FixedWidthNumber`] overflows when +/// attempting to increment it beyond the maximum number that can be +/// represented in the specified width. The [`DynamicWidthNumber`] +/// follows a non-standard incrementing procedure that is used +/// specifically for the `split` program. See the +/// [`DynamicWidthNumber`] documentation for more information. +/// +/// Numbers of radix 10 are displayable and rendered as decimal +/// numbers (for example, "00" or "917"). Numbers of radix 26 are +/// displayable and rendered as lowercase ASCII alphabetic characters +/// (for example, "aa" or "zax"). Numbers of other radices cannot be +/// displayed. 
The display of a [`DynamicWidthNumber`] includes a +/// prefix whose length depends on the width of the number. See the +/// [`DynamicWidthNumber`] documentation for more information. +/// +/// The digits of a number are accessible via the [`Number::digits`] +/// method. The digits are represented as a [`Vec<u8>`] with the most +/// significant digit on the left and the least significant digit on +/// the right. Each digit is a nonnegative integer less than the +/// radix. For example, if the radix is 3, then `vec![1, 0, 2]` +/// represents the decimal number 11: +/// +/// ```ignore +/// 1 * 3^2 + 0 * 3^1 + 2 * 3^0 = 9 + 0 + 2 = 11 +/// ``` +/// +/// For the [`DynamicWidthNumber`], the digits are not unique in the +/// sense that repeatedly incrementing the number will eventually +/// yield `vec![0, 0]`, `vec![0, 0, 0]`, `vec![0, 0, 0, 0]`, etc. +/// That's okay because each of these numbers will be displayed +/// differently and we only intend to use these numbers for display +/// purposes and not for mathematical purposes. +#[derive(Clone)] +pub enum Number { + /// A fixed-width representation of a number. + FixedWidth(FixedWidthNumber), + + /// A representation of a number with a dynamically growing width. + DynamicWidth(DynamicWidthNumber), +} + +impl Number { + /// The digits of this number in decreasing order of significance. + /// + /// The digits are represented as a [`Vec<u8>`] with the most + /// significant digit on the left and the least significant digit + /// on the right. Each digit is a nonnegative integer less than + /// the radix. For example, if the radix is 3, then `vec![1, 0, + /// 2]` represents the decimal number 11: + /// + /// ```ignore + /// 1 * 3^2 + 0 * 3^1 + 2 * 3^0 = 9 + 0 + 2 = 11 + /// ``` + /// + /// For the [`DynamicWidthNumber`], the digits are not unique in the + /// sense that repeatedly incrementing the number will eventually + /// yield `vec![0, 0]`, `vec![0, 0, 0]`, `vec![0, 0, 0, 0]`, etc. 
+ /// That's okay because each of these numbers will be displayed + /// differently and we only intend to use these numbers for display + /// purposes and not for mathematical purposes. + #[allow(dead_code)] + fn digits(&self) -> &Vec { + match self { + Number::FixedWidth(number) => &number.digits, + Number::DynamicWidth(number) => &number.digits, + } + } + + /// Increment this number to its successor. + /// + /// If incrementing this number would result in an overflow beyond + /// the maximum representable number, then return + /// [`Err(Overflow)`]. The [`FixedWidthNumber`] overflows, but + /// [`DynamicWidthNumber`] does not. + /// + /// The [`DynamicWidthNumber`] follows a non-standard incrementing + /// procedure that is used specifically for the `split` program. + /// See the [`DynamicWidthNumber`] documentation for more + /// information. + /// + /// # Errors + /// + /// This method returns [`Err(Overflow)`] when attempting to + /// increment beyond the largest representable number. + /// + /// # Examples + /// + /// Overflowing: + /// + /// ```rust,ignore + /// + /// use crate::number::FixedWidthNumber; + /// use crate::number::Number; + /// use crate::number::Overflow; + /// + /// // Radix 3, width of 1 digit. + /// let mut number = Number::FixedWidth(FixedWidthNumber::new(3, 1)); + /// number.increment().unwrap(); // from 0 to 1 + /// number.increment().unwrap(); // from 1 to 2 + /// assert!(number.increment().is_err()); + /// ``` + pub fn increment(&mut self) -> Result<(), Overflow> { + match self { + Number::FixedWidth(number) => number.increment(), + Number::DynamicWidth(number) => number.increment(), + } + } +} + +impl Display for Number { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self { + Number::FixedWidth(number) => number.fmt(f), + Number::DynamicWidth(number) => number.fmt(f), + } + } +} + +/// A positional notation representation of a fixed-width number. 
+/// +/// The digits are represented as a [`Vec<u8>`] with the most +/// significant digit on the left and the least significant digit on +/// the right. Each digit is a nonnegative integer less than the +/// radix. +/// +/// # Incrementing +/// +/// This number starts at `vec![0; width]`, representing the number 0 +/// with the specified number of digits. Incrementing this number +/// with [`Number::increment`] causes it to increase its value by 1 in +/// the usual sense. If the digits are `vec![radix - 1; width]`, then +/// an overflow would occur and the [`Number::increment`] method +/// returns an error. +/// +/// # Displaying +/// +/// This number is only displayable if `radix` is 10 or `radix` is +/// 26. If `radix` is 10, then the digits are concatenated and +/// displayed as a fixed-width decimal number. If `radix` is 26, then +/// each digit is translated to the corresponding lowercase ASCII +/// alphabetic character (that is, 'a', 'b', 'c', etc.) and +/// concatenated. +#[derive(Clone)] +pub struct FixedWidthNumber { + radix: u8, + digits: Vec, +} + +impl FixedWidthNumber { + /// Instantiate a number of the given radix and width. + pub fn new(radix: u8, width: usize) -> FixedWidthNumber { + FixedWidthNumber { + radix, + digits: vec![0; width], + } + } + + /// Increment this number. + /// + /// This method adds one to this number. If incrementing this + /// number would require more digits than are available with the + /// specified width, then this method returns [`Err(Overflow)`]. + fn increment(&mut self) -> Result<(), Overflow> { + for i in (0..self.digits.len()).rev() { + // Increment the current digit. + self.digits[i] += 1; + + // If the digit overflows, then set it to 0 and continue + // to the next iteration to increment the next most + // significant digit. Otherwise, terminate the loop, since + // there will be no further changes to any higher order + // digits. 
+ if self.digits[i] == self.radix { + self.digits[i] = 0; + } else { + break; + } + } + + // Return an error on overflow, which is signified by all zeros. + if self.digits == vec![0; self.digits.len()] { + Err(Overflow) + } else { + Ok(()) + } + } +} + +impl Display for FixedWidthNumber { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self.radix { + 10 => { + let digits: String = self.digits.iter().map(|d| (b'0' + d) as char).collect(); + write!(f, "{}", digits) + } + 26 => { + let digits: String = self.digits.iter().map(|d| (b'a' + d) as char).collect(); + write!(f, "{}", digits) + } + _ => Err(fmt::Error), + } + } +} + +/// A positional notation representation of a number of dynamically growing width. +/// +/// The digits are represented as a [`Vec`] with the most +/// significant digit on the left and the least significant digit on +/// the right. Each digit is a nonnegative integer less than the +/// radix. +/// +/// # Incrementing +/// +/// This number starts at `vec![0, 0]`, representing the number 0 with +/// a width of 2 digits. Incrementing this number with +/// [`Number::increment`] causes it to increase its value by 1. When +/// incrementing the number would have caused it to change from +/// `vec![radix - 2, radix - 1]` to `vec![radix - 1, 0]`, it instead +/// increases its width by one and resets its value to 0. For example, +/// if the radix were 3, the digits were `vec![1, 2]`, and we called +/// [`Number::increment`], then the digits would become `vec![0, 0, +/// 0]`. In this way, the width grows by one each time the most +/// significant digit would have achieved its maximum value. +/// +/// This notion of "incrementing" here does not match the notion of +/// incrementing the *value* of the number, it is just an abstract way +/// of updating the representation of the number in a way that is only +/// useful for the purposes of the `split` program. 
+/// +/// # Displaying +/// +/// This number is only displayable if `radix` is 10 or `radix` is +/// 26. If `radix` is 10, then the digits are concatenated and +/// displayed as a fixed-width decimal number with a prefix of `n - 2` +/// instances of the character '9', where `n` is the number of digits. +/// If `radix` is 26, then each digit is translated to the +/// corresponding lowercase ASCII alphabetic character (that is, 'a', +/// 'b', 'c', etc.) and concatenated with a prefix of `n - 2` +/// instances of the character 'z'. +/// +/// This notion of displaying the number is specific to the `split` +/// program. +#[derive(Clone)] +pub struct DynamicWidthNumber { + radix: u8, + digits: Vec, +} + +impl DynamicWidthNumber { + /// Instantiate a number of the given radix, starting with width 2. + /// + /// This associated function returns a new instance of the struct + /// with the given radix and a width of two digits, both 0. + pub fn new(radix: u8) -> DynamicWidthNumber { + DynamicWidthNumber { + radix, + digits: vec![0, 0], + } + } + + /// Set all digits to zero. + fn reset(&mut self) { + for i in 0..self.digits.len() { + self.digits[i] = 0; + } + } + + /// Increment this number. + /// + /// This method adds one to this number. The first time that the + /// most significant digit would achieve its highest possible + /// value (that is, `radix - 1`), then all the digits get reset to + /// 0 and the number of digits increases by one. + /// + /// This method never returns an error. + fn increment(&mut self) -> Result<(), Overflow> { + for i in (0..self.digits.len()).rev() { + // Increment the current digit. + self.digits[i] += 1; + + // If the digit overflows, then set it to 0 and continue + // to the next iteration to increment the next most + // significant digit. Otherwise, terminate the loop, since + // there will be no further changes to any higher order + // digits. 
+ if self.digits[i] == self.radix { + self.digits[i] = 0; + } else { + break; + } + } + + // If the most significant digit is at its maximum value, then + // add another digit and reset all digits to zero. + if self.digits[0] == self.radix - 1 { + self.digits.push(0); + self.reset(); + } + Ok(()) + } +} + +impl Display for DynamicWidthNumber { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self.radix { + 10 => { + let num_fill_chars = self.digits.len() - 2; + let digits: String = self.digits.iter().map(|d| (b'0' + d) as char).collect(); + write!( + f, + "{empty:9 { + let num_fill_chars = self.digits.len() - 2; + let digits: String = self.digits.iter().map(|d| (b'a' + d) as char).collect(); + write!( + f, + "{empty:z Err(fmt::Error), + } + } +} + +#[cfg(test)] +mod tests { + use crate::number::DynamicWidthNumber; + use crate::number::FixedWidthNumber; + use crate::number::Number; + use crate::number::Overflow; + + #[test] + fn test_dynamic_width_number_increment() { + let mut n = Number::DynamicWidth(DynamicWidthNumber::new(3)); + assert_eq!(n.digits(), &vec![0, 0]); + + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![0, 1]); + + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![0, 2]); + + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![1, 0]); + + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![1, 1]); + + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![1, 2]); + + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![0, 0, 0]); + + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![0, 0, 1]); + } + + #[test] + fn test_dynamic_width_number_display_alphabetic() { + fn num(n: usize) -> Number { + let mut number = Number::DynamicWidth(DynamicWidthNumber::new(26)); + for _ in 0..n { + number.increment().unwrap() + } + number + } + + assert_eq!(format!("{}", num(0)), "aa"); + assert_eq!(format!("{}", num(1)), "ab"); + assert_eq!(format!("{}", num(2)), "ac"); + assert_eq!(format!("{}", num(25)), "az"); + 
assert_eq!(format!("{}", num(26)), "ba"); + assert_eq!(format!("{}", num(27)), "bb"); + assert_eq!(format!("{}", num(28)), "bc"); + assert_eq!(format!("{}", num(26 + 25)), "bz"); + assert_eq!(format!("{}", num(26 + 26)), "ca"); + assert_eq!(format!("{}", num(26 * 25 - 1)), "yz"); + assert_eq!(format!("{}", num(26 * 25)), "zaaa"); + assert_eq!(format!("{}", num(26 * 25 + 1)), "zaab"); + } + + #[test] + fn test_dynamic_width_number_display_numeric() { + fn num(n: usize) -> Number { + let mut number = Number::DynamicWidth(DynamicWidthNumber::new(10)); + for _ in 0..n { + number.increment().unwrap() + } + number + } + + assert_eq!(format!("{}", num(0)), "00"); + assert_eq!(format!("{}", num(9)), "09"); + assert_eq!(format!("{}", num(17)), "17"); + assert_eq!(format!("{}", num(10 * 9 - 1)), "89"); + assert_eq!(format!("{}", num(10 * 9)), "9000"); + assert_eq!(format!("{}", num(10 * 9 + 1)), "9001"); + assert_eq!(format!("{}", num(10 * 99 - 1)), "9899"); + assert_eq!(format!("{}", num(10 * 99)), "990000"); + assert_eq!(format!("{}", num(10 * 99 + 1)), "990001"); + } + + #[test] + fn test_fixed_width_number_increment() { + let mut n = Number::FixedWidth(FixedWidthNumber::new(3, 2)); + assert_eq!(n.digits(), &vec![0, 0]); + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![0, 1]); + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![0, 2]); + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![1, 0]); + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![1, 1]); + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![1, 2]); + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![2, 0]); + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![2, 1]); + n.increment().unwrap(); + assert_eq!(n.digits(), &vec![2, 2]); + assert!(n.increment().is_err()); + } + + #[test] + fn test_fixed_width_number_display_alphabetic() { + fn num(n: usize) -> Result { + let mut number = Number::FixedWidth(FixedWidthNumber::new(26, 2)); + for _ in 0..n { + number.increment()?; + 
} + Ok(number) + } + + assert_eq!(format!("{}", num(0).unwrap()), "aa"); + assert_eq!(format!("{}", num(1).unwrap()), "ab"); + assert_eq!(format!("{}", num(2).unwrap()), "ac"); + assert_eq!(format!("{}", num(25).unwrap()), "az"); + assert_eq!(format!("{}", num(26).unwrap()), "ba"); + assert_eq!(format!("{}", num(27).unwrap()), "bb"); + assert_eq!(format!("{}", num(28).unwrap()), "bc"); + assert_eq!(format!("{}", num(26 + 25).unwrap()), "bz"); + assert_eq!(format!("{}", num(26 + 26).unwrap()), "ca"); + assert_eq!(format!("{}", num(26 * 25 - 1).unwrap()), "yz"); + assert_eq!(format!("{}", num(26 * 25).unwrap()), "za"); + assert_eq!(format!("{}", num(26 * 26 - 1).unwrap()), "zz"); + assert!(num(26 * 26).is_err()); + } + + #[test] + fn test_fixed_width_number_display_numeric() { + fn num(n: usize) -> Result { + let mut number = Number::FixedWidth(FixedWidthNumber::new(10, 2)); + for _ in 0..n { + number.increment()?; + } + Ok(number) + } + + assert_eq!(format!("{}", num(0).unwrap()), "00"); + assert_eq!(format!("{}", num(9).unwrap()), "09"); + assert_eq!(format!("{}", num(17).unwrap()), "17"); + assert_eq!(format!("{}", num(10 * 9 - 1).unwrap()), "89"); + assert_eq!(format!("{}", num(10 * 9).unwrap()), "90"); + assert_eq!(format!("{}", num(10 * 10 - 1).unwrap()), "99"); + assert!(num(10 * 10).is_err()); + } +} diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index dbc17da70c6..c8393818479 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -8,9 +8,10 @@ // spell-checker:ignore (ToDO) PREFIXaa mod filenames; +mod number; mod platform; -use crate::filenames::FilenameFactory; +use crate::filenames::FilenameIterator; use clap::{crate_version, App, AppSettings, Arg, ArgMatches}; use std::convert::TryFrom; use std::env; @@ -384,7 +385,7 @@ where let chunk_size = (num_bytes / (num_chunks as u64)) as usize; // This object is responsible for creating the filename for each chunk. 
- let filename_factory = FilenameFactory::new( + let mut filename_iterator = FilenameIterator::new( &settings.prefix, &settings.additional_suffix, settings.suffix_length, @@ -394,9 +395,9 @@ where // Create one writer for each chunk. This will create each // of the underlying files (if not in `--filter` mode). let mut writers = vec![]; - for i in 0..num_chunks { - let filename = filename_factory - .make(i) + for _ in 0..num_chunks { + let filename = filename_iterator + .next() .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; let writer = platform::instantiate_current_writer(&settings.filter, filename.as_str()); writers.push(writer); @@ -462,17 +463,16 @@ fn split(settings: Settings) -> UResult<()> { }; // This object is responsible for creating the filename for each chunk. - let filename_factory = FilenameFactory::new( + let mut filename_iterator = FilenameIterator::new( &settings.prefix, &settings.additional_suffix, settings.suffix_length, settings.numeric_suffix, ); - let mut fileno = 0; loop { // Get a new part file set up, and construct `writer` for it. - let filename = filename_factory - .make(fileno) + let filename = filename_iterator + .next() .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; let mut writer = platform::instantiate_current_writer(&settings.filter, filename.as_str()); @@ -509,8 +509,6 @@ fn split(settings: Settings) -> UResult<()> { if settings.verbose { println!("creating file {}", filename.quote()); } - - fileno += 1; } Ok(()) }