From e9e9a8166f0714422cb31f55a0a2d7e4b4351544 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Thu, 18 Aug 2022 15:20:32 +0200 Subject: [PATCH 1/3] uucore/ranges: refactor FromStr impl --- src/uucore/src/lib/mods/ranges.rs | 94 ++++++++++++++----------------- 1 file changed, 41 insertions(+), 53 deletions(-) diff --git a/src/uucore/src/lib/mods/ranges.rs b/src/uucore/src/lib/mods/ranges.rs index 822c09e0279..ff35b343296 100644 --- a/src/uucore/src/lib/mods/ranges.rs +++ b/src/uucore/src/lib/mods/ranges.rs @@ -20,63 +20,53 @@ pub struct Range { impl FromStr for Range { type Err = &'static str; + /// Parse a string of the form `a-b` into a `Range` + /// + /// ``` + /// use std::str::FromStr; + /// use uucore::ranges::Range; + /// assert_eq!(Range::from_str("5"), Ok(Range { low: 5, high: 5 })); + /// assert_eq!(Range::from_str("4-"), Ok(Range { low: 4, high: usize::MAX - 1 })); + /// assert_eq!(Range::from_str("-4"), Ok(Range { low: 1, high: 4 })); + /// assert_eq!(Range::from_str("2-4"), Ok(Range { low: 2, high: 4 })); + /// assert!(Range::from_str("0-4").is_err()); + /// assert!(Range::from_str("4-2").is_err()); + /// assert!(Range::from_str("-").is_err()); + /// assert!(Range::from_str("a").is_err()); + /// assert!(Range::from_str("a-b").is_err()); + /// ``` fn from_str(s: &str) -> Result { - use std::usize::MAX; - - let mut parts = s.splitn(2, '-'); - - let field = "fields and positions are numbered from 1"; - let order = "high end of range less than low end"; - let inval = "failed to parse range"; - - match (parts.next(), parts.next()) { - (Some(nm), None) => { - if let Ok(nm) = nm.parse::() { - if nm > 0 { - Ok(Self { low: nm, high: nm }) - } else { - Err(field) - } - } else { - Err(inval) - } + fn parse(s: &str) -> Result { + match s.parse::() { + Ok(0) => Err("fields and positions are numbered from 1"), + Ok(n) => Ok(n), + Err(_) => Err("failed to parse range"), } - (Some(n), Some(m)) if m.is_empty() => { - if let Ok(low) = n.parse::() { - if low > 0 { - Ok(Self { low, high: MAX - 1 }) - } else { - Err(field) - } - } else { - Err(inval) - } + } + + Ok(match s.split_once('-') { + None => { + let n = parse(s)?; + Self { low: n, high: n } } - (Some(n), Some(m)) if n.is_empty() => { - if let Ok(high) = m.parse::() { - if high > 0 { - Ok(Self { low: 1, high }) - } else { - Err(field) - } + Some(("", "")) => return Err("invalid range with no endpoint"), + Some((low, "")) => Self { + low: parse(low)?, + high: usize::MAX - 1, + }, + Some(("", high)) => Self { + low: 1, + high: parse(high)?, + }, + Some((low, high)) => { + let (low, high) = (parse(low)?, parse(high)?); + if low <= high { + Self { low, high } } else { - Err(inval) + return Err("high end of range less than low end"); } } - (Some(n), Some(m)) => match (n.parse::(), m.parse::()) { - (Ok(low), Ok(high)) => { - if low > 0 && low <= high { - Ok(Self { low, high }) - } else if low == 0 { - Err(field) - } else { - Err(order) - } - } - _ => Err(inval), - }, - _ => unreachable!(), - } + }) } } @@ -109,8 +99,6 @@ impl Range { } pub fn complement(ranges: &[Range]) -> Vec { - use std::usize; - let mut complements = Vec::with_capacity(ranges.len() + 1); if !ranges.is_empty() && ranges[0].low > 1 { From 7890228f82c6c14f3528a2f8b735a275bf24b273 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Thu, 18 Aug 2022 19:45:56 +0200 Subject: [PATCH 2/3] uucore/ranges: document and test merge operation --- src/uucore/src/lib/mods/ranges.rs | 86 ++++++++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 6 deletions(-) diff --git a/src/uucore/src/lib/mods/ranges.rs b/src/uucore/src/lib/mods/ranges.rs index ff35b343296..319ab5b8314 100644 --- a/src/uucore/src/lib/mods/ranges.rs +++ b/src/uucore/src/lib/mods/ranges.rs @@ -7,6 +7,7 @@ // spell-checker:ignore (ToDO) inval +use std::cmp::max; use std::str::FromStr; use crate::display::Quotable; @@ -72,9 +73,7 @@ impl FromStr for Range { impl Range { pub fn from_list(list: &str) -> Result, String> { - use std::cmp::max; - - let mut ranges: Vec = vec![]; + let mut ranges = Vec::new(); for item in list.split(',') { let range_item = FromStr::from_str(item) @@ -82,19 +81,28 @@ impl Range { ranges.push(range_item); } + Ok(Range::merge(ranges)) + } + + /// Merge any overlapping ranges + /// + /// Is guaranteed to return only disjoint ranges in a sorted order. + fn merge(mut ranges: Vec) -> Vec { ranges.sort(); // merge overlapping ranges for i in 0..ranges.len() { let j = i + 1; - while j < ranges.len() && ranges[j].low <= ranges[i].high { + // The +1 is a small optimization, because we can merge adjacent Ranges. + // For example (1,3) and (4,6), because in the integers, there are no + // possible values between 3 and 4, this is equivalent to (1,6). + while j < ranges.len() && ranges[j].low <= ranges[i].high + 1 { let j_high = ranges.remove(j).high; ranges[i].high = max(ranges[i].high, j_high); } } - - Ok(ranges) + ranges } } @@ -161,3 +169,69 @@ pub fn contain(ranges: &[Range], n: usize) -> bool { false } + +#[cfg(test)] +mod test { + use super::Range; + + fn m(a: Vec, b: Vec) { + assert_eq!(Range::merge(a), b); + } + + fn r(low: usize, high: usize) -> Range { + Range { low, high } + } + + #[test] + fn merging() { + // Single element + m(vec![r(1, 2)], vec![r(1, 2)]); + + // Disjoint in wrong order + m(vec![r(4, 5), r(1, 2)], vec![r(1, 2), r(4, 5)]); + + // Two elements must be merged + m(vec![r(1, 3), r(2, 4), r(6, 7)], vec![r(1, 4), r(6, 7)]); + + // Two merges and a duplicate + m( + vec![r(1, 3), r(6, 7), r(2, 4), r(6, 7)], + vec![r(1, 4), r(6, 7)], + ); + + // One giant + m( + vec![ + r(110, 120), + r(10, 20), + r(100, 200), + r(130, 140), + r(150, 160), + ], + vec![r(10, 20), r(100, 200)], + ); + + // Last one joins the previous two + m( + vec![ + r(10,20), + r(30,40), + r(20,30), + ], + vec![r(10,40)] + ); + + m( + vec![ + r(10,20), + r(30,40), + r(50,60), + r(20,30), + ], + vec![r(10,40), r(50,60)] + ); + + // Merge adjacent ranges + m(vec![r(1, 3), r(4, 6)], vec![r(1, 6)]) + } +} \ No newline at end of file From 003b483705141634358e1457ddde8dccecfb0d5a Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Thu, 18 Aug 2022 20:02:50 +0200 Subject: [PATCH 3/3] uucore/ranges: refactor and test complement --- src/uucore/src/lib/mods/ranges.rs | 89 +++++++++++++++---------------- 1 file changed, 43 insertions(+), 46 deletions(-) diff --git a/src/uucore/src/lib/mods/ranges.rs b/src/uucore/src/lib/mods/ranges.rs index 319ab5b8314..71ad384145c 100644 --- a/src/uucore/src/lib/mods/ranges.rs +++ b/src/uucore/src/lib/mods/ranges.rs @@ -81,11 +81,11 @@ impl Range { ranges.push(range_item); } - Ok(Range::merge(ranges)) + Ok(Self::merge(ranges)) } /// Merge any overlapping ranges - /// + /// /// Is guaranteed to return only disjoint ranges in a sorted order. fn merge(mut ranges: Vec) -> Vec { ranges.sort(); @@ -107,36 +107,24 @@ impl Range { } pub fn complement(ranges: &[Range]) -> Vec { + let mut prev_high = 0; let mut complements = Vec::with_capacity(ranges.len() + 1); - if !ranges.is_empty() && ranges[0].low > 1 { - complements.push(Range { - low: 1, - high: ranges[0].low - 1, - }); + for range in ranges { + if range.low > prev_high + 1 { + complements.push(Range { + low: prev_high + 1, + high: range.low - 1, + }); + } + prev_high = range.high; } - let mut ranges_iter = ranges.iter().peekable(); - loop { - match (ranges_iter.next(), ranges_iter.peek()) { - (Some(left), Some(right)) => { - if left.high + 1 != right.low { - complements.push(Range { - low: left.high + 1, - high: right.low - 1, - }); - } - } - (Some(last), None) => { - if last.high < usize::MAX - 1 { - complements.push(Range { - low: last.high + 1, - high: usize::MAX - 1, - }); - } - } - _ => break, - } + if prev_high < usize::MAX - 1 { + complements.push(Range { + low: prev_high + 1, + high: usize::MAX - 1, + }); } complements @@ -172,7 +160,7 @@ pub fn contain(ranges: &[Range], n: usize) -> bool { #[cfg(test)] mod test { - use super::Range; + use super::{complement, Range}; fn m(a: Vec, b: Vec) { assert_eq!(Range::merge(a), b); @@ -181,7 +169,7 @@ mod test { fn r(low: usize, high: usize) -> Range { Range { low, high } } - + #[test] fn merging() { // Single element @@ -212,26 +200,35 @@ mod test { ); // Last one joins the previous two - m( - vec![ - r(10,20), - r(30,40), - r(20,30), - ], - vec![r(10,40)] - ); + m(vec![r(10, 20), r(30, 40), r(20, 30)], vec![r(10, 40)]); m( - vec![ - r(10,20), - r(30,40), - r(50,60), - r(20,30), - ], - vec![r(10,40), r(50,60)] + vec![r(10, 20), r(30, 40), r(50, 60), r(20, 30)], + vec![r(10, 40), r(50, 60)], ); // Merge adjacent ranges m(vec![r(1, 3), r(4, 6)], vec![r(1, 6)]) } -} \ No newline at end of file + + #[test] + fn complementing() { + // Simple + assert_eq!(complement(&[r(3, 4)]), vec![r(1, 2), r(5, usize::MAX - 1)]); + + // With start + assert_eq!( + complement(&[r(1, 3), r(6, 10)]), + vec![r(4, 5), r(11, usize::MAX - 1)] + ); + + // With end + assert_eq!( + complement(&[r(2, 4), r(6, usize::MAX - 1)]), + vec![r(1, 1), r(5, 5)] + ); + + // With start and end + assert_eq!(complement(&[r(1, 4), r(6, usize::MAX - 1)]), vec![r(5, 5)]); + } +}