From 522625814cbb5f22e0b4f60227a397aff71098b5 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Fri, 11 Nov 2022 13:23:34 +1300 Subject: [PATCH] Make RowSelection::intersection a member function (#3084) --- parquet/src/arrow/arrow_reader/mod.rs | 2 +- parquet/src/arrow/arrow_reader/selection.rs | 42 +++++++++++++-------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 1f841a0ee175..35b70a0485cd 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -41,7 +41,7 @@ mod filter; mod selection; pub use filter::{ArrowPredicate, ArrowPredicateFn, RowFilter}; -pub use selection::{intersect_row_selections, RowSelection, RowSelector}; +pub use selection::{RowSelection, RowSelector}; /// A generic builder for constructing sync or async arrow parquet readers. This is not intended /// to be used directly, instead you should use the specialization for the type of reader diff --git a/parquet/src/arrow/arrow_reader/selection.rs b/parquet/src/arrow/arrow_reader/selection.rs index e01c584b6e63..d5c4ce5ea450 100644 --- a/parquet/src/arrow/arrow_reader/selection.rs +++ b/parquet/src/arrow/arrow_reader/selection.rs @@ -323,6 +323,18 @@ impl RowSelection { Self { selectors } } + /// Compute the intersection of two [`RowSelection`] + /// For example: + /// self: NNYYYYNNYYNYN + /// other: NYNNNNNNY + /// + /// returned: NNNNNNNNYYNYN + pub fn intersection(&self, other: &Self) -> Self { + Self { + selectors: intersect_row_selections(&self.selectors, &other.selectors), + } + } + /// Returns `true` if this [`RowSelection`] selects any rows pub fn selects_any(&self) -> bool { self.selectors.iter().any(|x| !x.skip) @@ -349,19 +361,19 @@ impl From for VecDeque { } } -// Combine two lists of `RowSelection` return the intersection of them -// For example: -// self: NNYYYYNNYYNYN -// other: NYNNNNNNY -// -// returned: NNNNNNNNYYNYN -pub fn intersect_row_selections( - left: Vec, - right: Vec, +/// Combine two lists of `RowSelection` return the intersection of them +/// For example: +/// self: NNYYYYNNYYNYN +/// other: NYNNNNNNY +/// +/// returned: NNNNNNNNYYNYN +fn intersect_row_selections( + left: &[RowSelector], + right: &[RowSelector], ) -> Vec { let mut res = Vec::with_capacity(left.len()); - let mut l_iter = left.into_iter().peekable(); - let mut r_iter = right.into_iter().peekable(); + let mut l_iter = left.iter().copied().peekable(); + let mut r_iter = right.iter().copied().peekable(); while let (Some(a), Some(b)) = (l_iter.peek_mut(), r_iter.peek_mut()) { if a.row_count == 0 { @@ -692,7 +704,7 @@ mod tests { RowSelector::select(1), ]; - let res = intersect_row_selections(a, b); + let res = intersect_row_selections(&a, &b); assert_eq!( RowSelection::from_selectors_and_combine(&res).selectors, vec![ @@ -710,7 +722,7 @@ mod tests { RowSelector::skip(33), ]; let b = vec![RowSelector::select(36), RowSelector::skip(36)]; - let res = intersect_row_selections(a, b); + let res = intersect_row_selections(&a, &b); assert_eq!( RowSelection::from_selectors_and_combine(&res).selectors, vec![RowSelector::select(3), RowSelector::skip(69)] @@ -725,7 +737,7 @@ mod tests { RowSelector::skip(2), RowSelector::select(2), ]; - let res = intersect_row_selections(a, b); + let res = intersect_row_selections(&a, &b); assert_eq!( RowSelection::from_selectors_and_combine(&res).selectors, vec![RowSelector::select(2), RowSelector::skip(8)] @@ -739,7 +751,7 @@ mod tests { RowSelector::skip(2), RowSelector::select(2), ]; - let res = intersect_row_selections(a, b); + let res = intersect_row_selections(&a, &b); assert_eq!( RowSelection::from_selectors_and_combine(&res).selectors, vec![RowSelector::select(2), RowSelector::skip(8)]