diff --git a/arrow-array/src/array/byte_view_array.rs b/arrow-array/src/array/byte_view_array.rs
index f52bce8ab0b..842c38ec28c 100644
--- a/arrow-array/src/array/byte_view_array.rs
+++ b/arrow-array/src/array/byte_view_array.rs
@@ -325,18 +325,18 @@ impl GenericByteViewArray {
     /// and return a new array with the compacted data buffers.
     /// The original array will be left as is.
     pub fn gc(&self) -> Self {
-        let compact_check = self.compact_check();
+        let check_result = self.compact_check();
 
-        if compact_check.iter().all(|x| *x) {
+        if check_result.iter().all(|x| *x) {
             return self.clone();
         }
 
         let mut new_views = Vec::with_capacity(self.views.len());
         let mut new_bufs: Vec<Vec<u8>> = vec![vec![]; self.buffers.len()];
-        for view in self.views.iter() {
+        for (i, view) in self.views.iter().enumerate() {
             let mut bv = ByteView::from(*view);
             let idx = bv.buffer_index as usize;
-            if bv.length <= 12 || compact_check[idx] {
+            if self.is_null(i) || bv.length <= 12 || check_result[idx] {
                 new_views.push(*view);
                 continue;
             }
@@ -362,7 +362,7 @@ impl GenericByteViewArray {
             .iter()
             .enumerate()
             .map(|(idx, buf)| {
-                if compact_check[idx] {
+                if check_result[idx] {
                     buf.clone()
                 } else {
                     new_bufs[idx].clone()
@@ -604,14 +604,15 @@ impl From>> for StringViewArray {
 /// Then it is better to do the check at once, rather than doing it for each accumulate operation.
 struct CompactChecker {
     length: usize,
-    coverage: BTreeMap<usize, usize>,
+    intervals: BTreeMap<usize, usize>,
 }
 
 impl CompactChecker {
+    /// Create a new checker with the expected length of the buffer
     pub fn new(length: usize) -> Self {
         Self {
             length,
-            coverage: BTreeMap::new(),
+            intervals: BTreeMap::new(),
         }
     }
 
@@ -621,21 +622,21 @@ impl CompactChecker {
             return;
         }
         let end = offset + length;
-        if let Some(val) = self.coverage.get_mut(&offset) {
+        if let Some(val) = self.intervals.get_mut(&offset) {
             if *val < end {
                 *val = end;
             }
         } else {
-            self.coverage.insert(offset, end);
+            self.intervals.insert(offset, end);
         }
     }
 
     /// Check if the checker is fully covered
-    pub fn finish(&self) -> bool {
+    pub fn finish(self) -> bool {
         // check if the coverage is continuous and full
         let mut last_end = 0;
         // todo: can be optimized
-        for (start, end) in self.coverage.iter() {
+        for (start, end) in self.intervals.iter() {
             if *start > last_end {
                 return false;
             }