From b50ac47fddcd3f54109f3c444b576ef6f026a8af Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Thu, 3 Feb 2022 13:08:51 +0100 Subject: [PATCH] mMutableUtf8Array::extend_values --- src/array/physical_binary.rs | 35 ++++++++++++++++++++++++++++++++++ src/array/utf8/mutable.rs | 32 +++++++++++++++++++++++-------- tests/it/array/utf8/mutable.rs | 15 +++++++++++++++ 3 files changed, 74 insertions(+), 8 deletions(-) diff --git a/src/array/physical_binary.rs b/src/array/physical_binary.rs index eb7ef69748a..b1395c0bb9e 100644 --- a/src/array/physical_binary.rs +++ b/src/array/physical_binary.rs @@ -157,6 +157,41 @@ pub(crate) unsafe fn extend_from_trusted_len_values_iter( offsets.set_len(offsets.len() + additional); } +// Populates `offsets` and `values` [`Vec`]s with information extracted +// from the incoming `iterator`. + +// the return value indicates how many items were added. +#[inline] +pub(crate) fn extend_from_values_iter( + offsets: &mut Vec, + values: &mut Vec, + iterator: I, +) -> usize +where + O: Offset, + P: AsRef<[u8]>, + I: Iterator, +{ + let (size_hint, _) = iterator.size_hint(); + + offsets.reserve(size_hint); + + // Read in the last offset, will be used to increment and store + // new values later on + let mut length = *offsets.last().unwrap(); + let start_index = offsets.len(); + + for item in iterator { + let s = item.as_ref(); + // Calculate the new offset value + length += O::from_usize(s.len()).unwrap(); + + values.extend_from_slice(s); + offsets.push(length); + } + offsets.len() - start_index +} + // Populates `offsets`, `values`, and `validity` [`Vec`]s with // information extracted from the incoming `iterator`. // diff --git a/src/array/utf8/mutable.rs b/src/array/utf8/mutable.rs index cc8b170855e..19f2662f113 100644 --- a/src/array/utf8/mutable.rs +++ b/src/array/utf8/mutable.rs @@ -14,8 +14,8 @@ use crate::{ use super::Utf8Array; use crate::array::physical_binary::*; -struct Wrapper

(P); -impl> AsRef<[u8]> for Wrapper { +struct StrAsBytes

(P); +impl> AsRef<[u8]> for StrAsBytes { #[inline] fn as_ref(&self) -> &[u8] { self.0.as_ref().as_bytes() @@ -278,6 +278,22 @@ impl MutableUtf8Array { unsafe { self.extend_trusted_len_values_unchecked(iterator) } } + /// Extends the [`MutableUtf8Array`] from an iterator of values. + /// This differs from `extended_trusted_len` which accepts iterator of optional values. + #[inline] + pub fn extend_values(&mut self, iterator: I) + where + P: AsRef, + I: Iterator, + { + let iterator = iterator.map(StrAsBytes); + let additional = extend_from_values_iter(&mut self.offsets, &mut self.values, iterator); + + if let Some(validity) = self.validity.as_mut() { + validity.extend_constant(additional, true); + } + } + /// Extends the [`MutableUtf8Array`] from an iterator of values of trusted len. /// This differs from `extended_trusted_len_unchecked` which accepts iterator of optional /// values. @@ -292,7 +308,7 @@ impl MutableUtf8Array { let (_, upper) = iterator.size_hint(); let additional = upper.expect("extend_trusted_len_values requires an upper limit"); - let iterator = iterator.map(Wrapper); + let iterator = iterator.map(StrAsBytes); extend_from_trusted_len_values_iter(&mut self.offsets, &mut self.values, iterator); if let Some(validity) = self.validity.as_mut() { @@ -325,7 +341,7 @@ impl MutableUtf8Array { self.validity = Some(validity); } - let iterator = iterator.map(|x| x.map(Wrapper)); + let iterator = iterator.map(|x| x.map(StrAsBytes)); extend_from_trusted_len_iter( &mut self.offsets, &mut self.values, @@ -348,7 +364,7 @@ impl MutableUtf8Array { P: AsRef, I: Iterator>, { - let iterator = iterator.map(|x| x.map(Wrapper)); + let iterator = iterator.map(|x| x.map(StrAsBytes)); let (validity, offsets, values) = trusted_len_unzip(iterator); // soundness: P is `str` @@ -374,7 +390,7 @@ impl MutableUtf8Array { pub unsafe fn from_trusted_len_values_iter_unchecked, I: Iterator>( iterator: I, ) -> Self { - let iterator = iterator.map(Wrapper); + let iterator = iterator.map(StrAsBytes); let (offsets, values) = unsafe { trusted_len_values_iter(iterator) }; // soundness: T is AsRef Self::from_data_unchecked(Self::default_data_type(), offsets, values, None) @@ -417,7 +433,7 @@ impl MutableUtf8Array { { let iterator = iterator.into_iter(); - let iterator = iterator.map(|x| x.map(|x| x.map(Wrapper))); + let iterator = iterator.map(|x| x.map(|x| x.map(StrAsBytes))); let (validity, offsets, values) = try_trusted_len_unzip(iterator)?; // soundness: P is `str` @@ -442,7 +458,7 @@ impl MutableUtf8Array { /// Creates a new [`MutableUtf8Array`] from a [`Iterator`] of `&str`. pub fn from_iter_values, I: Iterator>(iterator: I) -> Self { - let iterator = iterator.map(Wrapper); + let iterator = iterator.map(StrAsBytes); let (offsets, values) = values_iter(iterator); // soundness: T: AsRef unsafe { Self::from_data_unchecked(Self::default_data_type(), offsets, values, None) } diff --git a/tests/it/array/utf8/mutable.rs b/tests/it/array/utf8/mutable.rs index 893b92c6806..548409f68dc 100644 --- a/tests/it/array/utf8/mutable.rs +++ b/tests/it/array/utf8/mutable.rs @@ -80,3 +80,18 @@ fn test_extend_trusted_len() { Some(&Bitmap::from_u8_slice(&[0b00011011], 5)) ); } + +#[test] +fn test_extend_values() { + let mut array = MutableUtf8Array::::new(); + + array.extend_values([Some("hi"), None, Some("there"), None].iter().flatten()); + array.extend_values([Some("hello"), None].iter().flatten()); + array.extend_values(vec![Some("again"), None].into_iter().flatten()); + + let array: Utf8Array = array.into(); + + assert_eq!(array.values().as_slice(), b"hitherehelloagain"); + assert_eq!(array.offsets().as_slice(), &[0, 2, 7, 12, 17]); + assert_eq!(array.validity(), None,); +}