Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Added MutableUtf8Array::extend_values #798

Merged
merged 1 commit into from
Feb 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions src/array/physical_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,41 @@ pub(crate) unsafe fn extend_from_trusted_len_values_iter<I, P, O>(
offsets.set_len(offsets.len() + additional);
}

/// Appends every item yielded by `iterator` to the `values` buffer and pushes
/// the matching cumulative end offset onto `offsets`.
///
/// Returns the number of items that were appended.
///
/// # Panics
///
/// Panics if `offsets` is empty (there is no previous offset to continue
/// from), or if `O::from_usize` yields no offset for an item's length.
#[inline]
pub(crate) fn extend_from_values_iter<I, P, O>(
    offsets: &mut Vec<O>,
    values: &mut Vec<u8>,
    iterator: I,
) -> usize
where
    O: Offset,
    P: AsRef<[u8]>,
    I: Iterator<Item = P>,
{
    // Only the lower bound of the size hint is used for the up-front reservation;
    // any further items simply grow the vector as they are pushed.
    let (lower_bound, _) = iterator.size_hint();
    offsets.reserve(lower_bound);

    // The running offset continues from the last one already stored.
    let mut running = *offsets.last().unwrap();
    let len_before = offsets.len();

    iterator.for_each(|item| {
        let bytes = item.as_ref();
        // Advance the running offset before touching `values`, so a failed
        // conversion leaves both buffers as they were for this item.
        running += O::from_usize(bytes.len()).unwrap();

        values.extend_from_slice(bytes);
        offsets.push(running);
    });

    // The number of pushed offsets equals the number of consumed items.
    offsets.len() - len_before
}

// Populates `offsets`, `values`, and `validity` [`Vec`]s with
// information extracted from the incoming `iterator`.
//
Expand Down
32 changes: 24 additions & 8 deletions src/array/utf8/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ use crate::{
use super::Utf8Array;
use crate::array::physical_binary::*;

struct Wrapper<P>(P);
impl<T: AsRef<str>> AsRef<[u8]> for Wrapper<T> {
struct StrAsBytes<P>(P);
impl<T: AsRef<str>> AsRef<[u8]> for StrAsBytes<T> {
#[inline]
fn as_ref(&self) -> &[u8] {
self.0.as_ref().as_bytes()
Expand Down Expand Up @@ -278,6 +278,22 @@ impl<O: Offset> MutableUtf8Array<O> {
unsafe { self.extend_trusted_len_values_unchecked(iterator) }
}

/// Appends all values produced by `iterator` to this [`MutableUtf8Array`].
///
/// Every item is a plain value rather than an `Option`. If a validity bitmap
/// is present, it is extended with one `true` entry per appended value.
/// This differs from `extend_trusted_len`, which accepts an iterator of
/// optional values.
#[inline]
pub fn extend_values<I, P>(&mut self, iterator: I)
where
    P: AsRef<str>,
    I: Iterator<Item = P>,
{
    // View each string item as bytes so the shared binary helper can consume it.
    let added = extend_from_values_iter(
        &mut self.offsets,
        &mut self.values,
        iterator.map(StrAsBytes),
    );

    if let Some(validity) = &mut self.validity {
        validity.extend_constant(added, true);
    }
}

/// Extends the [`MutableUtf8Array`] from an iterator of values of trusted len.
/// This differs from `extend_trusted_len_unchecked`, which accepts an iterator of optional
/// values.
Expand All @@ -292,7 +308,7 @@ impl<O: Offset> MutableUtf8Array<O> {
let (_, upper) = iterator.size_hint();
let additional = upper.expect("extend_trusted_len_values requires an upper limit");

let iterator = iterator.map(Wrapper);
let iterator = iterator.map(StrAsBytes);
extend_from_trusted_len_values_iter(&mut self.offsets, &mut self.values, iterator);

if let Some(validity) = self.validity.as_mut() {
Expand Down Expand Up @@ -325,7 +341,7 @@ impl<O: Offset> MutableUtf8Array<O> {
self.validity = Some(validity);
}

let iterator = iterator.map(|x| x.map(Wrapper));
let iterator = iterator.map(|x| x.map(StrAsBytes));
extend_from_trusted_len_iter(
&mut self.offsets,
&mut self.values,
Expand All @@ -348,7 +364,7 @@ impl<O: Offset> MutableUtf8Array<O> {
P: AsRef<str>,
I: Iterator<Item = Option<P>>,
{
let iterator = iterator.map(|x| x.map(Wrapper));
let iterator = iterator.map(|x| x.map(StrAsBytes));
let (validity, offsets, values) = trusted_len_unzip(iterator);

// soundness: P is `str`
Expand All @@ -374,7 +390,7 @@ impl<O: Offset> MutableUtf8Array<O> {
pub unsafe fn from_trusted_len_values_iter_unchecked<T: AsRef<str>, I: Iterator<Item = T>>(
iterator: I,
) -> Self {
let iterator = iterator.map(Wrapper);
let iterator = iterator.map(StrAsBytes);
let (offsets, values) = unsafe { trusted_len_values_iter(iterator) };
// soundness: T is AsRef<str>
Self::from_data_unchecked(Self::default_data_type(), offsets, values, None)
Expand Down Expand Up @@ -417,7 +433,7 @@ impl<O: Offset> MutableUtf8Array<O> {
{
let iterator = iterator.into_iter();

let iterator = iterator.map(|x| x.map(|x| x.map(Wrapper)));
let iterator = iterator.map(|x| x.map(|x| x.map(StrAsBytes)));
let (validity, offsets, values) = try_trusted_len_unzip(iterator)?;

// soundness: P is `str`
Expand All @@ -442,7 +458,7 @@ impl<O: Offset> MutableUtf8Array<O> {

/// Creates a new [`MutableUtf8Array`] from an [`Iterator`] of `&str`.
pub fn from_iter_values<T: AsRef<str>, I: Iterator<Item = T>>(iterator: I) -> Self {
let iterator = iterator.map(Wrapper);
let iterator = iterator.map(StrAsBytes);
let (offsets, values) = values_iter(iterator);
// soundness: T: AsRef<str>
unsafe { Self::from_data_unchecked(Self::default_data_type(), offsets, values, None) }
Expand Down
15 changes: 15 additions & 0 deletions tests/it/array/utf8/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,18 @@ fn test_extend_trusted_len() {
Some(&Bitmap::from_u8_slice(&[0b00011011], 5))
);
}

/// Checks that `extend_values` appends plain values across several calls and
/// leaves the array without a validity bitmap.
#[test]
fn test_extend_values() {
    let mut array = MutableUtf8Array::<i32>::new();

    // `flatten` drops the `None` entries, so only plain values reach `extend_values`.
    array.extend_values([Some("hi"), None, Some("there"), None].iter().flatten());
    array.extend_values([Some("hello"), None].iter().flatten());
    array.extend_values(vec![Some("again"), None].into_iter().flatten());

    let array: Utf8Array<i32> = array.into();

    // Values are stored back to back; offsets mark the end of each string.
    assert_eq!(array.values().as_slice(), b"hitherehelloagain");
    assert_eq!(array.offsets().as_slice(), &[0, 2, 7, 12, 17]);
    assert_eq!(array.validity(), None);
}