Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
MutableUtf8Array::extend_values (#798)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Feb 3, 2022
1 parent 89d8e09 commit e577c9f
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 8 deletions.
35 changes: 35 additions & 0 deletions src/array/physical_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,41 @@ pub(crate) unsafe fn extend_from_trusted_len_values_iter<I, P, O>(
offsets.set_len(offsets.len() + additional);
}

/// Appends every item of `iterator` to the `offsets` and `values` buffers.
///
/// For each item, its bytes are copied to the end of `values` and a new
/// running end-offset (previous offset plus the item's byte length) is pushed
/// onto `offsets`. The iterator does not need to report an exact length: only
/// the lower bound of its `size_hint` is used, to pre-reserve space in
/// `offsets`.
///
/// Returns the number of items that were added.
///
/// # Panics
///
/// Panics if `offsets` is empty (there is no previous offset to continue
/// from), or if `O::from_usize` fails to convert an item's byte length.
#[inline]
pub(crate) fn extend_from_values_iter<I, P, O>(
    offsets: &mut Vec<O>,
    values: &mut Vec<u8>,
    iterator: I,
) -> usize
where
    O: Offset,
    P: AsRef<[u8]>,
    I: Iterator<Item = P>,
{
    // Only the lower bound is trusted; `push` grows the buffer if it was too small.
    let (lower_bound, _) = iterator.size_hint();
    offsets.reserve(lower_bound);

    // The last stored offset is the starting point for all offsets added below.
    let mut end = *offsets.last().unwrap();
    let len_before = offsets.len();

    iterator.for_each(|item| {
        let bytes: &[u8] = item.as_ref();
        // Advance the running offset first, so a failed conversion leaves
        // `values` untouched for this item.
        end += O::from_usize(bytes.len()).unwrap();

        values.extend_from_slice(bytes);
        offsets.push(end);
    });

    offsets.len() - len_before
}

// Populates `offsets`, `values`, and `validity` [`Vec`]s with
// information extracted from the incoming `iterator`.
//
Expand Down
32 changes: 24 additions & 8 deletions src/array/utf8/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ use crate::{
use super::Utf8Array;
use crate::array::physical_binary::*;

struct Wrapper<P>(P);
impl<T: AsRef<str>> AsRef<[u8]> for Wrapper<T> {
struct StrAsBytes<P>(P);
impl<T: AsRef<str>> AsRef<[u8]> for StrAsBytes<T> {
#[inline]
fn as_ref(&self) -> &[u8] {
self.0.as_ref().as_bytes()
Expand Down Expand Up @@ -278,6 +278,22 @@ impl<O: Offset> MutableUtf8Array<O> {
unsafe { self.extend_trusted_len_values_unchecked(iterator) }
}

/// Appends every string produced by `iterator` to this [`MutableUtf8Array`].
///
/// All appended items are marked as valid. This differs from
/// `extend_trusted_len`, which accepts an iterator of optional values.
#[inline]
pub fn extend_values<I, P>(&mut self, iterator: I)
where
    P: AsRef<str>,
    I: Iterator<Item = P>,
{
    // View each string as raw bytes and let the shared helper fill the
    // buffers; it reports how many items it actually appended.
    let additional = extend_from_values_iter(
        &mut self.offsets,
        &mut self.values,
        iterator.map(StrAsBytes),
    );

    // A validity bitmap only exists once it has been set up; when it does,
    // it must grow in step with the offsets.
    if let Some(bitmap) = self.validity.as_mut() {
        bitmap.extend_constant(additional, true);
    }
}

/// Extends the [`MutableUtf8Array`] from an iterator of values of trusted len.
/// This differs from `extend_trusted_len_unchecked`, which accepts an iterator of optional
/// values.
Expand All @@ -292,7 +308,7 @@ impl<O: Offset> MutableUtf8Array<O> {
let (_, upper) = iterator.size_hint();
let additional = upper.expect("extend_trusted_len_values requires an upper limit");

let iterator = iterator.map(Wrapper);
let iterator = iterator.map(StrAsBytes);
extend_from_trusted_len_values_iter(&mut self.offsets, &mut self.values, iterator);

if let Some(validity) = self.validity.as_mut() {
Expand Down Expand Up @@ -325,7 +341,7 @@ impl<O: Offset> MutableUtf8Array<O> {
self.validity = Some(validity);
}

let iterator = iterator.map(|x| x.map(Wrapper));
let iterator = iterator.map(|x| x.map(StrAsBytes));
extend_from_trusted_len_iter(
&mut self.offsets,
&mut self.values,
Expand All @@ -348,7 +364,7 @@ impl<O: Offset> MutableUtf8Array<O> {
P: AsRef<str>,
I: Iterator<Item = Option<P>>,
{
let iterator = iterator.map(|x| x.map(Wrapper));
let iterator = iterator.map(|x| x.map(StrAsBytes));
let (validity, offsets, values) = trusted_len_unzip(iterator);

// soundness: P is `str`
Expand All @@ -374,7 +390,7 @@ impl<O: Offset> MutableUtf8Array<O> {
pub unsafe fn from_trusted_len_values_iter_unchecked<T: AsRef<str>, I: Iterator<Item = T>>(
iterator: I,
) -> Self {
let iterator = iterator.map(Wrapper);
let iterator = iterator.map(StrAsBytes);
let (offsets, values) = unsafe { trusted_len_values_iter(iterator) };
// soundness: T is AsRef<str>
Self::from_data_unchecked(Self::default_data_type(), offsets, values, None)
Expand Down Expand Up @@ -417,7 +433,7 @@ impl<O: Offset> MutableUtf8Array<O> {
{
let iterator = iterator.into_iter();

let iterator = iterator.map(|x| x.map(|x| x.map(Wrapper)));
let iterator = iterator.map(|x| x.map(|x| x.map(StrAsBytes)));
let (validity, offsets, values) = try_trusted_len_unzip(iterator)?;

// soundness: P is `str`
Expand All @@ -442,7 +458,7 @@ impl<O: Offset> MutableUtf8Array<O> {

/// Creates a new [`MutableUtf8Array`] from an [`Iterator`] of `&str`.
pub fn from_iter_values<T: AsRef<str>, I: Iterator<Item = T>>(iterator: I) -> Self {
let iterator = iterator.map(Wrapper);
let iterator = iterator.map(StrAsBytes);
let (offsets, values) = values_iter(iterator);
// soundness: T: AsRef<str>
unsafe { Self::from_data_unchecked(Self::default_data_type(), offsets, values, None) }
Expand Down
15 changes: 15 additions & 0 deletions tests/it/array/utf8/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,18 @@ fn test_extend_trusted_len() {
Some(&Bitmap::from_u8_slice(&[0b00011011], 5))
);
}

#[test]
fn test_extend_values() {
    let mut mutable = MutableUtf8Array::<i32>::new();

    // `flatten` drops the `None`s, so only plain values reach `extend_values`.
    // The first two calls yield borrowed items, the third yields owned ones.
    mutable.extend_values([Some("hi"), None, Some("there"), None].iter().flatten());
    mutable.extend_values([Some("hello"), None].iter().flatten());
    mutable.extend_values(vec![Some("again"), None].into_iter().flatten());

    let frozen: Utf8Array<i32> = mutable.into();

    // Four strings were appended back to back, with no validity bitmap.
    assert_eq!(frozen.values().as_slice(), b"hitherehelloagain");
    assert_eq!(frozen.offsets().as_slice(), &[0, 2, 7, 12, 17]);
    assert_eq!(frozen.validity(), None);
}

0 comments on commit e577c9f

Please sign in to comment.