Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Improved performance of list iterator (- 10-20%) #441

Merged
merged 2 commits into from
Sep 25, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -230,3 +230,7 @@ harness = false
[[bench]]
name = "iter_utf8"
harness = false

[[bench]]
name = "iter_list"
harness = false
54 changes: 54 additions & 0 deletions benches/iter_list.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
use arrow2::{
array::{ListArray, PrimitiveArray},
buffer::Buffer,
datatypes::DataType,
};

use arrow2::bitmap::Bitmap;
use arrow2::buffer::MutableBuffer;
use criterion::{criterion_group, criterion_main, Criterion};
use std::iter::FromIterator;
use std::sync::Arc;

fn add_benchmark(c: &mut Criterion) {
(10..=20).step_by(2).for_each(|log2_size| {
let size = 2usize.pow(log2_size);

let values = Buffer::from_iter(0..size as i32);
let values = PrimitiveArray::<i32>::from_data(DataType::Int32, values, None);

let mut offsets = MutableBuffer::from_iter((0..size as i32).step_by(2));
offsets.push(size as i32);

let validity = (0..(offsets.len() - 1))
.map(|i| i % 4 == 0)
.collect::<Bitmap>();

let data_type = ListArray::<i32>::default_datatype(DataType::Int32);
let array = ListArray::<i32>::from_data(
data_type,
offsets.into(),
Arc::new(values),
Some(validity),
);

c.bench_function(&format!("list: iter_values 2^{}", log2_size), |b| {
b.iter(|| {
for x in array.values_iter() {
assert_eq!(x.len(), 2);
}
})
});

c.bench_function(&format!("list: iter 2^{}", log2_size), |b| {
b.iter(|| {
for x in array.iter() {
assert_eq!(x.unwrap().len(), 2);
}
})
});
})
}

criterion_group!(benches, add_benchmark);
criterion_main!(benches);
23 changes: 21 additions & 2 deletions src/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,24 @@ impl<O: Offset> BinaryArray<O> {
/// # Panics
/// iff `offset + length > self.len()`.
pub fn slice(&self, offset: usize, length: usize) -> Self {
let validity = self.validity.clone().map(|x| x.slice(offset, length));
let offsets = self.offsets.clone().slice(offset, length + 1);
assert!(
offset + length <= self.len(),
"the offset of the new Buffer cannot exceed the existing length"
);
unsafe { self.slice_unchecked(offset, length) }
}

/// Creates a new [`BinaryArray`] by slicing this [`BinaryArray`].
/// # Implementation
/// This function is `O(1)`: all data will be shared between both arrays.
/// # Safety
/// The caller must ensure that `offset + length < self.len()`.
ritchie46 marked this conversation as resolved.
Show resolved Hide resolved
pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self {
let validity = self
.validity
.clone()
.map(|x| x.slice_unchecked(offset, length));
let offsets = self.offsets.clone().slice_unchecked(offset, length + 1);
Self {
data_type: self.data_type.clone(),
offsets,
Expand Down Expand Up @@ -175,6 +191,9 @@ impl<O: Offset> Array for BinaryArray<O> {
fn slice(&self, offset: usize, length: usize) -> Box<dyn Array> {
Box::new(self.slice(offset, length))
}
unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box<dyn Array> {
Box::new(self.slice_unchecked(offset, length))
}
fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
Box::new(self.with_validity(validity))
}
Expand Down
25 changes: 23 additions & 2 deletions src/array/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,27 @@ impl BooleanArray {
/// This function panics iff `offset + length >= self.len()`.
#[inline]
pub fn slice(&self, offset: usize, length: usize) -> Self {
let validity = self.validity.clone().map(|x| x.slice(offset, length));
assert!(
offset + length <= self.len(),
"the offset of the new Buffer cannot exceed the existing length"
);
unsafe { self.slice_unchecked(offset, length) }
}

/// Returns a slice of this [`BooleanArray`].
/// # Implementation
/// This operation is `O(1)` as it amounts to increase two ref counts.
/// # Safety
/// The caller must ensure that `offset + length < self.len()`.
ritchie46 marked this conversation as resolved.
Show resolved Hide resolved
#[inline]
pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self {
let validity = self
.validity
.clone()
.map(|x| x.slice_unchecked(offset, length));
Self {
data_type: self.data_type.clone(),
values: self.values.clone().slice(offset, length),
values: self.values.clone().slice_unchecked(offset, length),
validity,
offset: self.offset + offset,
}
Expand Down Expand Up @@ -130,6 +147,10 @@ impl Array for BooleanArray {
fn slice(&self, offset: usize, length: usize) -> Box<dyn Array> {
Box::new(self.slice(offset, length))
}
#[inline]
unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box<dyn Array> {
Box::new(self.slice_unchecked(offset, length))
}
fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
Box::new(self.with_validity(validity))
}
Expand Down
15 changes: 15 additions & 0 deletions src/array/dictionary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ impl<K: DictionaryKey> DictionaryArray<K> {
}

/// Creates a new [`DictionaryArray`] by slicing the existing [`DictionaryArray`].
/// # Panics
/// iff `offset + length > self.len()`.
pub fn slice(&self, offset: usize, length: usize) -> Self {
Self {
data_type: self.data_type.clone(),
Expand All @@ -83,6 +85,16 @@ impl<K: DictionaryKey> DictionaryArray<K> {
}
}

/// Creates a new [`DictionaryArray`] by slicing the existing [`DictionaryArray`].
ritchie46 marked this conversation as resolved.
Show resolved Hide resolved
pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self {
Self {
data_type: self.data_type.clone(),
keys: self.keys.clone().slice_unchecked(offset, length),
values: self.values.clone(),
offset: self.offset + offset,
}
}

/// Sets the validity bitmap on this [`Array`].
/// # Panic
/// This function panics iff `validity.len() != self.len()`.
Expand Down Expand Up @@ -149,6 +161,9 @@ impl<K: DictionaryKey> Array for DictionaryArray<K> {
fn slice(&self, offset: usize, length: usize) -> Box<dyn Array> {
Box::new(self.slice(offset, length))
}
unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box<dyn Array> {
Box::new(self.slice(offset, length))
ritchie46 marked this conversation as resolved.
Show resolved Hide resolved
}
fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
Box::new(self.with_validity(validity))
}
Expand Down
25 changes: 23 additions & 2 deletions src/array/fixed_size_binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,30 @@ impl FixedSizeBinaryArray {
/// Returns a slice of this [`FixedSizeBinaryArray`].
/// # Implementation
/// This operation is `O(1)` as it amounts to increase 3 ref counts.
/// # Panics
/// panics iff `offset + length >= self.len()`
ritchie46 marked this conversation as resolved.
Show resolved Hide resolved
pub fn slice(&self, offset: usize, length: usize) -> Self {
let validity = self.validity.clone().map(|x| x.slice(offset, length));
assert!(
offset + length <= self.len(),
"the offset of the new Buffer cannot exceed the existing length"
);
unsafe { self.slice_unchecked(offset, length) }
}

/// Returns a slice of this [`FixedSizeBinaryArray`].
/// # Implementation
/// This operation is `O(1)` as it amounts to increase 3 ref counts.
/// # Safety
/// The caller must ensure that `offset + length < self.len()`.
ritchie46 marked this conversation as resolved.
Show resolved Hide resolved
pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self {
let validity = self
.validity
.clone()
.map(|x| x.slice_unchecked(offset, length));
let values = self
.values
.clone()
.slice(offset * self.size as usize, length * self.size as usize);
.slice_unchecked(offset * self.size as usize, length * self.size as usize);
Self {
data_type: self.data_type.clone(),
size: self.size,
Expand Down Expand Up @@ -139,6 +157,9 @@ impl Array for FixedSizeBinaryArray {
fn slice(&self, offset: usize, length: usize) -> Box<dyn Array> {
Box::new(self.slice(offset, length))
}
unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box<dyn Array> {
Box::new(self.slice_unchecked(offset, length))
}
fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
Box::new(self.with_validity(validity))
}
Expand Down
4 changes: 2 additions & 2 deletions src/array/fixed_size_list/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ use crate::{
use super::FixedSizeListArray;

impl IterableListArray for FixedSizeListArray {
fn value(&self, i: usize) -> Box<dyn Array> {
FixedSizeListArray::value(self, i)
unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
FixedSizeListArray::value_unchecked(self, i)
}
}

Expand Down
36 changes: 34 additions & 2 deletions src/array/fixed_size_list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,30 @@ impl FixedSizeListArray {
/// Returns a slice of this [`FixedSizeListArray`].
/// # Implementation
/// This operation is `O(1)`.
/// # Panics
/// panics iff `offset + length >= self.len()`
ritchie46 marked this conversation as resolved.
Show resolved Hide resolved
pub fn slice(&self, offset: usize, length: usize) -> Self {
let validity = self.validity.clone().map(|x| x.slice(offset, length));
assert!(
offset + length <= self.len(),
"the offset of the new Buffer cannot exceed the existing length"
);
unsafe { self.slice_unchecked(offset, length) }
}

/// Returns a slice of this [`FixedSizeListArray`].
/// # Implementation
/// This operation is `O(1)`.
/// # Safety
/// The caller must ensure that `offset + length < self.len()`.
ritchie46 marked this conversation as resolved.
Show resolved Hide resolved
pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self {
let validity = self
.validity
.clone()
.map(|x| x.slice_unchecked(offset, length));
let values = self
.values
.clone()
.slice(offset * self.size as usize, length * self.size as usize)
.slice_unchecked(offset * self.size as usize, length * self.size as usize)
.into();
Self {
data_type: self.data_type.clone(),
Expand All @@ -85,12 +103,23 @@ impl FixedSizeListArray {
}

/// Returns the `Vec<T>` at position `i`.
/// # Panic:
/// panics iff `i >= self.len()`
#[inline]
pub fn value(&self, i: usize) -> Box<dyn Array> {
self.values
.slice(i * self.size as usize, self.size as usize)
}

/// Returns the `Vec<T>` at position `i`.
/// # Safety:
/// Caller must ensure that `i < self.len()`
#[inline]
pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
self.values
.slice_unchecked(i * self.size as usize, self.size as usize)
}

/// Sets the validity bitmap on this [`FixedSizeListArray`].
/// # Panic
/// This function panics iff `validity.len() != self.len()`.
Expand Down Expand Up @@ -143,6 +172,9 @@ impl Array for FixedSizeListArray {
fn slice(&self, offset: usize, length: usize) -> Box<dyn Array> {
Box::new(self.slice(offset, length))
}
unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box<dyn Array> {
Box::new(self.slice_unchecked(offset, length))
}
fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
Box::new(self.with_validity(validity))
}
Expand Down
12 changes: 8 additions & 4 deletions src/array/list/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ impl<'a, A: IterableListArray> Iterator for ListValuesIter<'a, A> {
}
let old = self.index;
self.index += 1;
Some(self.array.value(old))
// Safety:
// self.end is maximized by the length of the array
Some(unsafe { self.array.value_unchecked(old) })
}

#[inline]
Expand All @@ -50,14 +52,16 @@ impl<'a, A: IterableListArray> DoubleEndedIterator for ListValuesIter<'a, A> {
None
} else {
self.end -= 1;
Some(self.array.value(self.end))
// Safety:
// self.end is maximized by the length of the array
Some(unsafe { self.array.value_unchecked(self.end) })
}
}
}

impl<O: Offset> IterableListArray for ListArray<O> {
fn value(&self, i: usize) -> Box<dyn Array> {
ListArray::<O>::value(self, i)
unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
ListArray::<O>::value_unchecked(self, i)
}
}

Expand Down
31 changes: 27 additions & 4 deletions src/array/list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,10 @@ impl<O: Offset> ListArray<O> {
let offset_1 = offsets[i + 1];
let length = (offset_1 - offset).to_usize();

self.values.slice(offset.to_usize(), length)
// Safety:
// One of the invariants of the struct
// is that offsets are in bounds
unsafe { self.values.slice_unchecked(offset.to_usize(), length) }
}
}

Expand All @@ -93,12 +96,29 @@ impl<O: Offset> ListArray<O> {
let offset_1 = *self.offsets.as_ptr().add(i + 1);
let length = (offset_1 - offset).to_usize();

self.values.slice(offset.to_usize(), length)
self.values.slice_unchecked(offset.to_usize(), length)
}

/// Returns a slice of this [`ListArray`].
/// # Panics
/// panics iff `offset + length >= self.len()`
pub fn slice(&self, offset: usize, length: usize) -> Self {
let validity = self.validity.clone().map(|x| x.slice(offset, length));
let offsets = self.offsets.clone().slice(offset, length + 1);
assert!(
offset + length <= self.len(),
"the offset of the new Buffer cannot exceed the existing length"
);
unsafe { self.slice_unchecked(offset, length) }
}

/// Returns a slice of this [`ListArray`].
/// # Safety
/// The caller must ensure that `offset + length < self.len()`.
pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self {
let validity = self
.validity
.clone()
.map(|x| x.slice_unchecked(offset, length));
let offsets = self.offsets.clone().slice_unchecked(offset, length + 1);
Self {
data_type: self.data_type.clone(),
offsets,
Expand Down Expand Up @@ -186,6 +206,9 @@ impl<O: Offset> Array for ListArray<O> {
fn slice(&self, offset: usize, length: usize) -> Box<dyn Array> {
Box::new(self.slice(offset, length))
}
unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box<dyn Array> {
Box::new(self.slice_unchecked(offset, length))
}
fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
Box::new(self.with_validity(validity))
}
Expand Down
Loading