Skip to content

Commit

Permalink
Better numbers and benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
AdamGS committed Sep 4, 2024
1 parent dc09067 commit c63d246
Show file tree
Hide file tree
Showing 6 changed files with 146 additions and 58 deletions.
4 changes: 4 additions & 0 deletions vortex-array/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,7 @@ harness = false
[[bench]]
name = "iter"
harness = false

[[bench]]
name = "fn"
harness = false
67 changes: 67 additions & 0 deletions vortex-array/benches/fn.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
use arrow_array::types::UInt32Type;
use arrow_array::UInt32Array;
use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
use vortex::array::PrimitiveArray;
use vortex::elementwise::{BinaryFn, UnaryFn};
use vortex::validity::Validity;
use vortex::IntoArray;

/// Benchmarks `UnaryFn::unary` on a fully valid Vortex `PrimitiveArray` holding the `u32`
/// values `0..1_000_000`, adding one to every element.
///
/// The array is cloned in the setup closure of `iter_batched`, so only the `unary` call
/// (and its `unwrap`) runs inside the timed routine.
fn vortex_unary_add(c: &mut Criterion) {
    let values = (0_u32..1_000_000).collect::<Vec<_>>();
    let input = PrimitiveArray::from_vec(values, Validity::AllValid);

    c.bench_function("vortex_unary_add", |bencher| {
        bencher.iter_batched(
            || input.clone(),
            |array| array.unary(|v: u32| v + 1).unwrap(),
            BatchSize::SmallInput,
        )
    });
}

fn arrow_unary_add(c: &mut Criterion) {
let data = UInt32Array::from_iter_values(0_u32..1_000_000);
c.bench_function("arrow_unary_add", |b| {
b.iter_batched(
|| data.clone(),
|data: arrow_array::PrimitiveArray<UInt32Type>| data.unary::<_, UInt32Type>(|v| v + 1),
BatchSize::SmallInput,
)
});
}

/// Benchmarks `BinaryFn::binary` on two fully valid Vortex arrays of the `u32` values
/// `0..1_000_000`, adding them element-wise.
///
/// The left-hand side stays a `PrimitiveArray`; the right-hand side is converted with
/// `into_array()` and handed to `binary` through `.into()`. Both inputs are cloned in the
/// setup closure of `iter_batched`, outside the timed routine.
fn vortex_binary_add(c: &mut Criterion) {
    let lhs = PrimitiveArray::from_vec((0_u32..1_000_000).collect::<Vec<_>>(), Validity::AllValid);
    let rhs = PrimitiveArray::from_vec((0_u32..1_000_000).collect::<Vec<_>>(), Validity::AllValid)
        .into_array();

    c.bench_function("vortex_binary_add", |b| {
        b.iter_batched(
            || (lhs.clone(), rhs.clone()),
            // Unwrap the result, as the unary and Arrow benchmarks do: if the kernel ever
            // returns an error the benchmark must fail loudly rather than time the error path.
            |(lhs, rhs)| lhs.binary(rhs.into(), |l: u32, r: u32| l + r).unwrap(),
            BatchSize::SmallInput,
        )
    });
}

/// Benchmarks Arrow's `arity::binary` kernel on two `UInt32Array`s holding `0..1_000_000`,
/// adding them element-wise.
///
/// Both inputs are cloned in the setup closure of `iter_batched`; the timed routine runs the
/// kernel and unwraps its result.
fn arrow_binary_add(c: &mut Criterion) {
    let left = UInt32Array::from_iter_values(0_u32..1_000_000);
    let right = UInt32Array::from_iter_values(0_u32..1_000_000);

    c.bench_function("arrow_binary_add", |bencher| {
        bencher.iter_batched(
            || (left.clone(), right.clone()),
            |(l, r)| {
                let summed =
                    arrow_arith::arity::binary::<_, _, _, UInt32Type>(&l, &r, |x, y| x + y);
                summed.unwrap()
            },
            BatchSize::SmallInput,
        )
    });
}

// Registers the element-wise function benchmarks under the `benches` group with Criterion's
// default configuration. Each Arrow benchmark is listed directly before its Vortex counterpart.
criterion_group!(
name = benches;
config = Criterion::default();
targets =
arrow_unary_add,
vortex_unary_add,
arrow_binary_add,
vortex_binary_add,
);
// Generates the `main` entry point that runs the `benches` group.
criterion_main!(benches);
27 changes: 0 additions & 27 deletions vortex-array/benches/iter.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
use arrow_array::types::UInt32Type;
use arrow_array::UInt32Array;
use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
use itertools::Itertools;
use vortex::array::PrimitiveArray;
use vortex::elementwise::UnaryFn;
use vortex::iter::VectorizedArrayIter;
use vortex::validity::Validity;
use vortex::variants::ArrayVariants;
Expand Down Expand Up @@ -61,28 +58,6 @@ fn vortex_iter_flat(c: &mut Criterion) {
});
}

/// Benchmarks `UnaryFn::unary` on a fully valid Vortex `PrimitiveArray` holding the `u32`
/// values `0..1_000_000`, adding one to every element.
///
/// The array is cloned in the setup closure of `iter_batched`, so only the `unary` call
/// (and its `unwrap`) runs inside the timed routine.
fn vortex_unary_add(c: &mut Criterion) {
    let values = (0_u32..1_000_000).collect_vec();
    let input = PrimitiveArray::from_vec(values, Validity::AllValid);

    c.bench_function("vortex_unary_add", |bencher| {
        bencher.iter_batched(
            || input.clone(),
            |array| array.unary(|v: u32| v + 1).unwrap(),
            BatchSize::SmallInput,
        )
    });
}

fn arrow_unary_add(c: &mut Criterion) {
let data = UInt32Array::from_iter_values(0_u32..1_000_000);
c.bench_function("arrow_unary_add", |b| {
b.iter_batched(
|| data.clone(),
|data: arrow_array::PrimitiveArray<UInt32Type>| data.unary::<_, UInt32Type>(|v| v + 1),
BatchSize::SmallInput,
)
});
}

fn arrow_iter(c: &mut Criterion) {
let data = arrow_array::UInt32Array::from_iter(0_u32..1_000_000);
c.bench_function("arrow_iter", |b| {
Expand Down Expand Up @@ -121,7 +96,5 @@ criterion_group!(
vortex_iter,
vortex_iter_flat,
arrow_iter,
vortex_unary_add,
arrow_unary_add
);
criterion_main!(benches);
54 changes: 48 additions & 6 deletions vortex-array/src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use vortex_error::{vortex_bail, VortexError, VortexResult};
use vortex_scalar::Scalar;

use crate::elementwise::{flat_array_iter, BinaryFn, OtherValue, UnaryFn};
use crate::iter::{Accessor, AccessorRef};
use crate::iter::{Accessor, AccessorRef, Batch};
use crate::stats::StatsSet;
use crate::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata};
use crate::variants::{ArrayVariants, PrimitiveArrayTrait};
Expand Down Expand Up @@ -326,9 +326,11 @@ impl UnaryFn for PrimitiveArray {
unary_fn: F,
) -> VortexResult<Array> {
let mut output = Vec::with_capacity(self.len());
unsafe { output.set_len(self.len()) };
let data = self.maybe_null_slice::<I>();

for v in self.maybe_null_slice::<I>() {
output.push(unary_fn(*v));
for index in 0..data.len() {
unsafe { *output.get_unchecked_mut(index) = unary_fn(data[index]) }
}

Ok(PrimitiveArray::from_vec(output, self.validity()).into_array())
Expand All @@ -349,12 +351,13 @@ impl BinaryFn for PrimitiveArray {
vortex_bail!(MismatchedTypes: self.dtype(), other.dtype());
}

if self.dtype().as_ptype() != Some(&I::PTYPE) {
if PType::try_from(self.dtype())? != I::PTYPE {
vortex_bail!(MismatchedTypes: self.dtype(), I::PTYPE);
}

let lhs = self.maybe_null_slice::<I>();
let mut output = Vec::with_capacity(self.len());
unsafe { output.set_len(self.len()) };

let validity = match other {
OtherValue::Scalar(ref s) => {
Expand All @@ -366,8 +369,17 @@ impl BinaryFn for PrimitiveArray {
}
OtherValue::Array(ref a) => {
let rhs_iter = flat_array_iter::<I>(a);
for (l, r) in lhs.iter().copied().zip(rhs_iter) {
output.push(binary_fn(l, r));
let mut start_idx = 0;
for batch in rhs_iter {
let batch_len = batch.len();
process_batch(
&lhs[start_idx..start_idx + batch_len],
batch,
&binary_fn,
start_idx,
&mut output,
);
start_idx += batch_len;
}

let rhs = a.with_dyn(|a| a.logical_validity().into_validity());
Expand All @@ -379,6 +391,36 @@ impl BinaryFn for PrimitiveArray {
}
}

/// Applies `f` element-wise to `lhs` and the values of `batch`, storing the results in
/// `output[start_idx..start_idx + lhs.len()]`.
///
/// Slots of `output` outside that window are left untouched. `output` is only written to,
/// never grown: its length must already cover the window.
///
/// # Panics
///
/// Panics if `batch.len() != lhs.len()`, if `batch.data()` holds fewer than `lhs.len()`
/// values, or if the destination window does not fit inside `output.len()`.
fn process_batch<I: NativePType, O: NativePType, F: Fn(I, I) -> O>(
    lhs: &[I],
    batch: Batch<I>,
    f: F,
    start_idx: usize,
    output: &mut Vec<O>,
) {
    assert_eq!(batch.len(), lhs.len());

    let rhs = &batch.data()[..lhs.len()];

    // Check the destination window once, up front. The previous implementation wrote through
    // `get_unchecked_mut` without ever validating `start_idx` against `output.len()`, so a bad
    // offset was an out-of-bounds write from a safe function; now it is a panic.
    let dst = &mut output[start_idx..start_idx + lhs.len()];

    // All three slices have the same length, so zipping them walks every element without any
    // per-element indexing. This one loop covers both full-sized and trailing batches.
    for ((slot, &l), &r) in dst.iter_mut().zip(lhs).zip(rhs) {
        *slot = f(l, r);
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
38 changes: 14 additions & 24 deletions vortex-array/src/elementwise.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use vortex_dtype::{DType, NativePType, PType};
use vortex_error::{VortexError, VortexResult};
use vortex_scalar::Scalar;

use crate::iter::Batch;
use crate::{Array, ArrayDType};

pub enum OtherValue {
Expand Down Expand Up @@ -50,84 +51,73 @@ pub trait UnaryFn {
}

// TODO(adamgs): Turn into a macro, or just have some intermediate adapter struct
pub fn flat_array_iter<N: NativePType>(array: &Array) -> Box<dyn Iterator<Item = N>> {
match array.dtype().as_ptype().unwrap() {
pub fn flat_array_iter<N: NativePType>(array: &Array) -> Box<dyn Iterator<Item = Batch<N>>> {
match PType::try_from(array.dtype()).unwrap() {
PType::U8 => Box::new(
array
.with_dyn(|a| a.as_primitive_array_unchecked().u8_iter())
.unwrap()
.flatten()
.map(|o| N::from(o.unwrap_or_default()).unwrap()),
.map(|b| b.as_::<N>()),
),
PType::U16 => Box::new(
array
.with_dyn(|a| a.as_primitive_array_unchecked().u16_iter())
.unwrap()
.flatten()
.map(|o| N::from(o.unwrap_or_default()).unwrap()),
.map(|b| b.as_::<N>()),
),
PType::U32 => Box::new(
array
.with_dyn(|a| a.as_primitive_array_unchecked().u32_iter())
.unwrap()
.flatten()
.map(|o| N::from(o.unwrap_or_default()).unwrap()),
.map(|b| b.as_::<N>()),
),
PType::U64 => Box::new(
array
.with_dyn(|a| a.as_primitive_array_unchecked().u64_iter())
.unwrap()
.flatten()
.map(|o| N::from(o.unwrap_or_default()).unwrap()),
.map(|b| b.as_::<N>()),
),
PType::I8 => Box::new(
array
.with_dyn(|a| a.as_primitive_array_unchecked().i8_iter())
.unwrap()
.flatten()
.map(|o| N::from(o.unwrap_or_default()).unwrap()),
.map(|b| b.as_::<N>()),
),
PType::I16 => Box::new(
array
.with_dyn(|a| a.as_primitive_array_unchecked().i16_iter())
.unwrap()
.flatten()
.map(|o| N::from(o.unwrap_or_default()).unwrap()),
.map(|b| b.as_::<N>()),
),
PType::I32 => Box::new(
array
.with_dyn(|a| a.as_primitive_array_unchecked().i32_iter())
.unwrap()
.flatten()
.map(|o| N::from(o.unwrap_or_default()).unwrap()),
.map(|b| b.as_::<N>()),
),
PType::I64 => Box::new(
array
.with_dyn(|a| a.as_primitive_array_unchecked().i64_iter())
.unwrap()
.flatten()
.map(|o| N::from(o.unwrap_or_default()).unwrap()),
.map(|b| b.as_::<N>()),
),
PType::F16 => Box::new(
array
.with_dyn(|a| a.as_primitive_array_unchecked().u64_iter())
.unwrap()
.flatten()
.map(|o| N::from(o.unwrap_or_default()).unwrap()),
.map(|b| b.as_::<N>()),
),
PType::F32 => Box::new(
array
.with_dyn(|a| a.as_primitive_array_unchecked().f32_iter())
.unwrap()
.flatten()
.map(|o| N::from(o.unwrap_or_default()).unwrap()),
.map(|b| b.as_::<N>()),
),
PType::F64 => Box::new(
array
.with_dyn(|a| a.as_primitive_array_unchecked().f64_iter())
.unwrap()
.flatten()
.map(|o| N::from(o.unwrap_or_default()).unwrap()),
.map(|b| b.as_::<N>()),
),
}
}
14 changes: 13 additions & 1 deletion vortex-array/src/iter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::sync::Arc;

pub use adapter::*;
pub use ext::*;
use vortex_dtype::DType;
use vortex_dtype::{DType, NativePType};
use vortex_error::VortexResult;

use crate::validity::Validity;
Expand Down Expand Up @@ -124,6 +124,18 @@ impl<T> Batch<T> {
pub unsafe fn get_unchecked(&self, index: usize) -> &T {
// SAFETY: `index` is forwarded unchanged to `data.get_unchecked`; keeping it in bounds is
// presumably the caller's obligation under this function's `unsafe` contract — confirm.
unsafe { self.data.get_unchecked(index) }
}

/// Consumes the batch and reinterprets its data as a batch of `N`, carrying the validity
/// over unchanged.
///
/// No per-element conversion is performed: `data` is passed through `transmute` as a whole.
///
/// # Panics
///
/// Panics if `T` and `N` do not have the same size.
pub fn as_<N: NativePType>(self) -> Batch<N> {
// Only the element sizes are compared; alignment and numeric kind are not checked here.
assert_eq!(std::mem::size_of::<T>(), std::mem::size_of::<N>());
Batch {
// NOTE(review): this transmutes the whole `data` container rather than its elements, so it
// assumes the container has an identical layout for `T` and `N` — confirm against the field's
// type. Values keep their bit pattern, so same-sized but different kinds (e.g. `f32` read as
// `u32`) would presumably be reinterpreted rather than converted — verify callers expect this.
data: unsafe { std::mem::transmute(self.data) },
validity: self.validity,
}
}

/// Returns the batch's values as a borrowed slice.
pub fn data(&self) -> &[T] {
self.data.as_ref()
}
}

pub struct FlattenedBatch<T> {
Expand Down

0 comments on commit c63d246

Please sign in to comment.