From c63d2461198b7ab2c0ef81c2e3ea2fa4d2b329e5 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Wed, 4 Sep 2024 17:34:44 +0100 Subject: [PATCH] Better numbers and benchmarks --- vortex-array/Cargo.toml | 4 ++ vortex-array/benches/fn.rs | 67 +++++++++++++++++++++++++ vortex-array/benches/iter.rs | 27 ---------- vortex-array/src/array/primitive/mod.rs | 54 +++++++++++++++++--- vortex-array/src/elementwise.rs | 38 ++++++-------- vortex-array/src/iter/mod.rs | 14 +++++- 6 files changed, 146 insertions(+), 58 deletions(-) create mode 100644 vortex-array/benches/fn.rs diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml index 8de78bb24..037b97552 100644 --- a/vortex-array/Cargo.toml +++ b/vortex-array/Cargo.toml @@ -89,3 +89,7 @@ harness = false [[bench]] name = "iter" harness = false + +[[bench]] +name = "fn" +harness = false diff --git a/vortex-array/benches/fn.rs b/vortex-array/benches/fn.rs new file mode 100644 index 000000000..a6930d463 --- /dev/null +++ b/vortex-array/benches/fn.rs @@ -0,0 +1,67 @@ +use arrow_array::types::UInt32Type; +use arrow_array::UInt32Array; +use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; +use vortex::array::PrimitiveArray; +use vortex::elementwise::{BinaryFn, UnaryFn}; +use vortex::validity::Validity; +use vortex::IntoArray; + +fn vortex_unary_add(c: &mut Criterion) { + let data = PrimitiveArray::from_vec((0_u32..1_000_000).collect::>(), Validity::AllValid); + c.bench_function("vortex_unary_add", |b| { + b.iter_batched( + || (data.clone()), + |data| data.unary(|v: u32| v + 1).unwrap(), + BatchSize::SmallInput, + ) + }); +} + +fn arrow_unary_add(c: &mut Criterion) { + let data = UInt32Array::from_iter_values(0_u32..1_000_000); + c.bench_function("arrow_unary_add", |b| { + b.iter_batched( + || data.clone(), + |data: arrow_array::PrimitiveArray| data.unary::<_, UInt32Type>(|v| v + 1), + BatchSize::SmallInput, + ) + }); +} + +fn vortex_binary_add(c: &mut Criterion) { + let lhs = PrimitiveArray::from_vec((0_u32..1_000_000).collect::>(), Validity::AllValid); + let rhs = PrimitiveArray::from_vec((0_u32..1_000_000).collect::>(), Validity::AllValid) + .into_array(); + c.bench_function("vortex_binary_add", |b| { + b.iter_batched( + || (lhs.clone(), rhs.clone()), + |(lhs, rhs)| lhs.binary(rhs.into(), |l: u32, r: u32| l + r), + BatchSize::SmallInput, + ) + }); +} + +fn arrow_binary_add(c: &mut Criterion) { + let lhs = UInt32Array::from_iter_values(0_u32..1_000_000); + let rhs = UInt32Array::from_iter_values(0_u32..1_000_000); + c.bench_function("arrow_binary_add", |b| { + b.iter_batched( + || (lhs.clone(), rhs.clone()), + |(lhs, rhs)| { + arrow_arith::arity::binary::<_, _, _, UInt32Type>(&lhs, &rhs, |a, b| a + b).unwrap() + }, + BatchSize::SmallInput, + ) + }); +} + +criterion_group!( + name = benches; + config = Criterion::default(); + targets = + arrow_unary_add, + vortex_unary_add, + arrow_binary_add, + vortex_binary_add, +); +criterion_main!(benches); diff --git a/vortex-array/benches/iter.rs b/vortex-array/benches/iter.rs index 6ebc79b60..a919e7b18 100644 --- a/vortex-array/benches/iter.rs +++ b/vortex-array/benches/iter.rs @@ -1,9 +1,6 @@ -use arrow_array::types::UInt32Type; -use arrow_array::UInt32Array; use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; use itertools::Itertools; use vortex::array::PrimitiveArray; -use vortex::elementwise::UnaryFn; use vortex::iter::VectorizedArrayIter; use vortex::validity::Validity; use vortex::variants::ArrayVariants; @@ -61,28 +58,6 @@ fn vortex_iter_flat(c: &mut Criterion) { }); } -fn vortex_unary_add(c: &mut Criterion) { - let data = PrimitiveArray::from_vec((0_u32..1_000_000).collect_vec(), Validity::AllValid); - c.bench_function("vortex_unary_add", |b| { - b.iter_batched( - || (data.clone()), - |data| data.unary(|v: u32| v + 1).unwrap(), - BatchSize::SmallInput, - ) - }); -} - -fn arrow_unary_add(c: &mut Criterion) { - let data = UInt32Array::from_iter_values(0_u32..1_000_000); - c.bench_function("arrow_unary_add", |b| { - b.iter_batched( - || data.clone(), - |data: arrow_array::PrimitiveArray| data.unary::<_, UInt32Type>(|v| v + 1), - BatchSize::SmallInput, - ) - }); -} - fn arrow_iter(c: &mut Criterion) { let data = arrow_array::UInt32Array::from_iter(0_u32..1_000_000); c.bench_function("arrow_iter", |b| { @@ -121,7 +96,5 @@ criterion_group!( vortex_iter, vortex_iter_flat, arrow_iter, - vortex_unary_add, - arrow_unary_add ); criterion_main!(benches); diff --git a/vortex-array/src/array/primitive/mod.rs b/vortex-array/src/array/primitive/mod.rs index 414200876..835323630 100644 --- a/vortex-array/src/array/primitive/mod.rs +++ b/vortex-array/src/array/primitive/mod.rs @@ -12,7 +12,7 @@ use vortex_error::{vortex_bail, VortexError, VortexResult}; use vortex_scalar::Scalar; use crate::elementwise::{flat_array_iter, BinaryFn, OtherValue, UnaryFn}; -use crate::iter::{Accessor, AccessorRef}; +use crate::iter::{Accessor, AccessorRef, Batch}; use crate::stats::StatsSet; use crate::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata}; use crate::variants::{ArrayVariants, PrimitiveArrayTrait}; @@ -326,9 +326,11 @@ impl UnaryFn for PrimitiveArray { unary_fn: F, ) -> VortexResult { let mut output = Vec::with_capacity(self.len()); + unsafe { output.set_len(self.len()) }; + let data = self.maybe_null_slice::(); - for v in self.maybe_null_slice::() { - output.push(unary_fn(*v)); + for index in 0..data.len() { + unsafe { *output.get_unchecked_mut(index) = unary_fn(data[index]) } } Ok(PrimitiveArray::from_vec(output, self.validity()).into_array()) @@ -349,12 +351,13 @@ impl BinaryFn for PrimitiveArray { vortex_bail!(MismatchedTypes: self.dtype(), other.dtype()); } - if self.dtype().as_ptype() != Some(&I::PTYPE) { + if PType::try_from(self.dtype())? != I::PTYPE { vortex_bail!(MismatchedTypes: self.dtype(), I::PTYPE); } let lhs = self.maybe_null_slice::(); let mut output = Vec::with_capacity(self.len()); + unsafe { output.set_len(self.len()) }; let validity = match other { OtherValue::Scalar(ref s) => { @@ -366,8 +369,17 @@ impl BinaryFn for PrimitiveArray { } OtherValue::Array(ref a) => { let rhs_iter = flat_array_iter::(a); - for (l, r) in lhs.iter().copied().zip(rhs_iter) { - output.push(binary_fn(l, r)); + let mut start_idx = 0; + for batch in rhs_iter { + let batch_len = batch.len(); + process_batch( + &lhs[start_idx..start_idx + batch_len], + batch, + &binary_fn, + start_idx, + &mut output, + ); + start_idx += batch_len; } let rhs = a.with_dyn(|a| a.logical_validity().into_validity()); @@ -379,6 +391,36 @@ impl BinaryFn for PrimitiveArray { } } +fn process_batch O>( + lhs: &[I], + batch: Batch, + f: F, + start_idx: usize, + output: &mut Vec, +) { + assert_eq!(batch.len(), lhs.len()); + + if batch.len() == 1024 { + let lhs: [I; 1024] = lhs.try_into().unwrap(); + let rhs: [I; 1024] = batch.data().try_into().unwrap(); + + for idx in 0_usize..1024 { + unsafe { + *output.get_unchecked_mut(idx + start_idx) = f(lhs[idx], rhs[idx]); + } + } + } else { + let mut lhs = lhs.iter(); + let rhs = batch.data(); + for idx in 0..batch.len() { + let l = lhs.next().unwrap(); + unsafe { + *output.get_unchecked_mut(idx + start_idx) = f(*l, rhs[idx]); + } + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/vortex-array/src/elementwise.rs b/vortex-array/src/elementwise.rs index 47908bb20..f36021511 100644 --- a/vortex-array/src/elementwise.rs +++ b/vortex-array/src/elementwise.rs @@ -2,6 +2,7 @@ use vortex_dtype::{DType, NativePType, PType}; use vortex_error::{VortexError, VortexResult}; use vortex_scalar::Scalar; +use crate::iter::Batch; use crate::{Array, ArrayDType}; pub enum OtherValue { @@ -50,84 +51,73 @@ pub trait UnaryFn { } // TODO(adamgs): Turn into a macro, or just have some intermediate adapter struct -pub fn flat_array_iter(array: &Array) -> Box> { - match array.dtype().as_ptype().unwrap() { +pub fn flat_array_iter(array: &Array) -> Box>> { + match PType::try_from(array.dtype()).unwrap() { PType::U8 => Box::new( array .with_dyn(|a| a.as_primitive_array_unchecked().u8_iter()) .unwrap() - .flatten() - .map(|o| N::from(o.unwrap_or_default()).unwrap()), + .map(|b| b.as_::()), ), PType::U16 => Box::new( array .with_dyn(|a| a.as_primitive_array_unchecked().u16_iter()) .unwrap() - .flatten() - .map(|o| N::from(o.unwrap_or_default()).unwrap()), + .map(|b| b.as_::()), ), PType::U32 => Box::new( array .with_dyn(|a| a.as_primitive_array_unchecked().u32_iter()) .unwrap() - .flatten() - .map(|o| N::from(o.unwrap_or_default()).unwrap()), + .map(|b| b.as_::()), ), PType::U64 => Box::new( array .with_dyn(|a| a.as_primitive_array_unchecked().u64_iter()) .unwrap() - .flatten() - .map(|o| N::from(o.unwrap_or_default()).unwrap()), + .map(|b| b.as_::()), ), PType::I8 => Box::new( array .with_dyn(|a| a.as_primitive_array_unchecked().i8_iter()) .unwrap() - .flatten() - .map(|o| N::from(o.unwrap_or_default()).unwrap()), + .map(|b| b.as_::()), ), PType::I16 => Box::new( array .with_dyn(|a| a.as_primitive_array_unchecked().i16_iter()) .unwrap() - .flatten() - .map(|o| N::from(o.unwrap_or_default()).unwrap()), + .map(|b| b.as_::()), ), PType::I32 => Box::new( array .with_dyn(|a| a.as_primitive_array_unchecked().i32_iter()) .unwrap() - .flatten() - .map(|o| N::from(o.unwrap_or_default()).unwrap()), + .map(|b| b.as_::()), ), PType::I64 => Box::new( array .with_dyn(|a| a.as_primitive_array_unchecked().i64_iter()) .unwrap() - .flatten() - .map(|o| N::from(o.unwrap_or_default()).unwrap()), + .map(|b| b.as_::()), ), PType::F16 => Box::new( array .with_dyn(|a| a.as_primitive_array_unchecked().u64_iter()) .unwrap() - .flatten() - .map(|o| N::from(o.unwrap_or_default()).unwrap()), + .map(|b| b.as_::()), ), PType::F32 => Box::new( array .with_dyn(|a| a.as_primitive_array_unchecked().f32_iter()) .unwrap() - .flatten() - .map(|o| N::from(o.unwrap_or_default()).unwrap()), + .map(|b| b.as_::()), ), PType::F64 => Box::new( array .with_dyn(|a| a.as_primitive_array_unchecked().f64_iter()) .unwrap() - .flatten() - .map(|o| N::from(o.unwrap_or_default()).unwrap()), + .map(|b| b.as_::()), ), } } diff --git a/vortex-array/src/iter/mod.rs b/vortex-array/src/iter/mod.rs index 42b8b677d..dabca015e 100644 --- a/vortex-array/src/iter/mod.rs +++ b/vortex-array/src/iter/mod.rs @@ -2,7 +2,7 @@ use std::sync::Arc; pub use adapter::*; pub use ext::*; -use vortex_dtype::DType; +use vortex_dtype::{DType, NativePType}; use vortex_error::VortexResult; use crate::validity::Validity; @@ -124,6 +124,18 @@ impl Batch { pub unsafe fn get_unchecked(&self, index: usize) -> &T { unsafe { self.data.get_unchecked(index) } } + + pub fn as_(self) -> Batch { + assert_eq!(std::mem::size_of::(), std::mem::size_of::()); + Batch { + data: unsafe { std::mem::transmute(self.data) }, + validity: self.validity, + } + } + + pub fn data(&self) -> &[T] { + self.data.as_ref() + } } pub struct FlattenedBatch {