-
Notifications
You must be signed in to change notification settings - Fork 25
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Primitive Iterator API #689
Changes from all commits
9c369f8
2531987
cc8f63c
26469f3
6e48d11
ed89a43
6464598
784c0f8
44ddc82
e38a058
5a82159
82eaf35
88fd85e
b2359ca
fbfa5f5
37051ad
6d4e9ca
f03941e
ca16a7f
78d824b
3c1920b
0e06f14
b2701e0
feef333
b841b19
979dc50
d9fe8db
f2c24e7
b5ee4ac
ad9cb6e
bf64635
e40b741
b59ca53
e66d22b
091b6b8
420fa43
f65a84a
e674487
d9a1e3c
7b1dc98
da17eb9
4c76ece
8c98d7d
ad4505b
c78e692
3797994
bc54e5a
9440e44
3918d82
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -85,3 +85,7 @@ harness = false | |
[[bench]] | ||
name = "compare" | ||
harness = false | ||
|
||
[[bench]] | ||
name = "iter" | ||
harness = false |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These are the results on my laptop:
I'm surprised the Arrow iter is so much faster (and faster than std::iter). My guess is the assertions and alignment checking in our PrimitiveArray make up the difference? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Arrow doesn't actually iterate over Option, just over T, and in this case there are no nulls, so Arrow is the same as iterating a Vec. To get to that level you need monomorphisation of every function call. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What @robert3005 said, spent a lot of time trying to understand everything they do and while I can't say I have full clarity, Arrow's overall design does make it easier to have fast iterators - both having fully typed arrays and not having to support compression. |
||
use itertools::Itertools; | ||
use vortex::array::PrimitiveArray; | ||
use vortex::iter::VectorizedArrayIter; | ||
use vortex::validity::Validity; | ||
use vortex::variants::ArrayVariants; | ||
|
||
fn std_iter(c: &mut Criterion) { | ||
let data = (0_u32..1_000_000).map(Some).collect_vec(); | ||
c.bench_function("std_iter", |b| { | ||
b.iter_batched(|| data.iter().copied(), do_work, BatchSize::SmallInput) | ||
}); | ||
} | ||
|
||
fn std_iter_no_option(c: &mut Criterion) { | ||
let data = (0_u32..1_000_000).collect_vec(); | ||
c.bench_function("std_iter_no_option", |b| { | ||
b.iter_batched( | ||
|| data.iter().copied(), | ||
|mut iter| { | ||
let mut u = 0; | ||
for n in iter.by_ref() { | ||
u += n; | ||
} | ||
u | ||
}, | ||
BatchSize::SmallInput, | ||
) | ||
}); | ||
} | ||
|
||
fn vortex_iter(c: &mut Criterion) { | ||
let data = PrimitiveArray::from_vec((0_u32..1_000_000).collect_vec(), Validity::AllValid); | ||
|
||
c.bench_function("vortex_iter", |b| { | ||
b.iter_batched( | ||
|| data.as_primitive_array_unchecked().u32_iter().unwrap(), | ||
do_work_vortex, | ||
BatchSize::SmallInput, | ||
) | ||
}); | ||
} | ||
|
||
fn vortex_iter_flat(c: &mut Criterion) { | ||
let data = PrimitiveArray::from_vec((0_u32..1_000_000).collect_vec(), Validity::AllValid); | ||
|
||
c.bench_function("vortex_iter_flat", |b| { | ||
b.iter_batched( | ||
|| { | ||
data.as_primitive_array_unchecked() | ||
.u32_iter() | ||
.unwrap() | ||
.flatten() | ||
}, | ||
do_work, | ||
BatchSize::SmallInput, | ||
) | ||
}); | ||
} | ||
|
||
fn arrow_iter(c: &mut Criterion) { | ||
let data = arrow_array::UInt32Array::from_iter(0_u32..1_000_000); | ||
c.bench_function("arrow_iter", |b| { | ||
b.iter_batched(|| data.iter(), do_work, BatchSize::SmallInput) | ||
}); | ||
} | ||
|
||
/// Drains `iter`, summing every value, and returns the sum together with the
/// (now exhausted) iterator.
///
/// The sum wraps on `u32` overflow: the benchmark inputs (`0..1_000_000`) add
/// up to far more than `u32::MAX`, and a plain `+=` would panic whenever
/// overflow checks are enabled (e.g. debug builds).
///
/// The iterator is handed back to the caller rather than dropped here —
/// presumably so its destruction is not part of the measured routine.
///
/// # Panics
///
/// Panics if the iterator yields `None`; the benchmarks only feed all-valid data.
fn do_work(
    mut iter: impl Iterator<Item = Option<u32>>,
) -> (u32, impl Iterator<Item = Option<u32>>) {
    let mut u = 0_u32;
    for n in iter.by_ref() {
        u = u.wrapping_add(n.unwrap());
    }
    (u, iter)
}
|
||
fn do_work_vortex(iter: VectorizedArrayIter<u32>) -> u32 { | ||
let mut sum = 0; | ||
for batch in iter { | ||
for idx in 0..batch.len() { | ||
if batch.is_valid(idx) { | ||
sum += unsafe { *batch.get_unchecked(idx) }; | ||
} | ||
} | ||
} | ||
|
||
sum | ||
} | ||
|
||
criterion_group!( | ||
name = benches; | ||
config = Criterion::default().sample_size(100); | ||
targets = std_iter_no_option, | ||
std_iter, | ||
vortex_iter, | ||
vortex_iter_flat, | ||
arrow_iter, | ||
); | ||
criterion_main!(benches); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you can probably disable the default-features for this import since you don't use ipc/json/csv encoding. probably saves a bit of compile time 🤷
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
clippy wants me to disable it at the top-level and then have every crate in the workspace pull its own members, which is probably a good idea