Skip to content

Commit

Permalink
Convince the compiler to auto-vectorize the range check in parquet DictionaryBuffer (#4453)
Browse files Browse the repository at this point in the history
  • Loading branch information
jhorstmann authored Jun 27, 2023
1 parent 45cc770 commit c1656ff
Showing 1 changed file with 9 additions and 2 deletions.
11 changes: 9 additions & 2 deletions parquet/src/arrow/buffer/dictionary_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,15 @@ impl<K: ScalarValue + ArrowNativeType + Ord, V: ScalarValue + OffsetSizeTrait>
let min = K::from_usize(0).unwrap();
let max = K::from_usize(values.len()).unwrap();

// It may be possible to use SIMD here
if keys.as_slice().iter().any(|x| *x < min || *x >= max) {
// Using `copied` and `fold` lets the compiler auto-vectorize this range check (since Rust 1.70).
// `all`/`any` would allow an early exit on the first invalid value,
// but in the happy case every value has to be checked anyway.
if !keys
.as_slice()
.iter()
.copied()
.fold(true, |a, x| a && x >= min && x < max)
{
return Err(general_err!(
"dictionary key beyond bounds of dictionary: 0..{}",
values.len()
Expand Down

0 comments on commit c1656ff

Please sign in to comment.