From 0a3128fb76b92c6f26f934fdd5cd29841122d040 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn=20Horstmann?= Date: Tue, 27 Jun 2023 15:10:27 +0200 Subject: [PATCH] Convince the compiler to auto-vectorize the range check in parquet DictionaryBuffer --- parquet/src/arrow/buffer/dictionary_buffer.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/parquet/src/arrow/buffer/dictionary_buffer.rs b/parquet/src/arrow/buffer/dictionary_buffer.rs index 6344d9dd3145..a0a47e3b98f7 100644 --- a/parquet/src/arrow/buffer/dictionary_buffer.rs +++ b/parquet/src/arrow/buffer/dictionary_buffer.rs @@ -152,8 +152,15 @@ impl let min = K::from_usize(0).unwrap(); let max = K::from_usize(values.len()).unwrap(); - // It may be possible to use SIMD here - if keys.as_slice().iter().any(|x| *x < min || *x >= max) { + // using copied and fold gets auto-vectorized since rust 1.70 + // all/any would allow early exit on invalid values + // but in the happy case all values have to be checked anyway + if !keys + .as_slice() + .iter() + .copied() + .fold(true, |a, x| a && x >= min && x < max) + { return Err(general_err!( "dictionary key beyond bounds of dictionary: 0..{}", values.len()