Skip to content

Commit

Permalink
Speed up the function min_max_string (#1374)
Browse files Browse the repository at this point in the history
* clean up the code

Signed-off-by: remzi <[email protected]>

* bring back the optimization when null count is zero

Signed-off-by: remzi <[email protected]>

* pretty the trait bound and update comment

Signed-off-by: remzi <[email protected]>

* use value_unchecked to replace array.value
10% extra speed up

Signed-off-by: remzi <[email protected]>

* update the performance data

Signed-off-by: remzi <[email protected]>
  • Loading branch information
HaoYang670 authored Mar 3, 2022
1 parent 258f828 commit c947027
Showing 1 changed file with 18 additions and 28 deletions.
46 changes: 18 additions & 28 deletions arrow/src/compute/kernels/aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,39 +32,29 @@ fn is_nan<T: ArrowNativeType + PartialOrd + Copy>(a: T) -> bool {
!(a == a)
}

/// Helper macro to perform min/max of strings
fn min_max_string<T: StringOffsetSizeTrait, F: Fn(&str, &str) -> bool>(
array: &GenericStringArray<T>,
cmp: F,
) -> Option<&str> {
/// Helper function to perform min/max of strings
fn min_max_string<T, F>(array: &GenericStringArray<T>, cmp: F) -> Option<&str>
where
T: StringOffsetSizeTrait,
F: Fn(&str, &str) -> bool,
{
let null_count = array.null_count();

if null_count == array.len() {
return None;
}
let data = array.data();
let mut n;
if null_count == 0 {
n = array.value(0);
for i in 1..data.len() {
let item = array.value(i);
if cmp(n, item) {
n = item;
}
}
None
} else if null_count == 0 {
// JUSTIFICATION
// Benefit: ~8% speedup
// Soundness: `i` is always within the array bounds
(0..array.len())
.map(|i| unsafe { array.value_unchecked(i) })
.reduce(|acc, item| if cmp(acc, item) { item } else { acc })
} else {
n = "";
let mut has_value = false;

for i in 0..data.len() {
let item = array.value(i);
if data.is_valid(i) && (!has_value || cmp(n, item)) {
has_value = true;
n = item;
}
}
array
.iter()
.flatten()
.reduce(|acc, item| if cmp(acc, item) { item } else { acc })
}
Some(n)
}

/// Returns the minimum value in the array, according to the natural order.
Expand Down

0 comments on commit c947027

Please sign in to comment.