Skip to content

Commit

Permalink
Fix ignored limit on lexsort_to_indices (apache#2991)
Browse files Browse the repository at this point in the history
* Fix ignored limit on lexsort_to_indices

* Update comments

* Update arrow/src/compute/kernels/sort.rs

Co-authored-by: Batuhan Taskaya <[email protected]>

Co-authored-by: Batuhan Taskaya <[email protected]>
  • Loading branch information
alamb and isidentical committed Oct 31, 2022
1 parent 7798043 commit fb2b49c
Showing 1 changed file with 33 additions and 7 deletions.
40 changes: 33 additions & 7 deletions arrow/src/compute/kernels/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -950,7 +950,7 @@ pub fn lexsort_to_indices(
});

Ok(UInt32Array::from_iter_values(
value_indices.iter().map(|i| *i as u32),
value_indices.iter().take(len).map(|i| *i as u32),
))
}

Expand Down Expand Up @@ -1422,6 +1422,18 @@ mod tests {
}
}

/// slice all arrays in expected_output to offset/length
fn slice_arrays(
expected_output: Vec<ArrayRef>,
offset: usize,
length: usize,
) -> Vec<ArrayRef> {
expected_output
.into_iter()
.map(|array| array.slice(offset, length))
.collect()
}

fn test_sort_binary_arrays(
data: Vec<Option<Vec<u8>>>,
options: Option<SortOptions>,
Expand Down Expand Up @@ -3439,8 +3451,10 @@ mod tests {
Some(2),
Some(17),
])) as ArrayRef];
test_lex_sort_arrays(input.clone(), expected, None);
test_lex_sort_arrays(input.clone(), expected.clone(), None);
test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2));

// Explicitly test a limit on the sort as a demonstration
let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
Some(0),
Expand Down Expand Up @@ -3519,7 +3533,8 @@ mod tests {
Some(-2),
])) as ArrayRef,
];
test_lex_sort_arrays(input, expected, None);
test_lex_sort_arrays(input.clone(), expected.clone(), None);
test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));

// test mix of string and in64 with option
let input = vec![
Expand Down Expand Up @@ -3562,7 +3577,8 @@ mod tests {
Some("7"),
])) as ArrayRef,
];
test_lex_sort_arrays(input, expected, None);
test_lex_sort_arrays(input.clone(), expected.clone(), None);
test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3));

// test sort with nulls first
let input = vec![
Expand Down Expand Up @@ -3605,7 +3621,8 @@ mod tests {
Some("world"),
])) as ArrayRef,
];
test_lex_sort_arrays(input, expected, None);
test_lex_sort_arrays(input.clone(), expected.clone(), None);
test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1));

// test sort with nulls last
let input = vec![
Expand Down Expand Up @@ -3648,7 +3665,8 @@ mod tests {
None,
])) as ArrayRef,
];
test_lex_sort_arrays(input, expected, None);
test_lex_sort_arrays(input.clone(), expected.clone(), None);
test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));

// test sort with opposite options
let input = vec![
Expand Down Expand Up @@ -3695,7 +3713,15 @@ mod tests {
Some("foo"),
])) as ArrayRef,
];
test_lex_sort_arrays(input, expected, None);
test_lex_sort_arrays(input.clone(), expected.clone(), None);
test_lex_sort_arrays(
input.clone(),
slice_arrays(expected.clone(), 0, 5),
Some(5),
);

// Limiting by more rows than present is ok
test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10));
}

#[test]
Expand Down

0 comments on commit fb2b49c

Please sign in to comment.