From a2205afa0152ad57e75b1e06982fb66746102e4b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 31 Oct 2022 07:16:15 -0400 Subject: [PATCH 1/3] Fix ignored limit on lexsort_to_indices --- arrow/src/compute/kernels/sort.rs | 39 +++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs index b297622647e7..c988eb9394ae 100644 --- a/arrow/src/compute/kernels/sort.rs +++ b/arrow/src/compute/kernels/sort.rs @@ -950,7 +950,7 @@ pub fn lexsort_to_indices( }); Ok(UInt32Array::from_iter_values( - value_indices.iter().map(|i| *i as u32), + value_indices.iter().take(len).map(|i| *i as u32), )) } @@ -1422,6 +1422,18 @@ mod tests { } } + /// slace all arrays in expected_output to offset/length + fn slice_arrays( + expected_output: Vec<ArrayRef>, + offset: usize, + length: usize, + ) -> Vec<ArrayRef> { + expected_output + .into_iter() + .map(|array| array.slice(offset, length)) + .collect() + } + fn test_sort_binary_arrays( data: Vec<Option<Vec<u8>>>, options: Option<SortOptions>, @@ -3439,7 +3451,8 @@ mod tests { Some(2), Some(17), ])) as ArrayRef]; - test_lex_sort_arrays(input.clone(), expected, None); + test_lex_sort_arrays(input.clone(), expected.clone(), None); + test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2)); let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![ Some(-1), @@ -3519,7 +3532,8 @@ mod tests { Some(-2), ])) as ArrayRef, ]; - test_lex_sort_arrays(input, expected, None); + test_lex_sort_arrays(input.clone(), expected.clone(), None); + test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2)); // test mix of string and in64 with option let input = vec![ @@ -3562,7 +3576,8 @@ mod tests { Some("7"), ])) as ArrayRef, ]; - test_lex_sort_arrays(input, expected, None); + test_lex_sort_arrays(input.clone(), expected.clone(), None); + test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3)); // test sort with nulls first let input = vec![ @@ -3605,7 +3620,8 @@ 
mod tests { Some("world"), ])) as ArrayRef, ]; - test_lex_sort_arrays(input, expected, None); + test_lex_sort_arrays(input.clone(), expected.clone(), None); + test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1)); // test sort with nulls last let input = vec![ @@ -3648,7 +3664,8 @@ mod tests { None, ])) as ArrayRef, ]; - test_lex_sort_arrays(input, expected, None); + test_lex_sort_arrays(input.clone(), expected.clone(), None); + test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2)); // test sort with opposite options let input = vec![ @@ -3695,7 +3712,15 @@ mod tests { Some("foo"), ])) as ArrayRef, ]; - test_lex_sort_arrays(input, expected, None); + test_lex_sort_arrays(input.clone(), expected.clone(), None); + test_lex_sort_arrays( + input.clone(), + slice_arrays(expected.clone(), 0, 5), + Some(5), + ); + + // Limiting by more rows than present should is ok + test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10)); } #[test] From 8afe667af933e4404f57be6fae596e7488347a00 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 31 Oct 2022 07:26:09 -0400 Subject: [PATCH 2/3] Update comments --- arrow/src/compute/kernels/sort.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs index c988eb9394ae..f1f7e43b3140 100644 --- a/arrow/src/compute/kernels/sort.rs +++ b/arrow/src/compute/kernels/sort.rs @@ -3454,6 +3454,7 @@ mod tests { test_lex_sort_arrays(input.clone(), expected.clone(), None); test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2)); + // Explicitly test a limit on the sort as a demonstration let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![ Some(-1), Some(0), @@ -3719,7 +3720,7 @@ mod tests { Some(5), ); - // Limiting by more rows than present should is ok + // Limiting by more rows than present is ok test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10)); } From 
2b445180c84ed5c28c257ab6c015dcdc22a8011d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 31 Oct 2022 08:39:25 -0400 Subject: [PATCH 3/3] Update arrow/src/compute/kernels/sort.rs Co-authored-by: Batuhan Taskaya --- arrow/src/compute/kernels/sort.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs index f1f7e43b3140..a10e674ac9d1 100644 --- a/arrow/src/compute/kernels/sort.rs +++ b/arrow/src/compute/kernels/sort.rs @@ -1422,7 +1422,7 @@ mod tests { } } - /// slace all arrays in expected_output to offset/length + /// slice all arrays in expected_output to offset/length fn slice_arrays( expected_output: Vec<ArrayRef>, offset: usize,