From 1a66136ec63b9dd08880bf703423a0017764a9ab Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 7 Jun 2022 13:39:23 -0700 Subject: [PATCH 1/2] Fix list equal for empty offset list array --- arrow/src/array/equal/list.rs | 16 ++++++++++++---- arrow/src/array/equal/mod.rs | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/arrow/src/array/equal/list.rs b/arrow/src/array/equal/list.rs index 65d320c0079d..1d5be30af4e3 100644 --- a/arrow/src/array/equal/list.rs +++ b/arrow/src/array/equal/list.rs @@ -73,11 +73,19 @@ pub(super) fn list_equal( // however, one is more likely to slice into a list array and get a region that has 0 // child values. // The test that triggered this behaviour had [4, 4] as a slice of 1 value slot. - let lhs_child_length = lhs_offsets[lhs_start + len].to_usize().unwrap() - - lhs_offsets[lhs_start].to_usize().unwrap(); + let lhs_child_length = if len == 0 { + 0 + } else { + lhs_offsets[lhs_start + len].to_usize().unwrap() + - lhs_offsets[lhs_start].to_usize().unwrap() + }; - let rhs_child_length = rhs_offsets[rhs_start + len].to_usize().unwrap() - - rhs_offsets[rhs_start].to_usize().unwrap(); + let rhs_child_length = if len == 0 { + 0 + } else { + rhs_offsets[rhs_start + len].to_usize().unwrap() + - rhs_offsets[rhs_start].to_usize().unwrap() + }; if lhs_child_length == 0 && lhs_child_length == rhs_child_length { return true; diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs index b89a8fa53e0b..bc4530e4a7a6 100644 --- a/arrow/src/array/equal/mod.rs +++ b/arrow/src/array/equal/mod.rs @@ -629,6 +629,39 @@ mod tests { test_equal(&a, &b, false); } + #[test] + fn test_empty_offsets_list_equal() { + let empty: Vec = vec![]; + let values = Int32Array::from(empty); + let empty_offsets: [u8; 0] = []; + + let a = ArrayDataBuilder::new(DataType::List(Box::new(Field::new( + "item", + DataType::Int32, + true, + )))) + .len(0) + .add_buffer(Buffer::from(&empty_offsets)) + .add_child_data(values.data().clone()) + .null_bit_buffer(Some(Buffer::from(&empty_offsets))) + .build() + .unwrap(); + + let b = ArrayDataBuilder::new(DataType::List(Box::new(Field::new( + "item", + DataType::Int32, + true, + )))) + .len(0) + .add_buffer(Buffer::from(&empty_offsets)) + .add_child_data(values.data().clone()) + .null_bit_buffer(Some(Buffer::from(&empty_offsets))) + .build() + .unwrap(); + + test_equal(&a, &b, true); + } + // Test the case where null_count > 0 #[test] fn test_list_null() { From fbead0dffd50d4da478b98d8a51a7db6c271b7dd Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 8 Jun 2022 09:20:14 -0700 Subject: [PATCH 2/2] For review --- arrow/src/array/equal/list.rs | 21 +++++++++------------ arrow/src/array/equal/mod.rs | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/arrow/src/array/equal/list.rs b/arrow/src/array/equal/list.rs index 1d5be30af4e3..0feefa7aa11a 100644 --- a/arrow/src/array/equal/list.rs +++ b/arrow/src/array/equal/list.rs @@ -73,19 +73,16 @@ pub(super) fn list_equal( // however, one is more likely to slice into a list array and get a region that has 0 // child values. // The test that triggered this behaviour had [4, 4] as a slice of 1 value slot. - let lhs_child_length = if len == 0 { - 0 - } else { - lhs_offsets[lhs_start + len].to_usize().unwrap() - - lhs_offsets[lhs_start].to_usize().unwrap() - }; + // For the edge case that zero length list arrays are always equal. + if len == 0 { + return true; + } - let rhs_child_length = if len == 0 { - 0 - } else { - rhs_offsets[rhs_start + len].to_usize().unwrap() - - rhs_offsets[rhs_start].to_usize().unwrap() - }; + let lhs_child_length = lhs_offsets[lhs_start + len].to_usize().unwrap() + - lhs_offsets[lhs_start].to_usize().unwrap(); + + let rhs_child_length = rhs_offsets[rhs_start + len].to_usize().unwrap() + - rhs_offsets[rhs_start].to_usize().unwrap(); if lhs_child_length == 0 && lhs_child_length == rhs_child_length { return true; diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs index bc4530e4a7a6..c3b0bbc95c2b 100644 --- a/arrow/src/array/equal/mod.rs +++ b/arrow/src/array/equal/mod.rs @@ -660,6 +660,24 @@ mod tests { .unwrap(); test_equal(&a, &b, true); + + let c = ArrayDataBuilder::new(DataType::List(Box::new(Field::new( + "item", + DataType::Int32, + true, + )))) + .len(0) + .add_buffer(Buffer::from(vec![0i32, 2, 3, 4, 6, 7, 8].to_byte_slice())) + .add_child_data( + Int32Array::from(vec![1, 2, -1, -2, 3, 4, -3, -4]) + .data() + .clone(), + ) + .null_bit_buffer(Some(Buffer::from(vec![0b00001001]))) + .build() + .unwrap(); + + test_equal(&a, &c, true); } // Test the case where null_count > 0