From 14501b283fb839f6d6ec6df855ce42b1b1434c27 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Tue, 10 Nov 2020 17:55:41 -0500 Subject: [PATCH] ARROW-10042: [Rust] Fix tests involving ArrayData/Buffer equality The tests added, unignored, and modified in this PR should illustrate the behavior I'm going for. This gets 2 ignored Parquet -> Arrow roundtrip tests to pass because they were only failing due to differences in their `ArrayData`'s `BufferData` capacities. This leaves `BufferData`'s `PartialEq` implementation as-is (that is, taking the equality of `capacity` into account) and does not modify any of the tests in rust/arrow/src/array/{array, builder, union}.rs or in rust/arrow/src/compute/{kernels/cast, kernels/filter, util}.rs that are testing the internal implementation of `Buffer`. This *does* change `PartialEq` for `ArrayData` to [not assert to fix the problems noted](https://github.com/apache/arrow/pull/8546#discussion_r513993661) by @jorgecarleitao . This has the downside of making test failures where `ArrayData`s should be equal but aren't harder to interpret because the `assert_eq!`s aren't as granular. We can't use `debug_assert_eq!` either, because then we can't write tests that do `assert_ne!(array_data1, array_data2)` (I have included an example of such a test). I think to solve this downside, we should add a function only used in tests that does the more granular assertions. I am leaving that to a future PR. Closes #8590 from carols10cents/bufferdata-eq Authored-by: Carol (Nichols || Goulding) Signed-off-by: alamb --- rust/arrow/src/array/data.rs | 7 +++++++ rust/arrow/src/buffer.rs | 14 +++++++++++--- rust/parquet/src/arrow/arrow_writer.rs | 14 +------------- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/rust/arrow/src/array/data.rs b/rust/arrow/src/array/data.rs index 839c20335fe7a..0aec4e614d93d 100644 --- a/rust/arrow/src/array/data.rs +++ b/rust/arrow/src/array/data.rs @@ -446,4 +446,11 @@ mod tests { assert_eq!(2, new_data.offset()); assert_eq!(data.null_count() - 1, new_data.null_count()); } + + #[test] + fn test_equality() { + let int_data = ArrayData::builder(DataType::Int32).build(); + let float_data = ArrayData::builder(DataType::Float32).build(); + assert_ne!(int_data, float_data); + } } diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs index f043b483681a9..a1c87297936f4 100644 --- a/rust/arrow/src/buffer.rs +++ b/rust/arrow/src/buffer.rs @@ -845,7 +845,7 @@ mod tests { #[test] fn test_buffer_data_equality() { let buf1 = Buffer::from(&[0, 1, 2, 3, 4]); - let mut buf2 = Buffer::from(&[0, 1, 2, 3, 4]); + let buf2 = Buffer::from(&[0, 1, 2, 3, 4]); assert_eq!(buf1, buf2); // slice with same offset should still preserve equality @@ -854,12 +854,20 @@ mod tests { let buf4 = buf2.slice(2); assert_eq!(buf3, buf4); + // Different capacities should still preserve equality + let mut buf2 = MutableBuffer::new(65); + buf2.write_all(&[0, 1, 2, 3, 4]) + .expect("write should be OK"); + + let buf2 = buf2.freeze(); + assert_eq!(buf1, buf2); + // unequal because of different elements - buf2 = Buffer::from(&[0, 0, 2, 3, 4]); + let buf2 = Buffer::from(&[0, 0, 2, 3, 4]); assert_ne!(buf1, buf2); // unequal because of different length - buf2 = Buffer::from(&[0, 1, 2, 3]); + let buf2 = Buffer::from(&[0, 1, 2, 3]); assert_ne!(buf1, buf2); } diff --git a/rust/parquet/src/arrow/arrow_writer.rs b/rust/parquet/src/arrow/arrow_writer.rs index c0b2105c2dfea..f2c907baa6179 100644 --- a/rust/parquet/src/arrow/arrow_writer.rs +++ b/rust/parquet/src/arrow/arrow_writer.rs @@ -911,17 +911,7 @@ mod tests { let expected_data = expected_batch.column(i).data(); let actual_data = actual_batch.column(i).data(); - assert_eq!(expected_data.data_type(), actual_data.data_type()); - assert_eq!(expected_data.len(), actual_data.len()); - assert_eq!(expected_data.null_count(), actual_data.null_count()); - assert_eq!(expected_data.offset(), actual_data.offset()); - assert_eq!(expected_data.buffers(), actual_data.buffers()); - assert_eq!(expected_data.child_data(), actual_data.child_data()); - // Null counts should be the same, not necessarily bitmaps - // A null bitmap is optional if an array has no nulls - if expected_data.null_count() != 0 { - assert_eq!(expected_data.null_bitmap(), actual_data.null_bitmap()); - } + assert_eq!(expected_data, actual_data); } } @@ -1198,7 +1188,6 @@ mod tests { } #[test] - #[ignore] // Binary support isn't correct yet - buffers don't match fn binary_single_column() { let one_vec: Vec = (0..SMALL_SIZE as u8).collect(); let many_vecs: Vec<_> = std::iter::repeat(one_vec).take(SMALL_SIZE).collect(); @@ -1209,7 +1198,6 @@ mod tests { } #[test] - #[ignore] // Large binary support isn't correct yet - buffers don't match fn large_binary_single_column() { let one_vec: Vec = (0..SMALL_SIZE as u8).collect(); let many_vecs: Vec<_> = std::iter::repeat(one_vec).take(SMALL_SIZE).collect();