From 14501b283fb839f6d6ec6df855ce42b1b1434c27 Mon Sep 17 00:00:00 2001
From: "Carol (Nichols || Goulding)" <carol.nichols@gmail.com>
Date: Tue, 10 Nov 2020 17:55:41 -0500
Subject: [PATCH] ARROW-10042: [Rust] Fix tests involving ArrayData/Buffer
 equality

The tests added, unignored, and modified in this PR should illustrate the behavior I'm going for.

This gets 2 ignored Parquet -> Arrow roundtrip tests to pass because they were only failing due to differences in their `ArrayData`'s `BufferData` capacities.

This leaves `BufferData`'s `PartialEq` implementation as-is (that is, taking the equality of `capacity` into account) and does not modify any of the tests in rust/arrow/src/array/{array, builder, union}.rs or in rust/arrow/src/compute/{kernels/cast, kernels/filter, util}.rs that are testing the internal implementation of `Buffer`.

This *does* change `PartialEq` for `ArrayData` to [not assert to fix the problems noted](https://github.com/apache/arrow/pull/8546#discussion_r513993661) by @jorgecarleitao .

This has the downside of making test failures where `ArrayData`s should be equal but aren't harder to interpret because the `assert_eq!`s aren't as granular. We can't use `debug_assert_eq!` either, because then we can't write tests that do `assert_ne!(array_data1, array_data2)` (I have included an example of such a test). I think to solve this downside, we should add a function only used in tests that does the more granular assertions. I am leaving that to a future PR.

Closes #8590 from carols10cents/bufferdata-eq

Authored-by: Carol (Nichols || Goulding) <carol.nichols@gmail.com>
Signed-off-by: alamb <andrew@nerdnetworks.org>
---
 rust/arrow/src/array/data.rs           |  7 +++++++
 rust/arrow/src/buffer.rs               | 14 +++++++++++---
 rust/parquet/src/arrow/arrow_writer.rs | 14 +-------------
 3 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/rust/arrow/src/array/data.rs b/rust/arrow/src/array/data.rs
index 839c20335fe7a..0aec4e614d93d 100644
--- a/rust/arrow/src/array/data.rs
+++ b/rust/arrow/src/array/data.rs
@@ -446,4 +446,11 @@ mod tests {
         assert_eq!(2, new_data.offset());
         assert_eq!(data.null_count() - 1, new_data.null_count());
     }
+
+    #[test]
+    fn test_equality() {
+        let int_data = ArrayData::builder(DataType::Int32).build();
+        let float_data = ArrayData::builder(DataType::Float32).build();
+        assert_ne!(int_data, float_data);
+    }
 }
diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs
index f043b483681a9..a1c87297936f4 100644
--- a/rust/arrow/src/buffer.rs
+++ b/rust/arrow/src/buffer.rs
@@ -845,7 +845,7 @@ mod tests {
     #[test]
     fn test_buffer_data_equality() {
         let buf1 = Buffer::from(&[0, 1, 2, 3, 4]);
-        let mut buf2 = Buffer::from(&[0, 1, 2, 3, 4]);
+        let buf2 = Buffer::from(&[0, 1, 2, 3, 4]);
         assert_eq!(buf1, buf2);
 
         // slice with same offset should still preserve equality
@@ -854,12 +854,20 @@ mod tests {
         let buf4 = buf2.slice(2);
         assert_eq!(buf3, buf4);
 
+        // Different capacities should still preserve equality
+        let mut buf2 = MutableBuffer::new(65);
+        buf2.write_all(&[0, 1, 2, 3, 4])
+            .expect("write should be OK");
+
+        let buf2 = buf2.freeze();
+        assert_eq!(buf1, buf2);
+
         // unequal because of different elements
-        buf2 = Buffer::from(&[0, 0, 2, 3, 4]);
+        let buf2 = Buffer::from(&[0, 0, 2, 3, 4]);
         assert_ne!(buf1, buf2);
 
         // unequal because of different length
-        buf2 = Buffer::from(&[0, 1, 2, 3]);
+        let buf2 = Buffer::from(&[0, 1, 2, 3]);
         assert_ne!(buf1, buf2);
     }
 
diff --git a/rust/parquet/src/arrow/arrow_writer.rs b/rust/parquet/src/arrow/arrow_writer.rs
index c0b2105c2dfea..f2c907baa6179 100644
--- a/rust/parquet/src/arrow/arrow_writer.rs
+++ b/rust/parquet/src/arrow/arrow_writer.rs
@@ -911,17 +911,7 @@ mod tests {
             let expected_data = expected_batch.column(i).data();
             let actual_data = actual_batch.column(i).data();
 
-            assert_eq!(expected_data.data_type(), actual_data.data_type());
-            assert_eq!(expected_data.len(), actual_data.len());
-            assert_eq!(expected_data.null_count(), actual_data.null_count());
-            assert_eq!(expected_data.offset(), actual_data.offset());
-            assert_eq!(expected_data.buffers(), actual_data.buffers());
-            assert_eq!(expected_data.child_data(), actual_data.child_data());
-            // Null counts should be the same, not necessarily bitmaps
-            // A null bitmap is optional if an array has no nulls
-            if expected_data.null_count() != 0 {
-                assert_eq!(expected_data.null_bitmap(), actual_data.null_bitmap());
-            }
+            assert_eq!(expected_data, actual_data);
         }
     }
 
@@ -1198,7 +1188,6 @@ mod tests {
     }
 
     #[test]
-    #[ignore] // Binary support isn't correct yet - buffers don't match
     fn binary_single_column() {
         let one_vec: Vec<u8> = (0..SMALL_SIZE as u8).collect();
         let many_vecs: Vec<_> = std::iter::repeat(one_vec).take(SMALL_SIZE).collect();
@@ -1209,7 +1198,6 @@ mod tests {
     }
 
     #[test]
-    #[ignore] // Large binary support isn't correct yet - buffers don't match
     fn large_binary_single_column() {
         let one_vec: Vec<u8> = (0..SMALL_SIZE as u8).collect();
         let many_vecs: Vec<_> = std::iter::repeat(one_vec).take(SMALL_SIZE).collect();