From 01743f3f10a377c1ca857cd554acbf84155766d8 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 27 Oct 2021 07:29:42 -0400 Subject: [PATCH] fix: fix a bug in offset calculation for unions (#863) (#871) The `value_offset` function only read the least significant byte in the offset array, causing issues with unions with more than 255 rows of any given variant. Fix the issue by reading the entire i32 offset and add a unit test. Co-authored-by: Helgi Kristvin Sigurbjarnarson --- arrow/src/array/array_union.rs | 47 ++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/arrow/src/array/array_union.rs b/arrow/src/array/array_union.rs index ba563ec796b4..6460e072646b 100644 --- a/arrow/src/array/array_union.rs +++ b/arrow/src/array/array_union.rs @@ -24,7 +24,6 @@ use crate::error::{ArrowError, Result}; use core::fmt; use std::any::Any; -use std::mem::size_of; /// An Array that can represent slots of varying types. /// @@ -177,7 +176,9 @@ impl UnionArray { Some(b) => b.count_set_bits_offset(0, index), None => index, }; - self.data().buffers()[1].as_slice()[valid_slots * size_of::()] as i32 + // safety: reinterpreting is safe since the offset buffer contains `i32` values and is + // properly aligned. + unsafe { self.data().buffers()[1].typed_data::()[valid_slots] } } else { index as i32 } @@ -334,6 +335,48 @@ mod tests { } } + #[test] + fn test_dense_i32_large() { + let mut builder = UnionBuilder::new_dense(1024); + + let expected_type_ids = vec![0_i8; 1024]; + let expected_value_offsets: Vec<_> = (0..1024).collect(); + let expected_array_values: Vec<_> = (1..=1024).collect(); + + expected_array_values + .iter() + .for_each(|v| builder.append::("a", *v).unwrap()); + + let union = builder.build().unwrap(); + + // Check type ids + assert_eq!( + union.data().buffers()[0], + Buffer::from_slice_ref(&expected_type_ids) + ); + for (i, id) in expected_type_ids.iter().enumerate() { + assert_eq!(id, &union.type_id(i)); + } + + // Check offsets + assert_eq!( + union.data().buffers()[1], + Buffer::from_slice_ref(&expected_value_offsets) + ); + for (i, id) in expected_value_offsets.iter().enumerate() { + assert_eq!(&union.value_offset(i), id); + } + + for (i, expected_value) in expected_array_values.iter().enumerate() { + assert!(!union.is_null(i)); + let slot = union.value(i); + let slot = slot.as_any().downcast_ref::().unwrap(); + assert_eq!(slot.len(), 1); + let value = slot.value(0); + assert_eq!(expected_value, &value); + } + } + #[test] fn test_dense_mixed() { let mut builder = UnionBuilder::new_dense(7);