From b4815814e8b3f2135de53ea1539e7a1281bd13a3 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 30 Oct 2021 07:49:55 -0400 Subject: [PATCH] Update union array to new null handling --- arrow/src/array/array.rs | 4 +- arrow/src/array/array_union.rs | 53 +++++---- arrow/src/array/builder.rs | 14 ++- arrow/src/array/data.rs | 178 ++++++++++++++++++++++++++---- arrow/src/array/equal/mod.rs | 2 +- arrow/src/array/equal/utils.rs | 2 +- arrow/src/compute/kernels/cast.rs | 11 +- arrow/src/datatypes/datatype.rs | 11 +- arrow/src/datatypes/field.rs | 6 +- arrow/src/datatypes/mod.rs | 35 +++--- arrow/src/ipc/writer.rs | 2 +- parquet/src/arrow/arrow_writer.rs | 2 +- parquet/src/arrow/levels.rs | 6 +- parquet/src/arrow/schema.rs | 2 +- 14 files changed, 247 insertions(+), 81 deletions(-) diff --git a/arrow/src/array/array.rs b/arrow/src/array/array.rs index 7f790ef8f796..ce3751def8a4 100644 --- a/arrow/src/array/array.rs +++ b/arrow/src/array/array.rs @@ -301,7 +301,7 @@ pub fn make_array(data: ArrayData) -> ArrayRef { DataType::LargeList(_) => Arc::new(LargeListArray::from(data)) as ArrayRef, DataType::Struct(_) => Arc::new(StructArray::from(data)) as ArrayRef, DataType::Map(_, _) => Arc::new(MapArray::from(data)) as ArrayRef, - DataType::Union(_) => Arc::new(UnionArray::from(data)) as ArrayRef, + DataType::Union(_, _) => Arc::new(UnionArray::from(data)) as ArrayRef, DataType::FixedSizeList(_, _) => { Arc::new(FixedSizeListArray::from(data)) as ArrayRef } @@ -472,7 +472,7 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef { DataType::Map(field, _keys_sorted) => { new_null_list_array::(data_type, field.data_type(), length) } - DataType::Union(_) => { + DataType::Union(_, _) => { unimplemented!("Creating null Union array not yet supported") } DataType::Dictionary(key, value) => { diff --git a/arrow/src/array/array_union.rs b/arrow/src/array/array_union.rs index 56efcfb30c75..d43310eaf9fc 100644 --- a/arrow/src/array/array_union.rs +++ b/arrow/src/array/array_union.rs @@ -17,7 +17,7 @@ /// Contains the `UnionArray` type. /// -use crate::array::{data::count_nulls, make_array, Array, ArrayData, ArrayRef}; +use crate::array::{make_array, Array, ArrayData, ArrayRef}; use crate::buffer::Buffer; use crate::datatypes::*; use crate::error::{ArrowError, Result}; @@ -48,15 +48,15 @@ impl UnionArray { /// caller and assumes that each of the components are correct and consistent with each other. /// See `try_new` for an alternative that validates the data provided. /// - /// # Data Consistency + /// # Safety /// /// The `type_ids` `Buffer` should contain `i8` values. These values should be greater than /// zero and must be less than the number of children provided in `child_arrays`. These values /// are used to index into the `child_arrays`. /// /// The `value_offsets` `Buffer` is only provided in the case of a dense union, sparse unions - /// should use `None`. If provided the `value_offsets` `Buffer` should contain `i32` values. - /// These values should be greater than zero and must be less than the length of the overall + /// should use `None`. If provided the `value_offsets` `Buffer` should contain `i32` values + /// Thee values in this array should be greater than zero and must be less than the length of the overall /// array. 
/// /// In both cases above we use signed integer types to maintain compatibility with other @@ -65,7 +65,7 @@ impl UnionArray { /// In both of the cases above we are accepting `Buffer`'s which are assumed to be representing /// `i8` and `i32` values respectively. `Buffer` objects are untyped and no attempt is made /// to ensure that the data provided is valid. - pub fn new( + pub unsafe fn new_unchecked( type_ids: Buffer, value_offsets: Option, child_arrays: Vec<(Field, ArrayRef)>, @@ -74,22 +74,28 @@ impl UnionArray { let (field_types, field_values): (Vec<_>, Vec<_>) = child_arrays.into_iter().unzip(); let len = type_ids.len(); - let mut builder = ArrayData::builder(DataType::Union(field_types)) + + let mode = if value_offsets.is_some() { + UnionMode::Dense + } else { + UnionMode::Sparse + }; + + let mut builder = ArrayData::builder(DataType::Union(field_types, mode)) .add_buffer(type_ids) .child_data(field_values.into_iter().map(|a| a.data().clone()).collect()) .len(len); if let Some(bitmap) = bitmap_data { builder = builder.null_bit_buffer(bitmap) } - let data = unsafe { - match value_offsets { - Some(b) => builder.add_buffer(b).build_unchecked(), - None => builder.build_unchecked(), - } + let data = match value_offsets { + Some(b) => builder.add_buffer(b).build_unchecked(), + None => builder.build_unchecked(), }; Self::from(data) } - /// Attempts to create a new `UnionArray` and validates the inputs provided. + + /// Attempts to create a new `UnionArray`, validating the inputs provided. pub fn try_new( type_ids: Buffer, value_offsets: Option, @@ -97,8 +103,7 @@ impl UnionArray { bitmap: Option, ) -> Result { if let Some(b) = &value_offsets { - let nulls = count_nulls(bitmap.as_ref(), 0, type_ids.len()); - if ((type_ids.len() - nulls) * 4) != b.len() { + if ((type_ids.len()) * 4) != b.len() { return Err(ArrowError::InvalidArgumentError( "Type Ids and Offsets represent a different number of array slots." .to_string(), @@ -137,7 +142,10 @@ impl UnionArray { } } - let new_self = Self::new(type_ids, value_offsets, child_arrays, bitmap); + // Unsafe Justification: arguments were validated above (and + // re-revalidated as part of data().validate() below) + let new_self = + unsafe { Self::new_unchecked(type_ids, value_offsets, child_arrays, bitmap) }; new_self.data().validate()?; Ok(new_self) @@ -173,15 +181,9 @@ impl UnionArray { pub fn value_offset(&self, index: usize) -> i32 { assert!(index - self.offset() < self.len()); if self.is_dense() { - // In format v4 unions had their own validity bitmap and offsets are compressed by omitting null values - // Starting with v5 unions don't have a validity bitmap and it's possible to directly index into the offsets buffer - let valid_slots = match self.data.null_buffer() { - Some(b) => b.count_set_bits_offset(0, index), - None => index, - }; // safety: reinterpreting is safe since the offset buffer contains `i32` values and is // properly aligned. - unsafe { self.data().buffers()[1].typed_data::()[valid_slots] } + unsafe { self.data().buffers()[1].typed_data::()[index] } } else { index as i32 } @@ -202,7 +204,7 @@ impl UnionArray { /// Returns the names of the types in the union. pub fn type_names(&self) -> Vec<&str> { match self.data.data_type() { - DataType::Union(fields) => fields + DataType::Union(fields, _) => fields .iter() .map(|f| f.name().as_str()) .collect::>(), @@ -212,7 +214,10 @@ impl UnionArray { /// Returns whether the `UnionArray` is dense (or sparse if `false`). 
fn is_dense(&self) -> bool { - self.data().buffers().len() == 2 + match self.data.data_type() { + DataType::Union(_, mode) => mode == &UnionMode::Dense, + _ => unreachable!("Union array's data type is not a union!"), + } } } diff --git a/arrow/src/array/builder.rs b/arrow/src/array/builder.rs index 8a5ef6c42799..446967bdc0e0 100644 --- a/arrow/src/array/builder.rs +++ b/arrow/src/array/builder.rs @@ -2143,12 +2143,16 @@ impl UnionBuilder { self.type_id_builder.append(i8::default()); - // Handle sparse union - if self.value_offset_builder.is_none() { - for (_, fd) in self.fields.iter_mut() { - fd.append_null_dynamic()?; + match &mut self.value_offset_builder { + // Handle dense union + Some(value_offset_builder) => value_offset_builder.append(i32::default()), + // Handle sparse union + None => { + for (_, fd) in self.fields.iter_mut() { + fd.append_null_dynamic()?; + } } - } + }; self.len += 1; Ok(()) } diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs index 9b46a79ed799..e14a9a71c835 100644 --- a/arrow/src/array/data.rs +++ b/arrow/src/array/data.rs @@ -18,7 +18,7 @@ //! Contains `ArrayData`, a generic representation of Arrow array data which encapsulates //! common attributes and operations for Arrow array. -use crate::datatypes::{DataType, IntervalUnit}; +use crate::datatypes::{DataType, IntervalUnit, UnionMode}; use crate::error::{ArrowError, Result}; use crate::{bitmap::Bitmap, datatypes::ArrowNativeType}; use crate::{ @@ -194,7 +194,7 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff MutableBuffer::new(capacity * mem::size_of::()), empty_buffer, ], - DataType::Union(_) => unimplemented!(), + DataType::Union(_, _) => unimplemented!(), } } @@ -560,7 +560,7 @@ impl ArrayData { DataType::Map(field, _) => { vec![Self::new_empty(field.data_type())] } - DataType::Union(_) => unimplemented!(), + DataType::Union(_, _) => unimplemented!(), DataType::Dictionary(_, data_type) => { vec![Self::new_empty(data_type)] } @@ -597,11 +597,6 @@ impl ArrayData { // Check that the data layout conforms to the spec let layout = layout(&self.data_type); - // Will validate Union when conforms to new spec: - // https://github.com/apache/arrow-rs/issues/85 - if matches!(&self.data_type, DataType::Union(_)) { - return Ok(()); - } if self.buffers.len() != layout.buffers.len() { return Err(ArrowError::InvalidArgumentError(format!( "Expected {} buffers in array of type {:?}, got {}", @@ -827,10 +822,21 @@ impl ArrayData { } Ok(()) } - DataType::Union(_fields) => { - // Validate Union Array as part of implementing new Union semantics - // See comments in `ArrayData::validate()` - // https://github.com/apache/arrow-rs/issues/85 + DataType::Union(fields, mode) => { + self.validate_num_child_data(fields.len())?; + + for (i, field) in fields.iter().enumerate() { + let field_data = self.get_valid_child_data(i, field.data_type())?; + + if mode == &UnionMode::Sparse + && field_data.len < (self.len + self.offset) + { + return Err(ArrowError::InvalidArgumentError(format!( + "Sparse union child array #{} has length smaller than expected for union array ({} < {})", + i, field_data.len, self.len + self.offset + ))); + } + } Ok(()) } DataType::Dictionary(_key_type, value_type) => { @@ -951,10 +957,12 @@ impl ArrayData { let child = &self.child_data[0]; self.validate_offsets_full::(child.len + child.offset)?; } - DataType::Union(_) => { + DataType::Union(_, _) => { // Validate Union Array as part of implementing new Union semantics // See comments in `ArrayData::validate()` // 
https://github.com/apache/arrow-rs/issues/85 + // + // TODO file follow on ticket for full union validation } DataType::Dictionary(key_type, _value_type) => { let dictionary_length: i64 = self.child_data[0].len.try_into().unwrap(); @@ -1200,11 +1208,26 @@ fn layout(data_type: &DataType) -> DataTypeLayout { DataType::FixedSizeList(_, _) => DataTypeLayout::new_empty(), // all in child data DataType::LargeList(_) => DataTypeLayout::new_fixed_width(size_of::()), DataType::Struct(_) => DataTypeLayout::new_empty(), // all in child data, - DataType::Union(_) => { - DataTypeLayout::new_fixed_width(size_of::()) - // Note sparse unions only have one buffer (u8) type_ids, - // and dense unions have 2 (type_ids as well as offsets). - // https://github.com/apache/arrow-rs/issues/85 + DataType::Union(_, mode) => { + let type_ids = BufferSpec::FixedWidth { + byte_width: size_of::(), + }; + + DataTypeLayout { + buffers: match mode { + UnionMode::Sparse => { + vec![type_ids] + } + UnionMode::Dense => { + vec![ + type_ids, + BufferSpec::FixedWidth { + byte_width: size_of::(), + }, + ] + } + }, + } } DataType::Dictionary(key_type, _value_type) => layout(key_type), DataType::Decimal(_, _) => { @@ -1389,8 +1412,8 @@ mod tests { use super::*; use crate::array::{ - Array, BooleanBuilder, Int32Array, Int32Builder, StringArray, StructBuilder, - UInt64Array, + Array, BooleanBuilder, Int32Array, Int32Builder, Int64Array, StringArray, + StructBuilder, UInt64Array, }; use crate::buffer::Buffer; use crate::datatypes::Field; @@ -2272,6 +2295,121 @@ mod tests { Buffer::from_slice_ref(&vec![42f32; n]) } + #[test] + #[should_panic(expected = "Expected Int64 but child data had Int32")] + fn test_validate_union_different_types() { + let field1 = vec![Some(1), Some(2)].into_iter().collect::(); + + let field2 = vec![Some(1), Some(2)].into_iter().collect::(); + + let type_ids = Buffer::from_slice_ref(&[0i8, 1i8]); + + ArrayData::try_new( + DataType::Union( + vec![ + Field::new("field1", DataType::Int32, true), + Field::new("field2", DataType::Int64, true), // data is int32 + ], + UnionMode::Sparse, + ), + 2, + None, + None, + 0, + vec![type_ids], + vec![field1.data().clone(), field2.data().clone()], + ) + .unwrap(); + } + + // sparse with wrong sized children + #[test] + #[should_panic( + expected = "Sparse union child array #1 has length smaller than expected for union array (1 < 2)" + )] + fn test_validate_union_sparse_different_child_len() { + let field1 = vec![Some(1), Some(2)].into_iter().collect::(); + + // field 2 only has 1 item but array should have 2 + let field2 = vec![Some(1)].into_iter().collect::(); + + let type_ids = Buffer::from_slice_ref(&[0i8, 1i8]); + + ArrayData::try_new( + DataType::Union( + vec![ + Field::new("field1", DataType::Int32, true), + Field::new("field2", DataType::Int64, true), + ], + UnionMode::Sparse, + ), + 2, + None, + None, + 0, + vec![type_ids], + vec![field1.data().clone(), field2.data().clone()], + ) + .unwrap(); + } + + #[test] + #[should_panic(expected = "Expected 2 buffers in array of type Union")] + fn test_validate_union_dense_without_offsets() { + let field1 = vec![Some(1), Some(2)].into_iter().collect::(); + + let field2 = vec![Some(1)].into_iter().collect::(); + + let type_ids = Buffer::from_slice_ref(&[0i8, 1i8]); + + ArrayData::try_new( + DataType::Union( + vec![ + Field::new("field1", DataType::Int32, true), + Field::new("field2", DataType::Int64, true), + ], + UnionMode::Dense, + ), + 2, + None, + None, + 0, + vec![type_ids], // need offsets buffer here too + 
vec![field1.data().clone(), field2.data().clone()], + ) + .unwrap(); + } + + #[test] + #[should_panic( + expected = "Need at least 8 bytes in buffers[1] in array of type Union" + )] + fn test_validate_union_dense_with_bad_len() { + let field1 = vec![Some(1), Some(2)].into_iter().collect::(); + + let field2 = vec![Some(1)].into_iter().collect::(); + + let type_ids = Buffer::from_slice_ref(&[0i8, 1i8]); + let offsets = Buffer::from_slice_ref(&[0i32]); // should have 2 offsets, but only have 1 + + ArrayData::try_new( + DataType::Union( + vec![ + Field::new("field1", DataType::Int32, true), + Field::new("field2", DataType::Int64, true), + ], + UnionMode::Dense, + ), + 2, + None, + None, + 0, + vec![type_ids, offsets], + vec![field1.data().clone(), field2.data().clone()], + ) + .unwrap(); + } + #[test] fn test_try_new_sliced_struct() { let mut builder = StructBuilder::new( diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs index 9a044e612906..742eeecd6d50 100644 --- a/arrow/src/array/equal/mod.rs +++ b/arrow/src/array/equal/mod.rs @@ -226,7 +226,7 @@ fn equal_values( DataType::Struct(_) => { struct_equal(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len) } - DataType::Union(_) => unimplemented!("See ARROW-8576"), + DataType::Union(_, _) => unimplemented!("See ARROW-8576"), DataType::Dictionary(data_type, _) => match data_type.as_ref() { DataType::Int8 => dictionary_equal::( lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len, diff --git a/arrow/src/array/equal/utils.rs b/arrow/src/array/equal/utils.rs index 7ce8e14993e7..819ae32c5709 100644 --- a/arrow/src/array/equal/utils.rs +++ b/arrow/src/array/equal/utils.rs @@ -161,7 +161,7 @@ pub(super) fn child_logical_null_buffer( }); Some(buffer.into()) } - DataType::Union(_) => { + DataType::Union(_, _) => { unimplemented!("Logical equality not yet implemented for union arrays") } DataType::Dictionary(_, _) => { diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs index 44fbfae52b07..aef27c3ee7f5 100644 --- a/arrow/src/compute/kernels/cast.rs +++ b/arrow/src/compute/kernels/cast.rs @@ -4135,10 +4135,13 @@ mod tests { Field::new("f1", DataType::Int32, false), Field::new("f2", DataType::Utf8, true), ]), - Union(vec![ - Field::new("f1", DataType::Int32, false), - Field::new("f2", DataType::Utf8, true), - ]), + Union( + vec![ + Field::new("f1", DataType::Int32, false), + Field::new("f2", DataType::Utf8, true), + ], + UnionMode::Dense, + ), Dictionary(Box::new(DataType::Int8), Box::new(DataType::Int32)), Dictionary(Box::new(DataType::Int16), Box::new(DataType::Utf8)), Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)), diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs index ae61f0831522..3653ebb9a82c 100644 --- a/arrow/src/datatypes/datatype.rs +++ b/arrow/src/datatypes/datatype.rs @@ -115,7 +115,7 @@ pub enum DataType { /// A nested datatype that contains a number of sub-fields. Struct(Vec), /// A nested datatype that can represent slots of differing types. - Union(Vec), + Union(Vec, UnionMode), /// A dictionary encoded array (`key_type`, `value_type`), where /// each array element is an index of `key_type` into an /// associated dictionary of `value_type`. 
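The schema-level half of this change is that `DataType::Union` now carries a `UnionMode` (`Sparse` or `Dense`) next to its child fields, which is why every `match` arm in the crate moves from `Union(_)` to `Union(_, _)` and why `layout()` can require one buffer for sparse unions and two for dense ones. Below is a minimal sketch of how downstream code constructs and inspects the new two-argument variant, assuming the patched crate re-exports `UnionMode` from `arrow::datatypes` as the diff indicates; the field names are illustrative only.

    use arrow::datatypes::{DataType, Field, UnionMode};

    // Report whether a union type is sparse or dense and how many children it has.
    fn describe_union(dt: &DataType) -> String {
        match dt {
            // The second element is the UnionMode now carried by the type itself.
            DataType::Union(fields, mode) => {
                format!("{:?} union with {} children", mode, fields.len())
            }
            other => format!("not a union: {:?}", other),
        }
    }

    fn main() {
        let dt = DataType::Union(
            vec![
                Field::new("ints", DataType::Int32, true),
                Field::new("strings", DataType::Utf8, true),
            ],
            UnionMode::Dense,
        );
        println!("{}", describe_union(&dt)); // prints: Dense union with 2 children
    }

Encoding the mode in the type rather than inferring it from buffer counts is what lets `is_dense` become a pure type check and lets `validate` insist that sparse children are at least as long as the parent array.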
@@ -176,6 +176,13 @@ pub enum IntervalUnit { MonthDayNano, } +// Sparse or Dense union layouts +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum UnionMode { + Sparse, + Dense, +} + impl fmt::Display for DataType { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{:?}", self) @@ -406,7 +413,7 @@ impl DataType { json!({"name": "fixedsizebinary", "byteWidth": byte_width}) } DataType::Struct(_) => json!({"name": "struct"}), - DataType::Union(_) => json!({"name": "union"}), + DataType::Union(_, _) => json!({"name": "union"}), DataType::List(_) => json!({ "name": "list"}), DataType::LargeList(_) => json!({ "name": "largelist"}), DataType::FixedSizeList(_, length) => { diff --git a/arrow/src/datatypes/field.rs b/arrow/src/datatypes/field.rs index 22e23faa63fd..edf01a263cd1 100644 --- a/arrow/src/datatypes/field.rs +++ b/arrow/src/datatypes/field.rs @@ -111,7 +111,7 @@ impl Field { pub(crate) fn fields(&self) -> Vec<&Field> { let mut collected_fields = vec![self]; match &self.data_type { - DataType::Struct(fields) | DataType::Union(fields) => { + DataType::Struct(fields) | DataType::Union(fields, _) => { collected_fields.extend(fields.iter().map(|f| f.fields()).flatten()) } DataType::List(field) @@ -484,8 +484,8 @@ impl Field { )); } }, - DataType::Union(nested_fields) => match &from.data_type { - DataType::Union(from_nested_fields) => { + DataType::Union(nested_fields, _) => match &from.data_type { + DataType::Union(from_nested_fields, _) => { for from_field in from_nested_fields { let mut is_new_field = true; for self_field in nested_fields.iter_mut() { diff --git a/arrow/src/datatypes/mod.rs b/arrow/src/datatypes/mod.rs index bc866b0145d7..bcbef58d3283 100644 --- a/arrow/src/datatypes/mod.rs +++ b/arrow/src/datatypes/mod.rs @@ -1379,28 +1379,37 @@ mod tests { Schema::try_merge(vec![ Schema::new(vec![Field::new( "c1", - DataType::Union(vec![ - Field::new("c11", DataType::Utf8, true), - Field::new("c12", DataType::Utf8, true), - ]), + DataType::Union( + vec![ + Field::new("c11", DataType::Utf8, true), + Field::new("c12", DataType::Utf8, true), + ], + UnionMode::Dense + ), false ),]), Schema::new(vec![Field::new( "c1", - DataType::Union(vec![ - Field::new("c12", DataType::Utf8, true), - Field::new("c13", DataType::Time64(TimeUnit::Second), true), - ]), + DataType::Union( + vec![ + Field::new("c12", DataType::Utf8, true), + Field::new("c13", DataType::Time64(TimeUnit::Second), true), + ], + UnionMode::Dense + ), false ),]) ])?, Schema::new(vec![Field::new( "c1", - DataType::Union(vec![ - Field::new("c11", DataType::Utf8, true), - Field::new("c12", DataType::Utf8, true), - Field::new("c13", DataType::Time64(TimeUnit::Second), true), - ]), + DataType::Union( + vec![ + Field::new("c11", DataType::Utf8, true), + Field::new("c12", DataType::Utf8, true), + Field::new("c13", DataType::Time64(TimeUnit::Second), true), + ], + UnionMode::Dense + ), false ),]), ); diff --git a/arrow/src/ipc/writer.rs b/arrow/src/ipc/writer.rs index c354eb4890a2..7316209b36fb 100644 --- a/arrow/src/ipc/writer.rs +++ b/arrow/src/ipc/writer.rs @@ -159,7 +159,7 @@ impl IpcDataGenerator { )?; } } - DataType::Union(fields) => { + DataType::Union(fields, _) => { let union = as_union_array(column); for (field, ref column) in fields .iter() diff --git a/parquet/src/arrow/arrow_writer.rs b/parquet/src/arrow/arrow_writer.rs index 9f8742871026..82c6d03dd60e 100644 --- a/parquet/src/arrow/arrow_writer.rs +++ b/parquet/src/arrow/arrow_writer.rs @@ -224,7 +224,7 @@ fn 
write_leaves( ArrowDataType::Float16 => Err(ParquetError::ArrowError( "Float16 arrays not supported".to_string(), )), - ArrowDataType::FixedSizeList(_, _) | ArrowDataType::Union(_) => { + ArrowDataType::FixedSizeList(_, _) | ArrowDataType::Union(_, _) => { Err(ParquetError::NYI( format!( "Attempting to write an Arrow type {:?} to parquet that is not yet implemented", diff --git a/parquet/src/arrow/levels.rs b/parquet/src/arrow/levels.rs index c9b6052aeb87..601e2c0966da 100644 --- a/parquet/src/arrow/levels.rs +++ b/parquet/src/arrow/levels.rs @@ -241,7 +241,7 @@ impl LevelInfo { list_level.calculate_array_levels(&child_array, list_field) } DataType::FixedSizeList(_, _) => unimplemented!(), - DataType::Union(_) => unimplemented!(), + DataType::Union(_, _) => unimplemented!(), } } DataType::Map(map_field, _) => { @@ -304,7 +304,7 @@ impl LevelInfo { }); struct_levels } - DataType::Union(_) => unimplemented!(), + DataType::Union(_, _) => unimplemented!(), DataType::Dictionary(_, _) => { // Need to check for these cases not implemented in C++: // - "Writing DictionaryArray with nested dictionary type not yet supported" @@ -743,7 +743,7 @@ impl LevelInfo { array_mask, ) } - DataType::FixedSizeList(_, _) | DataType::Union(_) => { + DataType::FixedSizeList(_, _) | DataType::Union(_, _) => { unimplemented!("Getting offsets not yet implemented") } } diff --git a/parquet/src/arrow/schema.rs b/parquet/src/arrow/schema.rs index 5fe94cef94db..51a7a04aa3c6 100644 --- a/parquet/src/arrow/schema.rs +++ b/parquet/src/arrow/schema.rs @@ -536,7 +536,7 @@ fn arrow_to_parquet_type(field: &Field) -> Result { )) } } - DataType::Union(_) => unimplemented!("See ARROW-8817."), + DataType::Union(_, _) => unimplemented!("See ARROW-8817."), DataType::Dictionary(_, ref value) => { // Dictionary encoding not handled at the schema level let dict_field = Field::new(name, *value.clone(), field.is_nullable());
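Taken together, the array-side changes make the unchecked constructor an explicit `unsafe fn new_unchecked` and route safe construction through `try_new`, which validates its inputs and then calls `data().validate()` against the new layout: a dense union always carries a full offsets buffer with one `i32` per slot, rather than one compressed around a validity bitmap. Below is a minimal sketch of building a dense union through the validated path, assuming the constructor signatures exactly as they appear in this diff; the child field names and values are illustrative only.

    use std::sync::Arc;

    use arrow::array::{Array, ArrayRef, Float64Array, Int32Array, UnionArray};
    use arrow::buffer::Buffer;
    use arrow::datatypes::{DataType, Field};
    use arrow::error::Result;

    fn main() -> Result<()> {
        // One i8 type id per slot: 0 selects the "ints" child, 1 selects "floats".
        let type_ids = Buffer::from_slice_ref(&[0i8, 1, 0]);
        // Dense unions keep one i32 offset per slot into the selected child.
        let value_offsets = Buffer::from_slice_ref(&[0i32, 0, 1]);

        let ints: ArrayRef = Arc::new(Int32Array::from(vec![1, 34]));
        let floats: ArrayRef = Arc::new(Float64Array::from(vec![3.2]));
        let children = vec![
            (Field::new("ints", DataType::Int32, false), ints),
            (Field::new("floats", DataType::Float64, false), floats),
        ];

        // try_new checks the type ids and the offsets length, then re-validates
        // the assembled ArrayData, so a short offsets buffer now fails loudly.
        let union = UnionArray::try_new(type_ids, Some(value_offsets), children, None)?;

        assert_eq!(union.len(), 3);
        // Offsets are indexed directly (no bitmap-based compression), so the
        // third slot resolves to ints[1], i.e. the value 34.
        assert_eq!(union.value_offset(2), 1);
        Ok(())
    }

Because dense offsets are no longer compressed around null slots, `value_offset` indexes the offsets buffer directly instead of counting set bits in a validity bitmap, which matches the simplified dense branch in `array_union.rs` above.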