diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index bd45c4f8ddda..16637570f520 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ -118,12 +118,20 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff buffer.push(0i32); [buffer, empty_buffer] } + DataType::ListView(_) => [ + MutableBuffer::new(capacity * mem::size_of::()), + MutableBuffer::new(capacity * mem::size_of::()), + ], DataType::LargeList(_) => { // offset buffer always starts with a zero let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::()); buffer.push(0i64); [buffer, empty_buffer] } + DataType::LargeListView(_) => [ + MutableBuffer::new(capacity * mem::size_of::()), + MutableBuffer::new(capacity * mem::size_of::()), + ], DataType::FixedSizeBinary(size) => { [MutableBuffer::new(capacity * *size as usize), empty_buffer] } @@ -1549,6 +1557,9 @@ pub fn layout(data_type: &DataType) -> DataTypeLayout { } DataType::FixedSizeList(_, _) => DataTypeLayout::new_empty(), // all in child data DataType::List(_) => DataTypeLayout::new_fixed_width::(), + DataType::ListView(_) | DataType::LargeListView(_) => { + unimplemented!("ListView/LargeListView not implemented") + } DataType::LargeList(_) => DataTypeLayout::new_fixed_width::(), DataType::Map(_, _) => DataTypeLayout::new_fixed_width::(), DataType::Struct(_) => DataTypeLayout::new_empty(), // all in child data, diff --git a/arrow-data/src/equal/mod.rs b/arrow-data/src/equal/mod.rs index 1255ff39e097..0987fd4c5637 100644 --- a/arrow-data/src/equal/mod.rs +++ b/arrow-data/src/equal/mod.rs @@ -100,6 +100,9 @@ fn equal_values( unimplemented!("BinaryView/Utf8View not yet implemented") } DataType::List(_) => list_equal::(lhs, rhs, lhs_start, rhs_start, len), + DataType::ListView(_) | DataType::LargeListView(_) => { + unimplemented!("ListView/LargeListView not yet implemented") + } DataType::LargeList(_) => list_equal::(lhs, rhs, lhs_start, rhs_start, len), DataType::FixedSizeList(_, _) => fixed_list_equal(lhs, rhs, lhs_start, rhs_start, len), DataType::Struct(_) => struct_equal(lhs, rhs, lhs_start, rhs_start, len), diff --git a/arrow-data/src/transform/mod.rs b/arrow-data/src/transform/mod.rs index ef53efac2373..b14f6e771033 100644 --- a/arrow-data/src/transform/mod.rs +++ b/arrow-data/src/transform/mod.rs @@ -228,6 +228,9 @@ fn build_extend(array: &ArrayData) -> Extend { unimplemented!("BinaryView/Utf8View not implemented") } DataType::Map(_, _) | DataType::List(_) => list::build_extend::(array), + DataType::ListView(_) | DataType::LargeListView(_) => { + unimplemented!("ListView/LargeListView not implemented") + } DataType::LargeList(_) => list::build_extend::(array), DataType::Dictionary(_, _) => unreachable!("should use build_extend_dictionary"), DataType::Struct(_) => structure::build_extend(array), @@ -273,6 +276,9 @@ fn build_extend_nulls(data_type: &DataType) -> ExtendNulls { unimplemented!("BinaryView/Utf8View not implemented") } DataType::Map(_, _) | DataType::List(_) => list::extend_nulls::, + DataType::ListView(_) | DataType::LargeListView(_) => { + unimplemented!("ListView/LargeListView not implemented") + } DataType::LargeList(_) => list::extend_nulls::, DataType::Dictionary(child_data_type, _) => match child_data_type.as_ref() { DataType::UInt8 => primitive::extend_nulls::, @@ -428,6 +434,9 @@ impl<'a> MutableArrayData<'a> { DataType::BinaryView | DataType::Utf8View => { unimplemented!("BinaryView/Utf8View not implemented") } + DataType::ListView(_) | DataType::LargeListView(_) => { + unimplemented!("ListView/LargeListView not implemented") + } DataType::Map(_, _) | DataType::List(_) | DataType::LargeList(_) => { let children = arrays .iter() diff --git a/arrow-integration-test/src/datatype.rs b/arrow-integration-test/src/datatype.rs index a04db1cf3538..e45e94c24e07 100644 --- a/arrow-integration-test/src/datatype.rs +++ b/arrow-integration-test/src/datatype.rs @@ -281,6 +281,9 @@ pub fn data_type_to_json(data_type: &DataType) -> serde_json::Value { DataType::Union(_, _) => json!({"name": "union"}), DataType::List(_) => json!({ "name": "list"}), DataType::LargeList(_) => json!({ "name": "largelist"}), + DataType::ListView(_) | DataType::LargeListView(_) => { + unimplemented!("ListView/LargeListView not implemented") + } DataType::FixedSizeList(_, length) => { json!({"name":"fixedsizelist", "listSize": length}) } diff --git a/arrow-ipc/src/convert.rs b/arrow-ipc/src/convert.rs index a2ffd4380203..a821008d89ab 100644 --- a/arrow-ipc/src/convert.rs +++ b/arrow-ipc/src/convert.rs @@ -664,6 +664,7 @@ pub(crate) fn get_fb_field_type<'a>( children: Some(fbb.create_vector(&[child])), } } + ListView(_) | LargeListView(_) => unimplemented!("ListView/LargeListView not implemented"), LargeList(ref list_type) => { let child = build_field(fbb, list_type); FBFieldType { diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs index 89c001b0e657..449d363db671 100644 --- a/arrow-schema/src/datatype.rs +++ b/arrow-schema/src/datatype.rs @@ -228,12 +228,30 @@ pub enum DataType { /// /// A single List array can store up to [`i32::MAX`] elements in total. List(FieldRef), + + /// (NOT YET FULLY SUPPORTED) A list of some logical data type with variable length. + /// + /// Note this data type is not yet fully supported. Using it with arrow APIs may result in `panic`s. + /// + /// The ListView layout is defined by three buffers: + /// a validity bitmap, an offsets buffer, and an additional sizes buffer. + /// Sizes and offsets are both 32 bits for this type + ListView(FieldRef), /// A list of some logical data type with fixed length. FixedSizeList(FieldRef, i32), /// A list of some logical data type with variable length and 64-bit offsets. /// /// A single LargeList array can store up to [`i64::MAX`] elements in total. LargeList(FieldRef), + + /// (NOT YET FULLY SUPPORTED) A list of some logical data type with variable length and 64-bit offsets. + /// + /// Note this data type is not yet fully supported. Using it with arrow APIs may result in `panic`s. + /// + /// The LargeListView layout is defined by three buffers: + /// a validity bitmap, an offsets buffer, and an additional sizes buffer. + /// Sizes and offsets are both 64 bits for this type + LargeListView(FieldRef), /// A nested datatype that contains a number of sub-fields. Struct(Fields), /// A nested datatype that can represent slots of differing types. Components: @@ -536,7 +554,11 @@ impl DataType { DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => None, DataType::Binary | DataType::LargeBinary | DataType::BinaryView => None, DataType::FixedSizeBinary(_) => None, - DataType::List(_) | DataType::LargeList(_) | DataType::Map(_, _) => None, + DataType::List(_) + | DataType::ListView(_) + | DataType::LargeList(_) + | DataType::LargeListView(_) + | DataType::Map(_, _) => None, DataType::FixedSizeList(_, _) => None, DataType::Struct(_) => None, DataType::Union(_, _) => None, @@ -581,8 +603,10 @@ impl DataType { | DataType::Decimal256(_, _) => 0, DataType::Timestamp(_, s) => s.as_ref().map(|s| s.len()).unwrap_or_default(), DataType::List(field) + | DataType::ListView(field) | DataType::FixedSizeList(field, _) | DataType::LargeList(field) + | DataType::LargeListView(field) | DataType::Map(field, _) => field.size(), DataType::Struct(fields) => fields.size(), DataType::Union(fields, _) => fields.size(), diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs index 0770cf41a02d..b84a2568ed8a 100644 --- a/arrow-schema/src/field.rs +++ b/arrow-schema/src/field.rs @@ -510,7 +510,9 @@ impl Field { | DataType::BinaryView | DataType::Interval(_) | DataType::LargeList(_) + | DataType::LargeListView(_) | DataType::List(_) + | DataType::ListView(_) | DataType::Map(_, _) | DataType::Dictionary(_, _) | DataType::RunEndEncoded(_, _) diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs index a8bef98d9e8c..4a78db05ed2d 100644 --- a/parquet/src/arrow/schema/mod.rs +++ b/parquet/src/arrow/schema/mod.rs @@ -528,6 +528,7 @@ fn arrow_to_parquet_type(field: &Field) -> Result { .with_id(id) .build() } + DataType::ListView(_) | DataType::LargeListView(_) => unimplemented!("ListView/LargeListView not implemented"), DataType::Struct(fields) => { if fields.is_empty() { return Err(