Skip to content

Commit

Permalink
Add DataType::ListView and DataType::LargeListView (apache#5493)
Browse files Browse the repository at this point in the history
* Add DataType::ListView and DataType::LargeListView

* revert some file to main

* revert some file to main

* revert some file to main

* fix: listview buffer init

* cargo clippy

* cargo fmt

* fix buffer init

* Update arrow-schema/src/datatype.rs

Co-authored-by: Andrew Lamb <[email protected]>

* Update arrow-schema/src/datatype.rs

Co-authored-by: Andrew Lamb <[email protected]>

---------

Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
Kikkon and alamb authored Mar 13, 2024
1 parent 19a3bb0 commit ad3b4c9
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 1 deletion.
11 changes: 11 additions & 0 deletions arrow-data/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,20 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff
buffer.push(0i32);
[buffer, empty_buffer]
}
DataType::ListView(_) => [
MutableBuffer::new(capacity * mem::size_of::<i32>()),
MutableBuffer::new(capacity * mem::size_of::<i32>()),
],
DataType::LargeList(_) => {
// offset buffer always starts with a zero
let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i64>());
buffer.push(0i64);
[buffer, empty_buffer]
}
DataType::LargeListView(_) => [
MutableBuffer::new(capacity * mem::size_of::<i64>()),
MutableBuffer::new(capacity * mem::size_of::<i64>()),
],
DataType::FixedSizeBinary(size) => {
[MutableBuffer::new(capacity * *size as usize), empty_buffer]
}
Expand Down Expand Up @@ -1549,6 +1557,9 @@ pub fn layout(data_type: &DataType) -> DataTypeLayout {
}
DataType::FixedSizeList(_, _) => DataTypeLayout::new_empty(), // all in child data
DataType::List(_) => DataTypeLayout::new_fixed_width::<i32>(),
DataType::ListView(_) | DataType::LargeListView(_) => {
unimplemented!("ListView/LargeListView not implemented")
}
DataType::LargeList(_) => DataTypeLayout::new_fixed_width::<i64>(),
DataType::Map(_, _) => DataTypeLayout::new_fixed_width::<i32>(),
DataType::Struct(_) => DataTypeLayout::new_empty(), // all in child data,
Expand Down
3 changes: 3 additions & 0 deletions arrow-data/src/equal/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ fn equal_values(
unimplemented!("BinaryView/Utf8View not yet implemented")
}
DataType::List(_) => list_equal::<i32>(lhs, rhs, lhs_start, rhs_start, len),
DataType::ListView(_) | DataType::LargeListView(_) => {
unimplemented!("ListView/LargeListView not yet implemented")
}
DataType::LargeList(_) => list_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len),
DataType::FixedSizeList(_, _) => fixed_list_equal(lhs, rhs, lhs_start, rhs_start, len),
DataType::Struct(_) => struct_equal(lhs, rhs, lhs_start, rhs_start, len),
Expand Down
9 changes: 9 additions & 0 deletions arrow-data/src/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,9 @@ fn build_extend(array: &ArrayData) -> Extend {
unimplemented!("BinaryView/Utf8View not implemented")
}
DataType::Map(_, _) | DataType::List(_) => list::build_extend::<i32>(array),
DataType::ListView(_) | DataType::LargeListView(_) => {
unimplemented!("ListView/LargeListView not implemented")
}
DataType::LargeList(_) => list::build_extend::<i64>(array),
DataType::Dictionary(_, _) => unreachable!("should use build_extend_dictionary"),
DataType::Struct(_) => structure::build_extend(array),
Expand Down Expand Up @@ -273,6 +276,9 @@ fn build_extend_nulls(data_type: &DataType) -> ExtendNulls {
unimplemented!("BinaryView/Utf8View not implemented")
}
DataType::Map(_, _) | DataType::List(_) => list::extend_nulls::<i32>,
DataType::ListView(_) | DataType::LargeListView(_) => {
unimplemented!("ListView/LargeListView not implemented")
}
DataType::LargeList(_) => list::extend_nulls::<i64>,
DataType::Dictionary(child_data_type, _) => match child_data_type.as_ref() {
DataType::UInt8 => primitive::extend_nulls::<u8>,
Expand Down Expand Up @@ -428,6 +434,9 @@ impl<'a> MutableArrayData<'a> {
DataType::BinaryView | DataType::Utf8View => {
unimplemented!("BinaryView/Utf8View not implemented")
}
DataType::ListView(_) | DataType::LargeListView(_) => {
unimplemented!("ListView/LargeListView not implemented")
}
DataType::Map(_, _) | DataType::List(_) | DataType::LargeList(_) => {
let children = arrays
.iter()
Expand Down
3 changes: 3 additions & 0 deletions arrow-integration-test/src/datatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,9 @@ pub fn data_type_to_json(data_type: &DataType) -> serde_json::Value {
DataType::Union(_, _) => json!({"name": "union"}),
DataType::List(_) => json!({ "name": "list"}),
DataType::LargeList(_) => json!({ "name": "largelist"}),
DataType::ListView(_) | DataType::LargeListView(_) => {
unimplemented!("ListView/LargeListView not implemented")
}
DataType::FixedSizeList(_, length) => {
json!({"name":"fixedsizelist", "listSize": length})
}
Expand Down
1 change: 1 addition & 0 deletions arrow-ipc/src/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,7 @@ pub(crate) fn get_fb_field_type<'a>(
children: Some(fbb.create_vector(&[child])),
}
}
ListView(_) | LargeListView(_) => unimplemented!("ListView/LargeListView not implemented"),
LargeList(ref list_type) => {
let child = build_field(fbb, list_type);
FBFieldType {
Expand Down
26 changes: 25 additions & 1 deletion arrow-schema/src/datatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,12 +228,30 @@ pub enum DataType {
///
/// A single List array can store up to [`i32::MAX`] elements in total.
List(FieldRef),

/// (NOT YET FULLY SUPPORTED) A list of some logical data type with variable length.
///
/// Note this data type is not yet fully supported. Using it with arrow APIs may result in `panic`s.
///
/// The ListView layout is defined by three buffers:
/// a validity bitmap, an offsets buffer, and an additional sizes buffer.
/// Sizes and offsets are both 32-bit signed integers for this type.
ListView(FieldRef),
/// A list of some logical data type with fixed length.
FixedSizeList(FieldRef, i32),
/// A list of some logical data type with variable length and 64-bit offsets.
///
/// A single LargeList array can store up to [`i64::MAX`] elements in total.
LargeList(FieldRef),

/// (NOT YET FULLY SUPPORTED) A list of some logical data type with variable length and 64-bit offsets.
///
/// Note this data type is not yet fully supported. Using it with arrow APIs may result in `panic`s.
///
/// The LargeListView layout is defined by three buffers:
/// a validity bitmap, an offsets buffer, and an additional sizes buffer.
/// Sizes and offsets are both 64-bit signed integers for this type.
LargeListView(FieldRef),
/// A nested datatype that contains a number of sub-fields.
Struct(Fields),
/// A nested datatype that can represent slots of differing types. Components:
Expand Down Expand Up @@ -536,7 +554,11 @@ impl DataType {
DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => None,
DataType::Binary | DataType::LargeBinary | DataType::BinaryView => None,
DataType::FixedSizeBinary(_) => None,
DataType::List(_) | DataType::LargeList(_) | DataType::Map(_, _) => None,
DataType::List(_)
| DataType::ListView(_)
| DataType::LargeList(_)
| DataType::LargeListView(_)
| DataType::Map(_, _) => None,
DataType::FixedSizeList(_, _) => None,
DataType::Struct(_) => None,
DataType::Union(_, _) => None,
Expand Down Expand Up @@ -581,8 +603,10 @@ impl DataType {
| DataType::Decimal256(_, _) => 0,
DataType::Timestamp(_, s) => s.as_ref().map(|s| s.len()).unwrap_or_default(),
DataType::List(field)
| DataType::ListView(field)
| DataType::FixedSizeList(field, _)
| DataType::LargeList(field)
| DataType::LargeListView(field)
| DataType::Map(field, _) => field.size(),
DataType::Struct(fields) => fields.size(),
DataType::Union(fields, _) => fields.size(),
Expand Down
2 changes: 2 additions & 0 deletions arrow-schema/src/field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,9 @@ impl Field {
| DataType::BinaryView
| DataType::Interval(_)
| DataType::LargeList(_)
| DataType::LargeListView(_)
| DataType::List(_)
| DataType::ListView(_)
| DataType::Map(_, _)
| DataType::Dictionary(_, _)
| DataType::RunEndEncoded(_, _)
Expand Down
1 change: 1 addition & 0 deletions parquet/src/arrow/schema/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,7 @@ fn arrow_to_parquet_type(field: &Field) -> Result<Type> {
.with_id(id)
.build()
}
DataType::ListView(_) | DataType::LargeListView(_) => unimplemented!("ListView/LargeListView not implemented"),
DataType::Struct(fields) => {
if fields.is_empty() {
return Err(
Expand Down

0 comments on commit ad3b4c9

Please sign in to comment.