Skip to content

Commit

Permalink
[BUG] enable fixed size binary ingest to daft binary (#1612)
Browse files Browse the repository at this point in the history
* Enables reads of fixed-size binary data
* I hit some issues reading UUIDs from Parquet files
  • Loading branch information
samster25 authored Nov 16, 2023
1 parent c4b498a commit 7149083
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 3 deletions.
6 changes: 5 additions & 1 deletion daft/datatype.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,11 @@ def from_arrow_type(cls, arrow_type: pa.lib.DataType) -> DataType:
return cls.float64()
elif pa.types.is_string(arrow_type) or pa.types.is_large_string(arrow_type):
return cls.string()
elif pa.types.is_binary(arrow_type) or pa.types.is_large_binary(arrow_type):
elif (
pa.types.is_binary(arrow_type)
or pa.types.is_large_binary(arrow_type)
or pa.types.is_fixed_size_binary(arrow_type)
):
return cls.binary()
elif pa.types.is_boolean(arrow_type):
return cls.bool()
Expand Down
4 changes: 3 additions & 1 deletion src/daft-core/src/datatypes/dtype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,9 @@ impl From<&ArrowType> for DataType {
DataType::Time(timeunit.into())
}
ArrowType::Duration(timeunit) => DataType::Duration(timeunit.into()),
ArrowType::Binary | ArrowType::LargeBinary => DataType::Binary,
ArrowType::Binary | ArrowType::LargeBinary | ArrowType::FixedSizeBinary(_) => {
DataType::Binary
}
ArrowType::Utf8 | ArrowType::LargeUtf8 => DataType::Utf8,
ArrowType::Decimal(precision, scale) => DataType::Decimal128(*precision, *scale),
ArrowType::List(field) | ArrowType::LargeList(field) => {
Expand Down
4 changes: 3 additions & 1 deletion src/daft-core/src/utils/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ fn coerce_to_daft_compatible_type(
) -> Option<arrow2::datatypes::DataType> {
match dtype {
arrow2::datatypes::DataType::Utf8 => Some(arrow2::datatypes::DataType::LargeUtf8),
arrow2::datatypes::DataType::Binary => Some(arrow2::datatypes::DataType::LargeBinary),
arrow2::datatypes::DataType::Binary | arrow2::datatypes::DataType::FixedSizeBinary(_) => {
Some(arrow2::datatypes::DataType::LargeBinary)
}
arrow2::datatypes::DataType::List(field) => {
let new_field = match coerce_to_daft_compatible_type(field.data_type()) {
Some(new_inner_dtype) => Box::new(
Expand Down

0 comments on commit 7149083

Please sign in to comment.