Skip to content

Commit

Permalink
Arrow: Support Arrow large-string (apache#382)
Browse files (browse the repository at this point in the history)
  • Loading branch information
Fokko authored Feb 7, 2024
1 parent 853a77c commit a7794ca
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pyiceberg/io/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -864,7 +864,7 @@ def primitive(self, primitive: pa.DataType) -> PrimitiveType:
elif isinstance(primitive, pa.Decimal128Type):
primitive = cast(pa.Decimal128Type, primitive)
return DecimalType(primitive.precision, primitive.scale)
elif pa.types.is_string(primitive):
elif pa.types.is_string(primitive) or pa.types.is_large_string(primitive):
return StringType()
elif pa.types.is_date32(primitive):
return DateType()
Expand Down
9 changes: 9 additions & 0 deletions tests/io/test_pyarrow_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,15 @@ def test_round_schema_conversion_nested(table_schema_nested: Schema) -> None:
assert actual == expected


def test_round_schema_large_string() -> None:
    """Check that an Arrow ``large_string`` column converts to an Iceberg ``string``.

    Builds a one-column Arrow schema, converts it with ``pyarrow_to_schema``
    using an explicit name mapping, and compares the string rendering of the
    result against the expected text.
    """
    # Single "animals" column of Arrow's large_string type; no nullable flag is passed.
    arrow_schema = pa.schema([pa.field("animals", pa.large_string())])

    # The name mapping supplies field ID 1 for the "animals" column.
    mapping = NameMapping([MappedField(field_id=1, names=["animals"])])
    rendered = str(pyarrow_to_schema(arrow_schema, name_mapping=mapping))

    # NOTE(review): the continuation lines of this literal are deliberately kept
    # at column 0 so its bytes match the original; confirm the field line's
    # leading whitespace against the schema's actual string rendering.
    wanted = """table {
1: animals: optional string
}"""
    assert rendered == wanted


def test_simple_schema_has_missing_ids() -> None:
schema = pa.schema([
pa.field('foo', pa.string(), nullable=False),
Expand Down

0 comments on commit a7794ca

Please sign in to comment.