diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index aa27796081..a7a1f5a65f 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -1807,7 +1807,11 @@ def struct( else: raise ResolveError(f"Field is required, and could not be found in the file: {field}") - return pa.StructArray.from_arrays(arrays=field_arrays, fields=pa.struct(fields)) + return pa.StructArray.from_arrays( + arrays=field_arrays, + fields=pa.struct(fields), + mask=struct_array.is_null() if isinstance(struct_array, pa.StructArray) else None, + ) def field(self, field: NestedField, _: Optional[pa.Array], field_array: Optional[pa.Array]) -> Optional[pa.Array]: return field_array diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py index 9cccb542d6..49c7c2dff6 100644 --- a/tests/integration/test_writes/test_writes.py +++ b/tests/integration/test_writes/test_writes.py @@ -1450,6 +1450,47 @@ def test_rewrite_manifest_after_partition_evolution(session_catalog: Catalog) -> ) +@pytest.mark.integration +def test_writing_null_structs(session_catalog: Catalog) -> None: + import pyarrow as pa + + schema = pa.schema([ + pa.field( + "struct_field_1", + pa.struct([ + pa.field("string_nested_1", pa.string()), + pa.field("int_item_2", pa.int32()), + pa.field("float_item_2", pa.float32()), + ]), + ), + ]) + + records = [ + { + "struct_field_1": { + "string_nested_1": "nest_1", + "int_item_2": 1234, + "float_item_2": 1.234, + }, + }, + {}, + ] + + try: + session_catalog.drop_table( + identifier="default.test_writing_null_structs", + ) + except NoSuchTableError: + pass + + table = session_catalog.create_table("default.test_writing_null_structs", schema) + + pyarrow_table: pa.Table = pa.Table.from_pylist(records, schema=schema) + table.append(pyarrow_table) + + assert pyarrow_table.to_pandas()["struct_field_1"].tolist() == table.scan().to_pandas()["struct_field_1"].tolist() + + @pytest.mark.integration @pytest.mark.parametrize("format_version", [1, 2]) def test_abort_table_transaction_on_exception(