
Commit 7e3e508: Minor fixes, apache#523 followup (apache#563)

Authored by kevinjqliu on Mar 31, 2024
Parent: 8143df6
Showing 4 changed files with 4 additions and 8 deletions.
pyiceberg/table/__init__.py (3 additions, 2 deletions)

@@ -1131,8 +1131,9 @@ def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT)
 
         _check_schema_compatible(self.schema(), other_schema=df.schema)
         # cast if the two schemas are compatible but not equal
-        if self.schema().as_arrow() != df.schema:
-            df = df.cast(self.schema().as_arrow())
+        table_arrow_schema = self.schema().as_arrow()
+        if table_arrow_schema != df.schema:
+            df = df.cast(table_arrow_schema)
 
         with self.transaction() as txn:
             with txn.update_snapshot(snapshot_properties=snapshot_properties).fast_append() as update_snapshot:
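The change hoists the table's converted Arrow schema into a local, so Schema.as_arrow() runs once per append rather than up to twice when a cast is needed; behavior is otherwise unchanged. A minimal caller-side sketch of the code path being touched, with hypothetical catalog and table names, assuming a configured catalog and an existing table:

import pyarrow as pa
from pyiceberg.catalog import load_catalog

catalog = load_catalog("default")              # hypothetical catalog name
table = catalog.load_table("default.events")   # hypothetical table identifier

# An Arrow table whose schema is compatible with, but not byte-identical
# to, the Iceberg table's Arrow schema (e.g. differing nullability);
# append() compares the two schemas and casts the frame before writing.
df = pa.table({"idx": pa.array([1, 2, 3], type=pa.int64())})
table.append(df)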
tests/catalog/test_sql.py (1 addition, 4 deletions)

@@ -197,7 +197,7 @@ def test_create_table_with_pyarrow_schema(
     'catalog',
     [
         lazy_fixture('catalog_memory'),
-        # lazy_fixture('catalog_sqlite'),
+        lazy_fixture('catalog_sqlite'),
     ],
 )
 def test_write_pyarrow_schema(catalog: SqlCatalog, random_identifier: Identifier) -> None:
@@ -220,9 +220,6 @@ def test_write_pyarrow_schema(catalog: SqlCatalog, random_identifier: Identifier
     database_name, _table_name = random_identifier
     catalog.create_namespace(database_name)
     table = catalog.create_table(random_identifier, pyarrow_table.schema)
-    print(pyarrow_table.schema)
-    print(table.schema().as_struct())
-    print()
     table.overwrite(pyarrow_table)
 
 
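Uncommenting the sqlite fixture means the write test now runs against both catalog backends, and the leftover debug prints are dropped. A minimal sketch of the lazy-fixture parametrization pattern in use, with a hypothetical test body; each listed fixture yields one test run:

import pytest
from pytest_lazyfixture import lazy_fixture

@pytest.mark.parametrize(
    'catalog',
    [
        lazy_fixture('catalog_memory'),
        lazy_fixture('catalog_sqlite'),
    ],
)
def test_smoke(catalog):  # hypothetical test; runs once per catalog fixture
    assert catalog is not None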
tests/integration/test_reads.py (0 additions, 1 deletion)

@@ -274,7 +274,6 @@ def test_ray_nan_rewritten(catalog: Catalog) -> None:
 def test_ray_not_nan_count(catalog: Catalog) -> None:
     table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten")
     ray_dataset = table_test_null_nan_rewritten.scan(row_filter=NotNaN("col_numeric"), selected_fields=("idx",)).to_ray()
-    print(ray_dataset.take())
     assert ray_dataset.count() == 2
 
 
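For reference, NotNaN comes from pyiceberg.expressions, and the same scan can be materialized without Ray. A sketch under the assumption that the test's table is loaded as above; the row count should match the Ray dataset's:

from pyiceberg.expressions import NotNaN

# Same filter and projection as the test, materialized via Arrow instead of Ray.
arrow_table = table_test_null_nan_rewritten.scan(
    row_filter=NotNaN("col_numeric"),
    selected_fields=("idx",),
).to_arrow()
assert len(arrow_table) == 2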
tests/integration/test_writes.py (0 additions, 1 deletion)

@@ -480,7 +480,6 @@ def test_write_parquet_other_properties(
     properties: Dict[str, Any],
     expected_kwargs: Dict[str, Any],
 ) -> None:
-    print(type(mocker))
     identifier = "default.test_write_parquet_other_properties"
 
     # The properties we test cannot be checked on the resulting Parquet file, so we spy on the ParquetWriter call instead
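The retained comment names the test's technique: pytest-mock's mocker.spy wraps a callable so the real Parquet write still happens while the call arguments stay inspectable. A minimal sketch of that pattern with a hypothetical property assertion; the suite's actual spy target and assertions may differ:

import pyarrow.parquet as pq

def test_spy_parquet_writer(mocker):
    # Wrap (not replace) ParquetWriter; calls pass through to the real class
    # while the spy records how it was invoked.
    spy = mocker.spy(pq, "ParquetWriter")
    ...  # code under test writes a Parquet file here
    # The kwargs forwarded to the writer can then be asserted, e.g.:
    # assert spy.call_args.kwargs.get("compression") == "zstd"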
