From 9d01b02f14e761b36fe0b88a82274037f7ced5a7 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Mon, 4 Mar 2024 23:47:13 -0800 Subject: [PATCH] Explicitly check for `schema_id` in tests (#487) --- tests/catalog/test_base.py | 8 ++--- tests/integration/test_partition_evolution.py | 2 +- tests/integration/test_reads.py | 1 - tests/integration/test_rest_schema.py | 4 +-- tests/table/test_init.py | 31 +++++++++++-------- tests/table/test_metadata.py | 1 - 6 files changed, 25 insertions(+), 22 deletions(-) diff --git a/tests/catalog/test_base.py b/tests/catalog/test_base.py index c7d3f01ff1..1defb0996f 100644 --- a/tests/catalog/test_base.py +++ b/tests/catalog/test_base.py @@ -614,9 +614,9 @@ def test_add_column(catalog: InMemoryCatalog) -> None: NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"), NestedField(field_id=3, name="z", field_type=LongType(), required=True), NestedField(field_id=4, name="new_column1", field_type=IntegerType(), required=False), - schema_id=0, identifier_field_ids=[], ) + assert given_table.schema().schema_id == 1 transaction = given_table.transaction() transaction.update_schema().add_column(path="new_column2", field_type=IntegerType(), doc="doc").commit() @@ -628,9 +628,9 @@ def test_add_column(catalog: InMemoryCatalog) -> None: NestedField(field_id=3, name="z", field_type=LongType(), required=True), NestedField(field_id=4, name="new_column1", field_type=IntegerType(), required=False), NestedField(field_id=5, name="new_column2", field_type=IntegerType(), required=False, doc="doc"), - schema_id=0, identifier_field_ids=[], ) + assert given_table.schema().schema_id == 2 def test_add_column_with_statement(catalog: InMemoryCatalog) -> None: @@ -644,9 +644,9 @@ def test_add_column_with_statement(catalog: InMemoryCatalog) -> None: NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"), NestedField(field_id=3, name="z", field_type=LongType(), required=True), NestedField(field_id=4, 
name="new_column1", field_type=IntegerType(), required=False), - schema_id=0, identifier_field_ids=[], ) + assert given_table.schema().schema_id == 1 with given_table.transaction() as tx: tx.update_schema().add_column(path="new_column2", field_type=IntegerType(), doc="doc").commit() @@ -657,9 +657,9 @@ def test_add_column_with_statement(catalog: InMemoryCatalog) -> None: NestedField(field_id=3, name="z", field_type=LongType(), required=True), NestedField(field_id=4, name="new_column1", field_type=IntegerType(), required=False), NestedField(field_id=5, name="new_column2", field_type=IntegerType(), required=False, doc="doc"), - schema_id=0, identifier_field_ids=[], ) + assert given_table.schema().schema_id == 2 def test_catalog_repr(catalog: InMemoryCatalog) -> None: diff --git a/tests/integration/test_partition_evolution.py b/tests/integration/test_partition_evolution.py index 16feef565d..85ae32374d 100644 --- a/tests/integration/test_partition_evolution.py +++ b/tests/integration/test_partition_evolution.py @@ -419,9 +419,9 @@ def test_change_specs_and_schema_transaction(catalog: Catalog) -> None: NestedField(field_id=2, name='event_ts', field_type=TimestampType(), required=False), NestedField(field_id=3, name='str', field_type=StringType(), required=False), NestedField(field_id=4, name='col_string', field_type=StringType(), required=False), - schema_id=1, identifier_field_ids=[], ) + assert table.schema().schema_id == 1 @pytest.mark.integration diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py index c03bc78a18..072fd7db25 100644 --- a/tests/integration/test_reads.py +++ b/tests/integration/test_reads.py @@ -93,7 +93,6 @@ def create_table(catalog: Catalog) -> Table: NestedField(field_id=2, name="int", field_type=IntegerType(), required=True), NestedField(field_id=3, name="bool", field_type=BooleanType(), required=False), NestedField(field_id=4, name="datetime", field_type=TimestampType(), required=False), - schema_id=1, ) return 
catalog.create_table(identifier=TABLE_NAME, schema=schema) diff --git a/tests/integration/test_rest_schema.py b/tests/integration/test_rest_schema.py index 17fb338080..4c758e4c3e 100644 --- a/tests/integration/test_rest_schema.py +++ b/tests/integration/test_rest_schema.py @@ -361,7 +361,6 @@ def test_revert_changes(simple_table: Table, table_schema_simple: Schema) -> Non NestedField(field_id=1, name='foo', field_type=StringType(), required=False), NestedField(field_id=2, name='bar', field_type=IntegerType(), required=True), NestedField(field_id=3, name='baz', field_type=BooleanType(), required=False), - schema_id=0, identifier_field_ids=[2], ), 1: Schema( @@ -369,11 +368,12 @@ def test_revert_changes(simple_table: Table, table_schema_simple: Schema) -> Non NestedField(field_id=2, name='bar', field_type=IntegerType(), required=True), NestedField(field_id=3, name='baz', field_type=BooleanType(), required=False), NestedField(field_id=4, name='data', field_type=IntegerType(), required=False), - schema_id=1, identifier_field_ids=[2], ), } assert simple_table.schema().schema_id == 0 + assert simple_table.schemas()[0].schema_id == 0 + assert simple_table.schemas()[1].schema_id == 1 @pytest.mark.integration diff --git a/tests/table/test_init.py b/tests/table/test_init.py index 04efc5f402..b8097f5fcf 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -107,26 +107,26 @@ def test_schema(table_v2: Table) -> None: NestedField(field_id=1, name="x", field_type=LongType(), required=True), NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"), NestedField(field_id=3, name="z", field_type=LongType(), required=True), - schema_id=1, identifier_field_ids=[1, 2], ) + assert table_v2.schema().schema_id == 1 def test_schemas(table_v2: Table) -> None: assert table_v2.schemas() == { 0: Schema( NestedField(field_id=1, name="x", field_type=LongType(), required=True), - schema_id=0, identifier_field_ids=[], ), 1: Schema( 
NestedField(field_id=1, name="x", field_type=LongType(), required=True), NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"), NestedField(field_id=3, name="z", field_type=LongType(), required=True), - schema_id=1, identifier_field_ids=[1, 2], ), } + assert table_v2.schemas()[0].schema_id == 0 + assert table_v2.schemas()[1].schema_id == 1 def test_spec(table_v2: Table) -> None: @@ -266,31 +266,34 @@ def test_table_scan_ref_does_not_exists(table_v2: Table) -> None: def test_table_scan_projection_full_schema(table_v2: Table) -> None: scan = table_v2.scan() - assert scan.select("x", "y", "z").projection() == Schema( + projection_schema = scan.select("x", "y", "z").projection() + assert projection_schema == Schema( NestedField(field_id=1, name="x", field_type=LongType(), required=True), NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"), NestedField(field_id=3, name="z", field_type=LongType(), required=True), - schema_id=1, identifier_field_ids=[1, 2], ) + assert projection_schema.schema_id == 1 def test_table_scan_projection_single_column(table_v2: Table) -> None: scan = table_v2.scan() - assert scan.select("y").projection() == Schema( + projection_schema = scan.select("y").projection() + assert projection_schema == Schema( NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"), - schema_id=1, identifier_field_ids=[2], ) + assert projection_schema.schema_id == 1 def test_table_scan_projection_single_column_case_sensitive(table_v2: Table) -> None: scan = table_v2.scan() - assert scan.with_case_sensitive(False).select("Y").projection() == Schema( + projection_schema = scan.with_case_sensitive(False).select("Y").projection() + assert projection_schema == Schema( NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"), - schema_id=1, identifier_field_ids=[2], ) + assert projection_schema.schema_id == 1 def 
test_table_scan_projection_unknown_column(table_v2: Table) -> None: @@ -983,20 +986,22 @@ def test_correct_schema() -> None: ) # Should use the current schema, instead the one from the snapshot - assert t.scan().projection() == Schema( + projection_schema = t.scan().projection() + assert projection_schema == Schema( NestedField(field_id=1, name='x', field_type=LongType(), required=True), NestedField(field_id=2, name='y', field_type=LongType(), required=True), NestedField(field_id=3, name='z', field_type=LongType(), required=True), - schema_id=1, identifier_field_ids=[1, 2], ) + assert projection_schema.schema_id == 1 # When we explicitly filter on the commit, we want to have the schema that's linked to the snapshot - assert t.scan(snapshot_id=123).projection() == Schema( + projection_schema = t.scan(snapshot_id=123).projection() + assert projection_schema == Schema( NestedField(field_id=1, name='x', field_type=LongType(), required=True), - schema_id=0, identifier_field_ids=[], ) + assert projection_schema.schema_id == 0 with pytest.warns(UserWarning, match="Metadata does not contain schema with id: 10"): t.scan(snapshot_id=234).projection() diff --git a/tests/table/test_metadata.py b/tests/table/test_metadata.py index 97a7931cbb..c05700ecbb 100644 --- a/tests/table/test_metadata.py +++ b/tests/table/test_metadata.py @@ -120,7 +120,6 @@ def test_v1_metadata_parsing_directly(example_table_metadata_v1: Dict[str, Any]) NestedField(field_id=1, name="x", field_type=LongType(), required=True), NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"), NestedField(field_id=3, name="z", field_type=LongType(), required=True), - schema_id=0, identifier_field_ids=[], ) ]