Skip to content

Commit

Permalink
fix: support MonthTransform for partitioning (apache#1176)
Browse files Browse the repository at this point in the history
* fix: support MonthTransform for partitioning

* test: add tests for generating default names for other transforms

* fix: delete duplicate test case

* chore: run formatting
  • Loading branch information
felixscherz authored Sep 16, 2024
1 parent e545a05 commit de47590
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 0 deletions.
3 changes: 3 additions & 0 deletions pyiceberg/partitioning.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
DayTransform,
HourTransform,
IdentityTransform,
MonthTransform,
Transform,
TruncateTransform,
UnknownTransform,
Expand Down Expand Up @@ -359,6 +360,8 @@ def _visit_partition_field(schema: Schema, field: PartitionField, visitor: Parti
return visitor.day(field.field_id, source_name, field.source_id)
elif isinstance(transform, HourTransform):
return visitor.hour(field.field_id, source_name, field.source_id)
elif isinstance(transform, MonthTransform):
return visitor.month(field.field_id, source_name, field.source_id)
elif isinstance(transform, YearTransform):
return visitor.year(field.field_id, source_name, field.source_id)
elif isinstance(transform, VoidTransform):
Expand Down
64 changes: 64 additions & 0 deletions tests/integration/test_partition_evolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,14 @@ def test_add_year(catalog: Catalog) -> None:
_validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, YearTransform(), "year_transform"))


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_add_year_generates_default_name(catalog: Catalog) -> None:
    """Add a ``YearTransform`` field on ``event_ts`` without a name and expect ``event_ts_year``."""
    tbl = _table(catalog)
    pending_update = tbl.update_spec().add_field("event_ts", YearTransform())
    pending_update.commit()
    # No partition name was passed to add_field; the expected field carries the generated one.
    expected_field = PartitionField(2, 1000, YearTransform(), "event_ts_year")
    _validate_new_partition_fields(tbl, 1000, 1, 1000, expected_field)


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_add_month(catalog: Catalog) -> None:
Expand All @@ -100,6 +108,14 @@ def test_add_month(catalog: Catalog) -> None:
_validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, MonthTransform(), "month_transform"))


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_add_month_generates_default_name(catalog: Catalog) -> None:
    """Add a ``MonthTransform`` field on ``event_ts`` without a name and expect ``event_ts_month``."""
    tbl = _table(catalog)
    pending_update = tbl.update_spec().add_field("event_ts", MonthTransform())
    pending_update.commit()
    # No partition name was passed to add_field; the expected field carries the generated one.
    expected_field = PartitionField(2, 1000, MonthTransform(), "event_ts_month")
    _validate_new_partition_fields(tbl, 1000, 1, 1000, expected_field)


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_add_day(catalog: Catalog) -> None:
Expand All @@ -108,6 +124,14 @@ def test_add_day(catalog: Catalog) -> None:
_validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, DayTransform(), "day_transform"))


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_add_day_generates_default_name(catalog: Catalog) -> None:
    """Add a ``DayTransform`` field on ``event_ts`` without a name and expect ``event_ts_day``."""
    tbl = _table(catalog)
    pending_update = tbl.update_spec().add_field("event_ts", DayTransform())
    pending_update.commit()
    # No partition name was passed to add_field; the expected field carries the generated one.
    expected_field = PartitionField(2, 1000, DayTransform(), "event_ts_day")
    _validate_new_partition_fields(tbl, 1000, 1, 1000, expected_field)


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_add_hour(catalog: Catalog) -> None:
Expand All @@ -116,6 +140,14 @@ def test_add_hour(catalog: Catalog) -> None:
_validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 1000, HourTransform(), "hour_transform"))


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_add_hour_generates_default_name(catalog: Catalog) -> None:
    """Add an ``HourTransform`` field on ``event_ts`` without a name and expect ``event_ts_hour``."""
    tbl = _table(catalog)
    pending_update = tbl.update_spec().add_field("event_ts", HourTransform())
    pending_update.commit()
    # No partition name was passed to add_field; the expected field carries the generated one.
    expected_field = PartitionField(2, 1000, HourTransform(), "event_ts_hour")
    _validate_new_partition_fields(tbl, 1000, 1, 1000, expected_field)


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_add_bucket(catalog: Catalog, table_schema_simple: Schema) -> None:
Expand All @@ -124,6 +156,14 @@ def test_add_bucket(catalog: Catalog, table_schema_simple: Schema) -> None:
_validate_new_partition_fields(simple_table, 1000, 1, 1000, PartitionField(1, 1000, BucketTransform(12), "bucket_transform"))


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_add_bucket_generates_default_name(catalog: Catalog, table_schema_simple: Schema) -> None:
    """Add a ``BucketTransform(12)`` field on ``foo`` without a name and expect ``foo_bucket_12``."""
    tbl = _create_table_with_schema(catalog, table_schema_simple, "1")
    pending_update = tbl.update_spec().add_field("foo", BucketTransform(12))
    pending_update.commit()
    # No partition name was passed to add_field; the expected field carries the generated one.
    expected_field = PartitionField(1, 1000, BucketTransform(12), "foo_bucket_12")
    _validate_new_partition_fields(tbl, 1000, 1, 1000, expected_field)


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_add_truncate(catalog: Catalog, table_schema_simple: Schema) -> None:
Expand All @@ -134,6 +174,14 @@ def test_add_truncate(catalog: Catalog, table_schema_simple: Schema) -> None:
)


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_add_truncate_generates_default_name(catalog: Catalog, table_schema_simple: Schema) -> None:
    """Add a ``TruncateTransform(1)`` field on ``foo`` without a name and expect ``foo_trunc_1``."""
    tbl = _create_table_with_schema(catalog, table_schema_simple, "1")
    pending_update = tbl.update_spec().add_field("foo", TruncateTransform(1))
    pending_update.commit()
    # No partition name was passed to add_field; the expected field carries the generated one.
    expected_field = PartitionField(1, 1000, TruncateTransform(1), "foo_trunc_1")
    _validate_new_partition_fields(tbl, 1000, 1, 1000, expected_field)


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_multiple_adds(catalog: Catalog) -> None:
Expand All @@ -152,6 +200,22 @@ def test_multiple_adds(catalog: Catalog) -> None:
)


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_add_void(catalog: Catalog, table_schema_simple: Schema) -> None:
    """Add a ``VoidTransform`` field on ``foo`` with the explicit name ``void_transform``."""
    tbl = _create_table_with_schema(catalog, table_schema_simple, "1")
    pending_update = tbl.update_spec().add_field("foo", VoidTransform(), "void_transform")
    pending_update.commit()
    # The expected field uses the same name that was passed to add_field.
    expected_field = PartitionField(1, 1000, VoidTransform(), "void_transform")
    _validate_new_partition_fields(tbl, 1000, 1, 1000, expected_field)


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_add_void_generates_default_name(catalog: Catalog, table_schema_simple: Schema) -> None:
    """Add a ``VoidTransform`` field on ``foo`` without a name and expect ``foo_null``."""
    tbl = _create_table_with_schema(catalog, table_schema_simple, "1")
    pending_update = tbl.update_spec().add_field("foo", VoidTransform())
    pending_update.commit()
    # No partition name was passed to add_field; the expected field carries the generated one.
    expected_field = PartitionField(1, 1000, VoidTransform(), "foo_null")
    _validate_new_partition_fields(tbl, 1000, 1, 1000, expected_field)


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_add_hour_to_day(catalog: Catalog) -> None:
Expand Down

0 comments on commit de47590

Please sign in to comment.