Skip to content

Commit

Permalink
Allow empty names in mapped field of Name Mapping (apache#927)
Browse files Browse the repository at this point in the history
* Remove check_at_least_one field validator

Iceberg spec permits an emtpy list of names in the default name mapping. check_at_least_one is therefore unnecessary.

* Remove irrelevant test case

* Fixing pydantic model

No longer requiring minimum length of names list to be 1.

* Added test case for empty names in name mapping

* Fixed formatting error
  • Loading branch information
spock-abadai authored Jul 14, 2024
1 parent 3f44dfe commit e27cd90
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 18 deletions.
14 changes: 1 addition & 13 deletions pyiceberg/table/name_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,26 +37,14 @@

class MappedField(IcebergBaseModel):
field_id: int = Field(alias="field-id")
names: List[str] = conlist(str, min_length=1)
names: List[str] = conlist(str)
fields: List[MappedField] = Field(default_factory=list)

@field_validator("fields", mode="before")
@classmethod
def convert_null_to_empty_List(cls, v: Any) -> Any:
return v or []

@field_validator("names", mode="after")
@classmethod
def check_at_least_one(cls, v: List[str]) -> Any:
"""
Conlist constraint does not seem to be validating the class on instantiation.
Adding a custom validator to enforce min_length=1 constraint.
"""
if len(v) < 1:
raise ValueError("At least one mapped name must be provided for the field")
return v

@model_serializer
def ser_model(self) -> Dict[str, Any]:
"""Set custom serializer to leave out the field when it is empty."""
Expand Down
22 changes: 17 additions & 5 deletions tests/table/test_name_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,23 @@ def test_json_mapped_field_deserialization() -> None:
assert MappedField(field_id=1, names=["id", "record_id"]) == MappedField.model_validate_json(mapped_field_with_null_fields)


def test_json_mapped_field_no_names_deserialization() -> None:
mapped_field = """{
"field-id": 1,
"names": []
}
"""
assert MappedField(field_id=1, names=[]) == MappedField.model_validate_json(mapped_field)

mapped_field_with_null_fields = """{
"field-id": 1,
"names": [],
"fields": null
}
"""
assert MappedField(field_id=1, names=[]) == MappedField.model_validate_json(mapped_field_with_null_fields)


def test_json_name_mapping_deserialization() -> None:
name_mapping = """
[
Expand Down Expand Up @@ -247,11 +264,6 @@ def test_mapping_lookup_by_name(table_name_mapping_nested: NameMapping) -> None:
table_name_mapping_nested.find("boom")


def test_invalid_mapped_field() -> None:
with pytest.raises(ValueError):
MappedField(field_id=1, names=[])


def test_update_mapping_no_updates_or_adds(table_name_mapping_nested: NameMapping) -> None:
assert update_mapping(table_name_mapping_nested, {}, {}) == table_name_mapping_nested

Expand Down

0 comments on commit e27cd90

Please sign in to comment.