Skip to content

Commit

Permalink
Wildcard matching (#397)
Browse files Browse the repository at this point in the history
* Add wildcard matching for CodeLists

* Add test for wildcard matching

* Remove nonsensical test

* Apply suggestions from code review

Co-authored-by: Daniel Huppmann <[email protected]>

* Use pyam.utils.pattern_match for wildcard validation

* Update test file path

* Pin common-definitions to make tests pass

---------

Co-authored-by: Daniel Huppmann <[email protected]>
  • Loading branch information
phackstock and danielhuppmann authored Sep 25, 2024
1 parent 9314fef commit 30fb5b1
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 10 deletions.
5 changes: 3 additions & 2 deletions nomenclature/codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pandas as pd
import yaml
from pyam import IamDataFrame
from pyam.utils import is_list_like, write_sheet
from pyam.utils import is_list_like, write_sheet, pattern_match
from pydantic import BaseModel, ValidationInfo, field_validator
from pydantic_core import PydanticCustomError

Expand Down Expand Up @@ -118,7 +118,8 @@ def validate_items(self, items: List[str]) -> List[str]:
list
Returns the list of items that are **not** defined in the codelist
"""
return [c for c in items if c not in self.keys()]
matches = pattern_match(pd.Series(items), self.keys())
return [item for item, match in zip(items, matches) if not match]

@classmethod
def replace_tags(
Expand Down
1 change: 1 addition & 0 deletions tests/data/codelist/wildcard/scenario/scenario.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
- scen_*
1 change: 1 addition & 0 deletions tests/data/config/general-config-only/nomenclature.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
repositories:
common-definitions:
url: https://github.com/IAMconsortium/common-definitions.git/
hash: cb85704
definitions:
region:
repository: common-definitions
Expand Down
1 change: 1 addition & 0 deletions tests/data/config/general-config/nomenclature.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ dimensions: [region, variable]
repositories:
common-definitions:
url: https://github.com/IAMconsortium/common-definitions.git/
hash: cb85704
definitions:
region:
repository: common-definitions
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
repositories:
common-definitions:
url: https://github.com/IAMconsortium/common-definitions.git/
hash: cb85704
definitions:
region:
repository: common-definitions
Expand Down
20 changes: 12 additions & 8 deletions tests/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,6 @@ def test_validation_fails_region(simple_definition, simple_df, caplog):
)


def test_validation_fails_region_as_int(simple_definition, simple_df):
    """Validation raises the expected error if a region is named by an integer"""
    # swap the "World" region label for the integer 1, modifying simple_df in place
    int_region_mapping = {"World": 1}
    simple_df.rename(region=int_region_mapping, inplace=True)

    # the validation error is expected to match MATCH_FAIL_VALIDATION
    with pytest.raises(ValueError, match=MATCH_FAIL_VALIDATION):
        simple_definition.validate(simple_df)


def test_validation_with_custom_dimension(simple_df):
"""Check validation with a custom DataStructureDefinition dimension"""

Expand All @@ -95,3 +87,15 @@ def test_validation_with_custom_dimension(simple_df):

# validating against all dimensions works
definition.validate(simple_df)


def test_wildcard_match(simple_df):
    """Check scenario validation against the codelist in the "wildcard" test folder"""
    wildcard_dir = TEST_DATA_DIR / "codelist" / "wildcard"
    definition = DataStructureDefinition(wildcard_dir, dimensions=["scenario"])

    # the unmodified data passes validation, which returns None on success
    assert definition.validate(simple_df) is None

    # after renaming scenario "scen_a" to "foo", validation is expected to fail
    with pytest.raises(ValueError, match=MATCH_FAIL_VALIDATION):
        renamed_df = simple_df.rename(scenario={"scen_a": "foo"})
        definition.validate(renamed_df)

0 comments on commit 30fb5b1

Please sign in to comment.