"""Provides extra frictionless lookup match checks for the package"""

# Third-Party
import frictionless
import frictionless.errors
import attrs

# Typing
from typing import Iterator, Mapping


@attrs.define(kw_only=True, repr=False)
class VLookupMatch(frictionless.Check):
    """Checks one column against a lookup map keyed by another column (VLOOKUP style).

    For each row, the cell value of `key_field` is used as a key into `lu_map` and the
    value stored there is compared with the cell value of `value_field`.

    Validation fails if the cell value for `key_field` does not match any key of the
    provided map, or if the mapped value differs from the cell value of `value_field`.
    If a null value for the key is encountered then the check is bypassed.

    Attributes:
        key_field: name of the column to use as the key for the lookup.
        value_field: name of the column to be compared against during lookup comparison.
        lu_map: map consisting of the valid value for a given key.
    """

    # Check Attributes
    type = "vlookup-match"
    Errors = [frictionless.errors.RowConstraintError]

    # Attributes specific to this check
    key_field: str
    value_field: str
    lu_map: Mapping

    def validate_row(self, row: frictionless.Row) -> Iterator[frictionless.Error]:
        """Called to validate the given row (on every row).

        Args:
            row (frictionless.Row): The row to check the lookup match of.

        Yields:
            frictionless.Error: For when the key is absent from the lookup map, or
                the value cell does not equal the value mapped to the key.
        """
        # Read the key cell once; it is needed for the lookup and for both notes
        key = row[self.key_field]

        # Check for null key
        if key is None:
            # Bypass
            return

        # Membership test rather than `.get(...) is not None`, so that a key which is
        # explicitly mapped to `None` is still recognised as present in the map
        if key in self.lu_map:
            # Extract lookup values
            expected = self.lu_map[key]
            actual = row[self.value_field]

            # Perform lookup check
            if actual == expected:
                # Valid
                return

            # Customise error note for the result
            note = (
                f"Expected cell value `{expected}` for field `{self.value_field}` given key"
                f" `{key}` for field `{self.key_field}`; got `{actual}`"
            )
        else:
            # Customise note for error
            note = f"Index `{key}` does not exist in the provided lookup map"

        # Yield Error
        yield frictionless.errors.RowConstraintError.from_row(
            row=row,
            note=note,
        )
@@ -115,6 +116,7 @@ def apply_validation(self, data: base.types.ReadableType, **kwargs: Any) -> fric # Extract kwargs site_id_geometry_map = kwargs.get("site_id_geometry_map") site_visit_id_temporal_map = kwargs.get("site_visit_id_temporal_map") + site_visit_id_site_id_map = kwargs.get("site_visit_id_site_id_map") # Construct Schema schema = self.extra_fields_schema( @@ -152,6 +154,17 @@ def apply_validation(self, data: base.types.ReadableType, **kwargs: Any) -> fric ], ) + # Modify checklist in the event site visit id to site id map provided + if site_visit_id_site_id_map is not None: + # Add lookup match check + checklist.add_check( + plugins.lookup_match.VLookupMatch( + key_field="siteVisitID", + value_field="siteID", + lu_map=site_visit_id_site_id_map, + ) + ) + # Modify schema and checklist in the event default temporal map provided if site_visit_id_temporal_map is not None: # Need to make sure that required is false from the eventDate field diff --git a/tests/plugins/test_chained_inclusion.py b/tests/plugins/test_chained_inclusion.py index d2f2b533..c727e1e3 100644 --- a/tests/plugins/test_chained_inclusion.py +++ b/tests/plugins/test_chained_inclusion.py @@ -1,4 +1,4 @@ -"""Provides Unit Tests for the `abis_mapping.plugins.mutual_inclusion` module""" +"""Provides Unit Tests for the `abis_mapping.plugins.chained_inclusion` module""" # Third-Party import frictionless diff --git a/tests/plugins/test_lookup_match.py b/tests/plugins/test_lookup_match.py new file mode 100644 index 00000000..cd06a8a9 --- /dev/null +++ b/tests/plugins/test_lookup_match.py @@ -0,0 +1,51 @@ +"""Provides Unit Tests for the `abis_mapping.plugins.lookup_match` module""" + +# Third-Party +import frictionless + +# Local +from abis_mapping import plugins + + +def test_checks_vlookup_match() -> None: + """Tests the VLookupMatch Checker""" + # Construct Fake Resource + resource = frictionless.Resource( + source=[ + # Valid + {"a": "A", "b": "B", "c": "C"}, + {"a": "A", "b": "B", "c": None}, + 
class TestSiteVisitIDSiteIDMap:
    """Tests specific to the provision of a site visit id -> site id map."""

    @attrs.define(kw_only=True)
    class Scenario:
        """Dataclass to hold the scenario parameters.

        Attributes:
            name: Identifier of the scenario; used as the pytest parameter id.
            raws: Raw data rows, each ordered as [siteVisitID, siteID].
            expected_error_codes: Error types expected in the validation report;
                left empty when the data is expected to be valid.
            lookup_map: Site visit id -> site id map passed to `apply_validation`.
        """

        name: str
        raws: list[list[str]]
        # A factory (not a `set()` literal) so instances never share one mutable default
        expected_error_codes: set[str] = attrs.field(factory=set)
        lookup_map: dict[str, str]

    scenarios: list[Scenario] = [
        Scenario(
            name="valid_with_default_map",
            raws=[
                ["SV1", "S1"],
                ["SV2", "S1"],
                ["SV3", "S1"],
                ["SV4", "S1"],
                ["", "S1"],
            ],
            lookup_map={"SV1": "S1", "SV2": "S1", "SV3": "S1", "SV4": "S1"},
        ),
        Scenario(
            name="invalid_with_default_map",
            raws=[
                ["SV1", "S1"],
                ["SV2", "S1"],
                ["SV3", "S1"],
                ["SV4", "S1"],
            ],
            lookup_map={"SV2": "S2"},
            expected_error_codes={"row-constraint"},
        ),
    ]

    @pytest.mark.parametrize(
        argnames="scenario",
        argvalues=scenarios,
        ids=[scenario.name for scenario in scenarios],
    )
    def test_apply_validation(self, scenario: Scenario, mocker: pytest_mock.MockerFixture, mapper: Mapper) -> None:
        """Tests the `apply_validation` method with a supplied site visit id -> site id map.

        Args:
            scenario (Scenario): The parameters of the scenario under test.
            mocker (pytest_mock.MockerFixture): The mocker fixture.
            mapper (Mapper): Mapper instance fixture.
        """
        # Construct fake data
        rawh = [
            "siteVisitID",
            "siteID",
        ]
        all_raw = [dict(zip(rawh, ln, strict=True)) for ln in scenario.raws]

        # Modify schema to only fields required for test, ordered as the header is
        descriptor = {"fields": [field for field in Mapper.schema()["fields"] if field["name"] in rawh]}
        descriptor["fields"].sort(key=lambda f: rawh.index(f["name"]))

        # Patch the schema for the test
        mocker.patch.object(base.mapper.ABISMapper, "schema").return_value = descriptor

        # Create raw data csv string
        with io.StringIO() as output:
            csv_writer = csv.DictWriter(output, fieldnames=rawh)
            csv_writer.writeheader()
            csv_writer.writerows(all_raw)

            csv_data = output.getvalue().encode("utf-8")

        # Apply validation
        report = mapper.apply_validation(
            data=csv_data,
            site_visit_id_site_id_map=scenario.lookup_map,
        )

        # Assert validity matches whether any error codes were expected
        assert report.valid == (scenario.expected_error_codes == set())
        if not report.valid:
            # Each flattened entry is a one-element list holding the error type
            error_codes = {code for codes in report.flatten(["type"]) for code in codes}
            assert error_codes == scenario.expected_error_codes