gaiaresources · Lincoln-GR · Oct 31, 2024 · Oct 30, 2024 · Oct 30, 2024
diff --git a/abis_mapping/plugins/__init__.py b/abis_mapping/plugins/__init__.py
@@ -8,6 +8,7 @@
 from . import empty
 from . import list
 from . import logical_or
+from . import lookup_match
 from . import mutual_exclusion
 from . import mutual_inclusion
 from . import required

diff --git a/abis_mapping/plugins/lookup_match.py b/abis_mapping/plugins/lookup_match.py
@@ -0,0 +1,72 @@
+"""Provides extra frictionless lookup match checks for the package"""
+
+# Third-Party
+import frictionless
+import frictionless.errors
+import attrs
+
+# Typing
+from typing import Iterator, Mapping
+
+
+@attrs.define(kw_only=True, repr=False)
+class VLookupMatch(frictionless.Check):
+    """Checks one column against a map keyed by another column (VLOOKUP style).
+
+    Validation fails if the cell value for `key_field` is not a key of the provided map,
+    or if the `value_field` cell differs from the mapped value. Null keys bypass the check.
+
+    Attributes:
+        key_field: name of the column to use as the key for the lookup.
+        value_field: name of the column to be compared against during lookup comparison.
+        lu_map: map giving the only valid `value_field` value for each key.
+    """
+
+    # Check Attributes
+    type = "vlookup-match"
+    Errors = [frictionless.errors.RowConstraintError]
+
+    # Attributes specific to this check
+    key_field: str
+    value_field: str
+    lu_map: Mapping
+
+    def validate_row(self, row: frictionless.Row) -> Iterator[frictionless.Error]:
+        """Called to validate the given row (on every row).
+
+        Args:
+            row (frictionless.Row): The row to perform the lookup match check on.
+
+        Yields:
+            frictionless.Error: For when the key is not in the map or the value mismatches.
+        """
+        # Read the key cell once; it is reused for the lookup and the error notes
+        key = row[self.key_field]
+
+        # Check for null key
+        if key is None:
+            # Bypass
+            return
+
+        # Membership test (not `.get(...) is not None`) so a key mapped to None still counts
+        if key in self.lu_map:
+            # Extract lookup values
+            expected = self.lu_map[key]
+            actual = row[self.value_field]
+
+            # Perform lookup check
+            if actual == expected:
+                # Valid
+                return
+
+            # Customise error note for the result
+            note = (
+                f"Expected cell value `{expected}` for field `{self.value_field}` given key"
+                f" `{key}` for field `{self.key_field}`; got `{actual}`"
+            )
+        else:
+            # Customise note for error
+            note = f"Index `{key}` does not exist in the provided lookup map"
+
+        # Yield Error
+        yield frictionless.errors.RowConstraintError.from_row(row=row, note=note)
diff --git a/abis_mapping/templates/survey_occurrence_data_v2/mapping.py b/abis_mapping/templates/survey_occurrence_data_v2/mapping.py
@@ -108,13 +108,15 @@ def apply_validation(self, data: base.types.ReadableType, **kwargs: Any) -> fric
                 for given siteID.
             site_visit_id_temporal_map (dict[str, str]): Default RDF (serialized as turtle)
                 to use for temporal entity for given siteVisitID.
+            site_visit_id_site_id_map (dict[str, str]): Valid site ID for a given site visit ID.
 
         Returns:
             frictionless.Report: Validation report for the specified data.
         """
         # Extract kwargs
         site_id_geometry_map = kwargs.get("site_id_geometry_map")
         site_visit_id_temporal_map = kwargs.get("site_visit_id_temporal_map")
+        site_visit_id_site_id_map = kwargs.get("site_visit_id_site_id_map")
 
         # Construct Schema
         schema = self.extra_fields_schema(
@@ -152,6 +154,17 @@ def apply_validation(self, data: base.types.ReadableType, **kwargs: Any) -> fric
             ],
         )
 
+        # Modify checklist in the event site visit id to site id map provided
+        if site_visit_id_site_id_map is not None:
+            # Add lookup match check
+            checklist.add_check(
+                plugins.lookup_match.VLookupMatch(
+                    key_field="siteVisitID",
+                    value_field="siteID",
+                    lu_map=site_visit_id_site_id_map,
+                )
+            )
+
         # Modify schema and checklist in the event default temporal map provided
         if site_visit_id_temporal_map is not None:
             # Need to make sure that required is false from the eventDate field

diff --git a/tests/plugins/test_chained_inclusion.py b/tests/plugins/test_chained_inclusion.py
@@ -1,4 +1,4 @@
-"""Provides Unit Tests for the `abis_mapping.plugins.mutual_inclusion` module"""
+"""Provides Unit Tests for the `abis_mapping.plugins.chained_inclusion` module"""
 
 # Third-Party
 import frictionless

diff --git a/tests/plugins/test_lookup_match.py b/tests/plugins/test_lookup_match.py
@@ -0,0 +1,51 @@
+"""Provides Unit Tests for the `abis_mapping.plugins.lookup_match` module"""
+
+# Third-Party
+import frictionless
+
+# Local
+from abis_mapping import plugins
+
+
+def test_checks_vlookup_match() -> None:
+    """Exercises VLookupMatch over rows that should pass and rows that should fail."""
+    # Key column "a" is looked up in this map and compared against column "b"
+    lookup_map: dict[str, str] = {"A": "B", "A1": "B1"}
+
+    # Rows expected to pass (matching pair, or null key which bypasses the check)
+    passing = [
+        {"a": "A", "b": "B", "c": "C"},
+        {"a": "A", "b": "B", "c": None},
+        {"a": "A1", "b": "B1", "c": None},
+        {"a": None, "b": "B", "c": "C"},
+        {"a": None, "b": "B", "c": None},
+        {"a": None, "b": None, "c": "C"},
+    ]
+    # Rows expected to fail (mismatched value, or key absent from the map)
+    failing = [
+        {"a": "A", "b": None, "c": None},
+        {"a": "A", "b": None, "c": "C"},
+        {"a": "A1", "b": "B2", "c": "C"},
+        {"a": "A", "b": "B1", "c": "C"},
+        {"a": "A2", "b": "B", "c": "C"},
+    ]
+
+    # Build the check under test
+    check = plugins.lookup_match.VLookupMatch(
+        key_field="a",
+        value_field="b",
+        lu_map=lookup_map,
+    )
+
+    # Validate a fake resource holding the passing rows followed by the failing rows
+    resource = frictionless.Resource(source=passing + failing)
+    report: frictionless.Report = resource.validate(
+        checklist=frictionless.Checklist(checks=[check]),
+    )
+
+    # Check
+    assert not report.valid
+    error_rows = report.flatten(["rowNumber"])
+    assert len(error_rows) == 5
+    # Confirm that the rows in error are where we expect
+    assert all(entry[0] > 7 for entry in error_rows)
diff --git a/tests/templates/test_survey_occurrence_data_v2.py b/tests/templates/test_survey_occurrence_data_v2.py
@@ -427,3 +427,90 @@ def test_apply_mapping(self, mapper: Mapper) -> None:
         res_g = next(graphs)
         # Ensure temporal entity added to graph
         assert next(res_g.subjects(a, ftn)) is not None
+
+
+class TestSiteVisitIDSiteIDMap:
+    """Tests specific to the provision of a site visit id -> site id map."""
+
+    @attrs.define(kw_only=True)
+    class Scenario:
+        """Dataclass to hold the scenario parameters."""
+
+        name: str
+        raws: list[list[str]]
+        expected_error_codes: set[str] = attrs.field(factory=set)  # fresh set per scenario
+        lookup_map: dict[str, str]
+
+    scenarios: list[Scenario] = [
+        Scenario(
+            name="valid_with_default_map",
+            raws=[
+                ["SV1", "S1"],
+                ["SV2", "S1"],
+                ["SV3", "S1"],
+                ["SV4", "S1"],
+                ["", "S1"],
+            ],
+            lookup_map={"SV1": "S1", "SV2": "S1", "SV3": "S1", "SV4": "S1"},
+        ),
+        Scenario(
+            name="invalid_with_default_map",
+            raws=[
+                ["SV1", "S1"],
+                ["SV2", "S1"],
+                ["SV3", "S1"],
+                ["SV4", "S1"],
+            ],
+            lookup_map={"SV2": "S2"},
+            expected_error_codes={"row-constraint"},
+        ),
+    ]
+
+    @pytest.mark.parametrize(
+        argnames="scenario",
+        argvalues=scenarios,
+        ids=[scenario.name for scenario in scenarios],
+    )
+    def test_apply_validation(self, scenario: Scenario, mocker: pytest_mock.MockerFixture, mapper: Mapper) -> None:
+        """Tests the `apply_validation` method with a supplied site visit id -> site id map.
+
+        Args:
+            scenario (Scenario): The parameters of the scenario under test.
+            mocker (pytest_mock.MockerFixture): The mocker fixture.
+            mapper (Mapper): Mapper instance fixture.
+        """
+        # Construct fake data
+        rawh = [
+            "siteVisitID",
+            "siteID",
+        ]
+        all_raw = [dict(zip(rawh, ln, strict=True)) for ln in scenario.raws]
+
+        # Modify schema to only fields required for test
+        descriptor = {"fields": [field for field in Mapper.schema()["fields"] if field["name"] in rawh]}
+        descriptor["fields"].sort(key=lambda f: rawh.index(f["name"]))
+
+        # Patch the schema for the test
+        mocker.patch.object(base.mapper.ABISMapper, "schema").return_value = descriptor
+
+        # Create raw data csv string
+        with io.StringIO() as output:
+            csv_writer = csv.DictWriter(output, fieldnames=rawh)
+            csv_writer.writeheader()
+
+            # Write every data row in a single call
+            csv_writer.writerows(all_raw)
+
+            csv_data = output.getvalue().encode("utf-8")
+
+        # Apply validation
+        report = mapper.apply_validation(
+            data=csv_data,
+            site_visit_id_site_id_map=scenario.lookup_map,
+        )
+
+        # Assert
+        assert report.valid == (not scenario.expected_error_codes)
+        if not report.valid:
+            error_codes = {code for codes in report.flatten(["type"]) for code in codes}
+            assert error_codes == scenario.expected_error_codes