Skip to content

Commit

Permalink
Merge pull request #289 from gaiaresources/BDRSPS-955
Browse files Browse the repository at this point in the history
BDRSPS-955 Added siteVisitID -> siteID lookup check for the survey occurrence template
  • Loading branch information
Lincoln-GR authored Oct 31, 2024
2 parents 50ee5e5 + c0f15fb commit 38c862a
Show file tree
Hide file tree
Showing 6 changed files with 225 additions and 1 deletion.
1 change: 1 addition & 0 deletions abis_mapping/plugins/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from . import empty
from . import list
from . import logical_or
from . import lookup_match
from . import mutual_exclusion
from . import mutual_inclusion
from . import required
Expand Down
72 changes: 72 additions & 0 deletions abis_mapping/plugins/lookup_match.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""Provides extra frictionless lookup match checks for the package"""

# Third-Party
import frictionless
import frictionless.errors
import attrs

# Typing
from typing import Iterator, Mapping


@attrs.define(kw_only=True, repr=False)
class VLookupMatch(frictionless.Check):
    """Checks one column against a lookup map keyed on another column (VLOOKUP style).

    For every row the cell value of `key_field` is used as the key into `lu_map`
    and the mapped value is compared with the cell value of `value_field`.
    Validation fails if the key has no non-null entry in the map, or if the mapped
    value differs from the `value_field` cell. If a null value for the key is
    encountered then the check is bypassed.

    Attributes:
        key_field: name of the column to use as the key for the lookup.
        value_field: name of the column to be compared against during lookup comparison.
        lu_map: map providing the valid `value_field` value for a given key.
    """

    # Check Attributes
    type = "vlookup-match"
    Errors = [frictionless.errors.RowConstraintError]

    # Attributes specific to this check
    key_field: str
    value_field: str
    lu_map: Mapping

    def validate_row(self, row: frictionless.Row) -> Iterator[frictionless.Error]:
        """Called to validate the given row (on every row).

        Args:
            row (frictionless.Row): The row to perform the lookup check on.

        Yields:
            frictionless.Error: When the key is absent from the lookup map, or
                the value cell does not equal the mapped value.
        """
        # Read the key cell once; a null key bypasses the check entirely
        key = row[self.key_field]
        if key is None:
            return

        # Single lookup; an entry mapped to None is treated the same as a missing key
        expected = self.lu_map.get(key)
        if expected is None:
            # Customise note for error
            note = f"Index `{key}` does not exist in the provided lookup map"
        else:
            actual = row[self.value_field]

            # Perform lookup check
            if actual == expected:
                # Valid
                return

            # Customise error note for the result
            note = (
                f"Expected cell value `{expected}` for field `{self.value_field}` given key"
                f" `{key}` for field `{self.key_field}`; got `{actual}`"
            )

        # Yield Error
        yield frictionless.errors.RowConstraintError.from_row(
            row=row,
            note=note,
        )
13 changes: 13 additions & 0 deletions abis_mapping/templates/survey_occurrence_data_v2/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,15 @@ def apply_validation(self, data: base.types.ReadableType, **kwargs: Any) -> fric
for given siteID.
site_visit_id_temporal_map (dict[str, str]): Default RDF (serialized as turtle)
to use for temporal entity for given siteVisitID.
site_visit_id_site_id_map (dict[str, str]): Valid site ID for a given site visit ID.
Returns:
frictionless.Report: Validation report for the specified data.
"""
# Extract kwargs
site_id_geometry_map = kwargs.get("site_id_geometry_map")
site_visit_id_temporal_map = kwargs.get("site_visit_id_temporal_map")
site_visit_id_site_id_map = kwargs.get("site_visit_id_site_id_map")

# Construct Schema
schema = self.extra_fields_schema(
Expand Down Expand Up @@ -152,6 +154,17 @@ def apply_validation(self, data: base.types.ReadableType, **kwargs: Any) -> fric
],
)

# Modify checklist in the event site visit id to site id map provided
if site_visit_id_site_id_map is not None:
# Add lookup match check
checklist.add_check(
plugins.lookup_match.VLookupMatch(
key_field="siteVisitID",
value_field="siteID",
lu_map=site_visit_id_site_id_map,
)
)

# Modify schema and checklist in the event default temporal map provided
if site_visit_id_temporal_map is not None:
# Need to make sure that required is false from the eventDate field
Expand Down
2 changes: 1 addition & 1 deletion tests/plugins/test_chained_inclusion.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Provides Unit Tests for the `abis_mapping.plugins.mutual_inclusion` module"""
"""Provides Unit Tests for the `abis_mapping.plugins.chained_inclusion` module"""

# Third-Party
import frictionless
Expand Down
51 changes: 51 additions & 0 deletions tests/plugins/test_lookup_match.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""Provides Unit Tests for the `abis_mapping.plugins.lookup_match` module"""

# Third-Party
import frictionless

# Local
from abis_mapping import plugins


def test_checks_vlookup_match() -> None:
    """Exercises the VLookupMatch check over a mix of passing and failing rows."""
    # Rows expected to pass: matching pairs, or a null key (check bypassed)
    passing_rows = [
        {"a": "A", "b": "B", "c": "C"},
        {"a": "A", "b": "B", "c": None},
        {"a": "A1", "b": "B1", "c": None},
        {"a": None, "b": "B", "c": "C"},
        {"a": None, "b": "B", "c": None},
        {"a": None, "b": None, "c": "C"},
    ]
    # Rows expected to fail: value mismatch, null value, or key missing from the map
    failing_rows = [
        {"a": "A", "b": None, "c": None},
        {"a": "A", "b": None, "c": "C"},
        {"a": "A1", "b": "B2", "c": "C"},
        {"a": "A", "b": "B1", "c": "C"},
        {"a": "A2", "b": "B", "c": "C"},
    ]

    # Construct fake resource with the passing rows first
    resource = frictionless.Resource(source=[*passing_rows, *failing_rows])

    lookup_map: dict[str, str] = {"A": "B", "A1": "B1"}

    # Build the checklist up front, then validate
    check = plugins.lookup_match.VLookupMatch(
        key_field="a",
        value_field="b",
        lu_map=lookup_map,
    )
    checklist = frictionless.Checklist(checks=[check])
    report: frictionless.Report = resource.validate(checklist=checklist)

    # Check
    assert not report.valid
    row_numbers = report.flatten(["rowNumber"])
    assert len(row_numbers) == 5
    # Every reported error must sit after the header and the six passing rows
    assert all(entry[0] > 7 for entry in row_numbers)
87 changes: 87 additions & 0 deletions tests/templates/test_survey_occurrence_data_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,3 +427,90 @@ def test_apply_mapping(self, mapper: Mapper) -> None:
res_g = next(graphs)
# Ensure temporal entity added to graph
assert next(res_g.subjects(a, ftn)) is not None


class TestSiteVisitIDSiteIDMap:
    """Tests specific to the provision of a site visit id -> site id map."""

    @attrs.define(kw_only=True)
    class Scenario:
        """Dataclass to hold the scenario parameters."""

        # Scenario identifier, used as the pytest parameter id
        name: str
        # Raw rows, each ordered as [siteVisitID, siteID]
        raws: list[list[str]]
        # Error codes the report is expected to contain; empty means valid.
        # A factory is used so each scenario gets its own set, not a shared one.
        expected_error_codes: set[str] = attrs.field(factory=set)
        # Map passed to `apply_validation` as `site_visit_id_site_id_map`
        lookup_map: dict[str, str]

    scenarios: list[Scenario] = [
        Scenario(
            name="valid_with_default_map",
            raws=[
                ["SV1", "S1"],
                ["SV2", "S1"],
                ["SV3", "S1"],
                ["SV4", "S1"],
                ["", "S1"],
            ],
            lookup_map={"SV1": "S1", "SV2": "S1", "SV3": "S1", "SV4": "S1"},
        ),
        Scenario(
            name="invalid_with_default_map",
            raws=[
                ["SV1", "S1"],
                ["SV2", "S1"],
                ["SV3", "S1"],
                ["SV4", "S1"],
            ],
            lookup_map={"SV2": "S2"},
            expected_error_codes={"row-constraint"},
        ),
    ]

    @pytest.mark.parametrize(
        argnames="scenario",
        argvalues=scenarios,
        ids=[scenario.name for scenario in scenarios],
    )
    def test_apply_validation(self, scenario: Scenario, mocker: pytest_mock.MockerFixture, mapper: Mapper) -> None:
        """Tests the `apply_validation` method with a supplied site visit id -> site id map.

        Args:
            scenario (Scenario): The parameters of the scenario under test.
            mocker (pytest_mock.MockerFixture): The mocker fixture.
            mapper (Mapper): Mapper instance fixture.
        """
        # Construct fake data
        rawh = [
            "siteVisitID",
            "siteID",
        ]
        # strict=True makes a row with the wrong number of cells fail loudly
        all_raw = [dict(zip(rawh, ln, strict=True)) for ln in scenario.raws]

        # Modify schema to only fields required for test
        descriptor = {"fields": [field for field in Mapper.schema()["fields"] if field["name"] in rawh]}
        descriptor["fields"].sort(key=lambda f: rawh.index(f["name"]))

        # Patch the schema for the test
        mocker.patch.object(base.mapper.ABISMapper, "schema").return_value = descriptor

        # Create raw data csv string
        with io.StringIO() as output:
            csv_writer = csv.DictWriter(output, fieldnames=rawh)
            csv_writer.writeheader()
            csv_writer.writerows(all_raw)

            csv_data = output.getvalue().encode("utf-8")

        # Apply validation
        report = mapper.apply_validation(
            data=csv_data,
            site_visit_id_site_id_map=scenario.lookup_map,
        )

        # Assert
        assert report.valid == (scenario.expected_error_codes == set())
        if not report.valid:
            error_codes = [code for codes in report.flatten(["type"]) for code in codes]
            assert set(error_codes) == scenario.expected_error_codes

0 comments on commit 38c862a

Please sign in to comment.