Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BDRSPS-955 Added siteVisitID -> siteID lookup check for the survey occurrence template #289

Merged
merged 2 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions abis_mapping/plugins/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from . import empty
from . import list
from . import logical_or
from . import lookup_match
from . import mutual_exclusion
from . import mutual_inclusion
from . import required
Expand Down
72 changes: 72 additions & 0 deletions abis_mapping/plugins/lookup_match.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""Provides extra frictionless lookup match checks for the package"""

# Third-Party
import frictionless
import frictionless.errors
import attrs

# Typing
from typing import Iterator, Mapping


@attrs.define(kw_only=True, repr=False)
class VLookupMatch(frictionless.Check):
    """Uses the value of one column as a key to perform a VLOOKUP style check.

    For each row, the cell value of `key_field` is looked up in `lu_map` and the
    result is compared with the cell value of `value_field`.

    Validation fails if the cell value for `key_field` is not a key of the provided
    map, or if the mapped value is not equal to the cell value of `value_field`.
    If a null value for the key is encountered then the check is bypassed.

    Attributes:
        key_field: name of the column to use as the key for the lookup.
        value_field: name of the column to be compared against during lookup comparison.
        lu_map: map consisting of the valid value for a given key.
    """

    # Check Attributes
    type = "vlookup-match"
    Errors = [frictionless.errors.RowConstraintError]

    # Attributes specific to this check
    key_field: str
    value_field: str
    lu_map: Mapping

    def validate_row(self, row: frictionless.Row) -> Iterator[frictionless.Error]:
        """Called to validate the given row (on every row).

        Args:
            row (frictionless.Row): The row to perform the lookup match check on.

        Yields:
            frictionless.Error: For when the key is absent from the lookup map, or
                the looked up value differs from the row's value.
        """
        # Read the key cell once; it is needed for the lookup and for the notes
        key = row[self.key_field]

        # Check for null key
        if key is None:
            # Bypass
            return

        # Membership (rather than `.get(...) is not None`) so that a key which is
        # present but mapped to `None` is not misreported as missing from the map
        if key not in self.lu_map:
            # Customise note for error
            note = f"Index `{key}` does not exist in the provided lookup map"
        else:
            # Extract lookup values
            expected = self.lu_map[key]
            actual = row[self.value_field]

            # Perform lookup check
            if actual == expected:
                # Valid
                return

            # Customise error note for the result
            note = (
                f"Expected cell value `{expected}` for field `{self.value_field}` given key"
                f" `{key}` for field `{self.key_field}`; got `{actual}`"
            )

        # Yield Error
        yield frictionless.errors.RowConstraintError.from_row(
            row=row,
            note=note,
        )
13 changes: 13 additions & 0 deletions abis_mapping/templates/survey_occurrence_data_v2/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,15 @@ def apply_validation(self, data: base.types.ReadableType, **kwargs: Any) -> fric
for given siteID.
site_visit_id_temporal_map (dict[str, str]): Default RDF (serialized as turtle)
to use for temporal entity for given siteVisitID.
site_visit_id_site_id_map (dict[str, str]): Valid site ID for a given site visit ID.

Returns:
frictionless.Report: Validation report for the specified data.
"""
# Extract kwargs
site_id_geometry_map = kwargs.get("site_id_geometry_map")
site_visit_id_temporal_map = kwargs.get("site_visit_id_temporal_map")
site_visit_id_site_id_map = kwargs.get("site_visit_id_site_id_map")

# Construct Schema
schema = self.extra_fields_schema(
Expand Down Expand Up @@ -152,6 +154,17 @@ def apply_validation(self, data: base.types.ReadableType, **kwargs: Any) -> fric
],
)

# Modify checklist in the event site visit id to site id map provided
if site_visit_id_site_id_map is not None:
# Add lookup match check
checklist.add_check(
plugins.lookup_match.VLookupMatch(
key_field="siteVisitID",
value_field="siteID",
lu_map=site_visit_id_site_id_map,
)
)

# Modify schema and checklist in the event default temporal map provided
if site_visit_id_temporal_map is not None:
# Need to make sure that required is false from the eventDate field
Expand Down
2 changes: 1 addition & 1 deletion tests/plugins/test_chained_inclusion.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Provides Unit Tests for the `abis_mapping.plugins.mutual_inclusion` module"""
"""Provides Unit Tests for the `abis_mapping.plugins.chained_inclusion` module"""

# Third-Party
import frictionless
Expand Down
51 changes: 51 additions & 0 deletions tests/plugins/test_lookup_match.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""Provides Unit Tests for the `abis_mapping.plugins.lookup_match` module"""

# Third-Party
import frictionless

# Local
from abis_mapping import plugins


def test_checks_vlookup_match() -> None:
    """Exercises the VLookupMatch check against a small in-memory resource."""
    # Rows expected to pass: either the key `a` is null (check bypassed) or
    # the value in `b` is the one the lookup map holds for the key in `a`
    passing_rows = [
        {"a": "A", "b": "B", "c": "C"},
        {"a": "A", "b": "B", "c": None},
        {"a": "A1", "b": "B1", "c": None},
        {"a": None, "b": "B", "c": "C"},
        {"a": None, "b": "B", "c": None},
        {"a": None, "b": None, "c": "C"},
    ]
    # Rows expected to fail: `b` differs from the mapped value, or the key
    # in `a` is not in the lookup map at all
    failing_rows = [
        {"a": "A", "b": None, "c": None},
        {"a": "A", "b": None, "c": "C"},
        {"a": "A1", "b": "B2", "c": "C"},
        {"a": "A", "b": "B1", "c": "C"},
        {"a": "A2", "b": "B", "c": "C"},
    ]

    # Construct Fake Resource, passing rows first so failures sit at the end
    resource = frictionless.Resource(source=passing_rows + failing_rows)

    lookup_map: dict[str, str] = {"A": "B", "A1": "B1"}

    # Build the check under test and validate
    check = plugins.lookup_match.VLookupMatch(
        key_field="a",
        value_field="b",
        lu_map=lookup_map,
    )
    report: frictionless.Report = resource.validate(
        checklist=frictionless.Checklist(checks=[check]),
    )

    # Check
    assert not report.valid
    errored_rows = report.flatten(["rowNumber"])
    assert len(errored_rows) == 5
    # Every reported row number must lie beyond row 7, i.e. after the header
    # and the six passing rows
    assert all(entry[0] > 7 for entry in errored_rows)
87 changes: 87 additions & 0 deletions tests/templates/test_survey_occurrence_data_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,3 +427,90 @@ def test_apply_mapping(self, mapper: Mapper) -> None:
res_g = next(graphs)
# Ensure temporal entity added to graph
assert next(res_g.subjects(a, ftn)) is not None


class TestSiteVisitIDSiteIDMap:
    """Tests specific to the provision of a site visit id -> site id map."""

    @attrs.define(kw_only=True)
    class Scenario:
        """Dataclass to hold the scenario parameters."""

        # Identifier used as the pytest parametrization id
        name: str
        # Data rows, each ordered as [siteVisitID, siteID]
        raws: list[list[str]]
        # Error types expected in the report; empty means the data should be valid.
        # A factory is used so that scenarios never share one mutable default set.
        expected_error_codes: set[str] = attrs.field(factory=set)
        # The site visit id -> site id map handed to `apply_validation`
        lookup_map: dict[str, str]

    scenarios: list[Scenario] = [
        Scenario(
            name="valid_with_default_map",
            raws=[
                ["SV1", "S1"],
                ["SV2", "S1"],
                ["SV3", "S1"],
                ["SV4", "S1"],
                ["", "S1"],
            ],
            lookup_map={"SV1": "S1", "SV2": "S1", "SV3": "S1", "SV4": "S1"},
        ),
        Scenario(
            name="invalid_with_default_map",
            raws=[
                ["SV1", "S1"],
                ["SV2", "S1"],
                ["SV3", "S1"],
                ["SV4", "S1"],
            ],
            lookup_map={"SV2": "S2"},
            expected_error_codes={"row-constraint"},
        ),
    ]

    @pytest.mark.parametrize(
        argnames="scenario",
        argvalues=scenarios,
        ids=[scenario.name for scenario in scenarios],
    )
    def test_apply_validation(self, scenario: Scenario, mocker: pytest_mock.MockerFixture, mapper: Mapper) -> None:
        """Tests the `apply_validation` method with a supplied site visit id -> site id map.

        Args:
            scenario (Scenario): The parameters of the scenario under test.
            mocker (pytest_mock.MockerFixture): The mocker fixture.
            mapper (Mapper): Mapper instance fixture.
        """
        # Construct fake data
        rawh = [
            "siteVisitID",
            "siteID",
        ]
        # `strict=True` makes a scenario row of the wrong width fail loudly
        all_raw = [dict(zip(rawh, ln, strict=True)) for ln in scenario.raws]

        # Modify schema to only fields required for test
        descriptor = {"fields": [field for field in Mapper.schema()["fields"] if field["name"] in rawh]}
        descriptor["fields"].sort(key=lambda f: rawh.index(f["name"]))

        # Patch the schema for the test
        mocker.patch.object(base.mapper.ABISMapper, "schema").return_value = descriptor

        # Create raw data csv string
        with io.StringIO() as output:
            csv_writer = csv.DictWriter(output, fieldnames=rawh)
            csv_writer.writeheader()
            csv_writer.writerows(all_raw)

            csv_data = output.getvalue().encode("utf-8")

        # Apply validation
        report = mapper.apply_validation(
            data=csv_data,
            site_visit_id_site_id_map=scenario.lookup_map,
        )

        # Assert: the report is valid exactly when no error codes are expected
        assert report.valid == (not scenario.expected_error_codes)
        if not report.valid:
            error_codes = {code for codes in report.flatten(["type"]) for code in codes}
            assert error_codes == scenario.expected_error_codes