diff --git a/abis_mapping/plugins/__init__.py b/abis_mapping/plugins/__init__.py index cd9acc60..1619d08e 100644 --- a/abis_mapping/plugins/__init__.py +++ b/abis_mapping/plugins/__init__.py @@ -9,6 +9,7 @@ from . import logical_or from . import mutual_exclusion from . import mutual_inclusion +from . import required from . import sites_geometry from . import tabular from . import timestamp diff --git a/abis_mapping/plugins/logical_or.py b/abis_mapping/plugins/logical_or.py index f6a5a61c..a2504fdf 100644 --- a/abis_mapping/plugins/logical_or.py +++ b/abis_mapping/plugins/logical_or.py @@ -6,7 +6,7 @@ import attrs # Typing -from typing import Iterator, Any +from typing import Iterator @attrs.define(kw_only=True, repr=False) @@ -21,11 +21,6 @@ class LogicalOr(frictionless.Check): # Field names to perform check on field_names: list[str] - # Special case check, occurs if value not provided in field_names - # fields then checks current row field provided as key to foreign_keys - # and ensures its value is provided in the corresponding set. - foreign_keys: dict[str, set[Any]] = dict() - def validate_row(self, row: frictionless.Row) -> Iterator[frictionless.Error]: """Called to validate the given row (on every row). @@ -42,22 +37,8 @@ def validate_row(self, row: frictionless.Row) -> Iterator[frictionless.Error]: if len(filtered) > 0: return - # Perform special case check on foreign key sets - row_fk_map = { - field_name: row[field_name] for field_name, fk_set in self.foreign_keys.items() if row[field_name] in fk_set - } - - # If there is at least one item in the dictionary then it is deemed valid. 
- if len(row_fk_map) > 0: - return - - # Create error note base on values provided to the check - note = f"the fields {self.field_names}" - note += f" and foreign key fields {self.foreign_keys.keys()}" if len(self.foreign_keys) > 0 else "" - note += " are constrained by logical OR, one or more value must be provided" - # Yield Error yield frictionless.errors.RowConstraintError.from_row( row=row, - note=note, + note=f"the fields {self.field_names} are constrained by logical OR, one or more value must be provided", ) diff --git a/abis_mapping/plugins/mutual_inclusion.py b/abis_mapping/plugins/mutual_inclusion.py index 230f2d16..29132d0b 100644 --- a/abis_mapping/plugins/mutual_inclusion.py +++ b/abis_mapping/plugins/mutual_inclusion.py @@ -40,11 +40,13 @@ def validate_row(self, row: frictionless.Row) -> Iterator[frictionless.Error]: # Short-circuit return + note = ( + f"the columns {self.field_names} are mutually inclusive and values" + f" must be provided together (columns {missing} are missing values)" + ) + # Yield Error yield frictionless.errors.RowConstraintError.from_row( row=row, - note=( - f"the columns {self.field_names} are mutually inclusive and must be provided together " - f"(columns {missing} are missing)" - ), + note=note, ) diff --git a/abis_mapping/plugins/required.py b/abis_mapping/plugins/required.py new file mode 100644 index 00000000..df8f82eb --- /dev/null +++ b/abis_mapping/plugins/required.py @@ -0,0 +1,83 @@ +"""Provides extra frictionless bypassable row-wise required checks for the package""" + +# Third-Party +import frictionless +import frictionless.errors +import attrs + +# Typing +from typing import Iterable + + +@attrs.define(kw_only=True, repr=False) +class RequiredEnhanced(frictionless.Check): + """Checks whether specified columns are all provided in a row-wise manner. + + It also allows the bypassing of the constraint through the provision + of whitelists. 
Note: This check is only effective when the original + schema for each field is `required = False`, otherwise the check + does nothing. + + Attributes: + field_names (list[str]): Names of fields in the row to be checked. + whitelists (dict[str, set]): A dictionary with the key corresponding + to a fieldname in the row, not necessarily provided in `field_names`, + which contains a set of values which will allow the check to be + bypassed, if encountered as a value in any of that given row's + corresponding fields' cells + """ + + # Check Attributes + type = "required-enhanced" + Errors = [frictionless.errors.RowConstraintError] + + # Attributes specific to this check + field_names: list[str] + whitelists: dict[str, set] = {} + + def validate_start(self) -> Iterable[frictionless.Error]: + """Called to validate the resource after opening + + Yields: + Error: found errors + """ + # Check whitelist keys correspond to fields + for f in self.whitelists: + if f not in self.resource.schema.field_names: + note = f"required enhanced value check requires field {f} to exist" + yield frictionless.errors.CheckError(note=note) + + def validate_row(self, row: frictionless.Row) -> Iterable[frictionless.Error]: + """Called to validate the given row (on every row). + + Args: + row (frictionless.Row): The row to check the required enhanced of. + + Yields: + frictionless.Error: For when the required enhanced is violated.
+ """ + # Retrieve Field Names for Missing Cells + missing = [f for f in self.field_names if not row[f]] + + # Check to see if any missing fields found + if len(missing) == 0: + return + + # Determine if rule is bypassable + bypassable_values = [row[k] for k, v in self.whitelists.items() if row[k] in v] + if len(bypassable_values) > 0: + return + + note = f"the columns {self.field_names} are all required" + if self.whitelists: + note += ( + f" unless the values provided in fields {self.whitelists.keys()}" + " match any of those in their supplied whitelists" + ) + note += f" ({missing} are missing)." + + # Yield Error + yield frictionless.errors.RowConstraintError.from_row( + row=row, + note=note, + ) diff --git a/abis_mapping/templates/survey_occurrence_data_v2/schema.json b/abis_mapping/templates/survey_occurrence_data_v2/schema.json index 77922d15..7f45091a 100644 --- a/abis_mapping/templates/survey_occurrence_data_v2/schema.json +++ b/abis_mapping/templates/survey_occurrence_data_v2/schema.json @@ -741,14 +741,14 @@ { "fields": "siteID", "reference": { - "resource": "survey_site_data.csv", + "resource": "survey_site_data", "fields": "siteID" } }, { "fields": "siteVisitID", "reference": { - "resource": "survey_site_visit_data.csv", + "resource": "survey_site_visit_data", "fields": "siteVisitID" } } diff --git a/abis_mapping/templates/survey_site_visit_data_v2/mapping.py b/abis_mapping/templates/survey_site_visit_data_v2/mapping.py index 7f9b4493..07a7b717 100644 --- a/abis_mapping/templates/survey_site_visit_data_v2/mapping.py +++ b/abis_mapping/templates/survey_site_visit_data_v2/mapping.py @@ -43,34 +43,50 @@ def apply_validation( frictionless.Report: Validation report for the specified data. 
""" # Extract keyword arguments - # TODO: Uncomment - # site_visit_id_map: dict[str, bool] = kwargs.get("site_visit_id_map", {}) + site_visit_id_map: dict[str, bool] = kwargs.get("site_visit_id_map", {}) # Construct schema schema = self.extra_fields_schema(data=data, full_schema=True) # Construct resource - resource = frictionless.Resource( + resource_site_visit_data = frictionless.Resource( source=data, format="csv", schema=schema, encoding="utf-8", ) - # Validate - report = resource.validate( + # Base extra custom checks + checks = [ + plugins.tabular.IsTabular(), + plugins.empty.NotEmpty(), + plugins.chronological.ChronologicalOrder( + field_names=["siteVisitStart", "siteVisitEnd"], + ), + plugins.logical_or.LogicalOr( + field_names=["siteVisitStart", "siteVisitEnd"], + ), + ] + + # Check to see if site visit id map was provided or was empty + if site_visit_id_map: + # Construct foreign key map + fk_map = {"siteVisitID": set(site_visit_id_map)} + + # Add custom check for temporal flexibility with whitelists. Here deferring + # the check on any ids found in the occurrence template, to when validaation + # occurs on it in line with temporal flexibility rules. 
+ checks += [ + plugins.required.RequiredEnhanced( + field_names=["siteVisitStart"], + whitelists=fk_map, + ) + ] + + # Validate the site visit resource + report: frictionless.Report = resource_site_visit_data.validate( checklist=frictionless.Checklist( - checks=[ - # Extra custom checks - plugins.tabular.IsTabular(), - plugins.empty.NotEmpty(), - plugins.logical_or.LogicalOr( - field_names=["siteVisitStart", "siteVisitEnd"], - ), - plugins.chronological.ChronologicalOrder( - field_names=["siteVisitStart", "siteVisitEnd"], - ), - ], + checks=checks, ), ) @@ -109,7 +125,10 @@ def extract_temporal_defaults( start_date: types.temporal.Timestamp = row["siteVisitStart"] end_date: types.temporal.Timestamp = row["siteVisitEnd"] site_visit_id: str = row["siteVisitID"] - if not start_date and not end_date: + + # Temporal flexibility is dependent upon a start_date being + # present only. + if not start_date: continue # Create new graph @@ -146,12 +165,12 @@ def add_temporal_coverage_bnode( # Create temporal coverage node temporal_coverage = rdflib.BNode() graph.add((temporal_coverage, a, rdflib.TIME.TemporalEntity)) - if start_date: + if start_date is not None: begin = rdflib.BNode() graph.add((temporal_coverage, rdflib.TIME.hasBeginning, begin)) graph.add((begin, a, rdflib.TIME.Instant)) graph.add((begin, start_date.rdf_in_xsd, start_date.to_rdf_literal())) - if end_date: + if end_date is not None: end = rdflib.BNode() graph.add((temporal_coverage, rdflib.TIME.hasEnd, end)) graph.add((end, a, rdflib.TIME.Instant)) diff --git a/abis_mapping/templates/survey_site_visit_data_v2/schema.json b/abis_mapping/templates/survey_site_visit_data_v2/schema.json index 4ecd34eb..bc579938 100644 --- a/abis_mapping/templates/survey_site_visit_data_v2/schema.json +++ b/abis_mapping/templates/survey_site_visit_data_v2/schema.json @@ -1,165 +1,165 @@ { - "fields": [ - { - "name": "surveyID", - "title": "SurveyID", - "description": "The identifier of the Survey that the Site is related to in 
this dataset.", - "example": "AR220-01", - "type": "string", - "format": "default", - "constraints": { - "required": false - } - }, - { - "name": "siteID", - "title": "Site ID", - "description": "A unique within dataset string identifier for the site. Valid values include strings that are used specifically for this survey or URIs from BDR Sites that have been established in previous surveys.", - "example": "P1", - "type": "string", - "format": "default", - "constraints": { - "required": true - } - }, - { - "name": "siteIDSource", - "title": "Site ID Source", - "description": "The organisation that assigned the SiteID to this Site", - "example": "TERN", - "type": "string", - "format": "default", - "constraints": { - "required": false - } - }, - { - "name": "siteVisitID", - "title": "Site Visit ID", - "description": "The unique key assigned to a visit. A visit is a time distinct assessment conducted within a survey at a designated site.", - "example": "CPXEI0000001", - "type": "string", - "format": "default", - "constraints": { - "required": true, - "unique": true - } - }, - { - "name": "siteVisitStart", - "title": "Site Visit Start", - "description": "The temporal start of when the Site was being used to collect data for the survey. Expected values include date, dateTime, dateTimeStamp.", - "example": "2016-02-28", - "type": "timestamp", - "format": "default", - "constraints": { - "required": false - } - }, - { - "name": "siteVisitEnd", - "title": "Site Visit End", - "description": "The temporal end of when the Site was being used to collect data for the survey. 
Expected values include date, dateTime, dateTimeStamp.", - "example": "2016-02-28", - "type": "timestamp", - "format": "default", - "constraints": { - "required": false - } - }, - { - "name": "visitOrgs", - "title": "Visit Orgs", - "description": "The names of the organisations responsible for recording the original Occurrence.", - "example": "NSW Dept of Planning, Industry and Environment.", - "type": "list", - "format": "default", - "constraints": { - "required": false - } - }, - { - "name": "visitObservers", - "title": "Visit Observers", - "description": "A list (concatenated and separated using |) of names of people, groups, or organisations responsible for recording the original Occurrence.", - "example": "Oliver P. Pearson | Anita K. Pearson", - "type": "list", - "format": "default", - "constraints": { - "required": false - } - }, - { - "name": "condition", - "title": "Condition", - "description": "The state of a patch of vegetation at the time of sampling relative to some specified standard or benchmark (where available).", - "example": "Burnt", - "type": "string", - "format": "default", - "constraints": { - "required": false - } - }, - { - "name": "targetTaxonomicScope", - "title": "Target Taxonomic Scope", - "description": "The taxonomic group targeted for sampling during the Site Visit", - "example": "Coleoptera", - "type": "string", - "format": "default", - "constraints": { - "required": false - } - }, - { - "name": "protocolName", - "title": "Protocol Name", - "description": "Categorical descriptive name for the method used during the Site Visit.", - "example": "harpTrapping", - "type": "string", - "format": "default", - "constraints": { - "required": false - }, - "vocabularies": [ - "VISIT_PROTOCOL_NAME" - ] - }, - { - "name": "protocolDescription", - "title": "Protocol Description", - "description": "A detailed description of the method used during the Site Visit. 
The description may include deviations from a protocol referred to in eco:protocolReferences. Recommended good practice is to provide information about instruments used, calibration, etc.", - "example": "Three conventional harp traps (3.2m ht x 2.2m w) were established in flight path zones for a period of 4 hrs at dawn and dusk for a total of 10 trap nights. Traps were visited on an hourly basis during each deployment period and the trap catch recorded for species, size, weight, sex, age and maternal status.", - "type": "string", - "format": "default", - "constraints": { - "required": false - } - }, - { - "name": "samplingEffortValue", - "title": "Sample Effort", - "description": "Similar to eco:samplingEffortValue. The total sampling effort value. A samplingEffortValue must have a corresponding samplingEffortUnit", - "example": "20 x 12", - "type": "string", - "format": "default", - "constraints": { - "required": false - } - }, - { - "name": "samplingEffortUnit", - "title": "Sampling Effort Units", - "description": "Similar to eco:samplingEffortUnit. The units associated with samplingEffortValue.", - "example": "trapDays", - "type": "string", - "format": "default", - "constraints": { - "required": false - }, - "vocabularies": [ - "SAMPLING_EFFORT_UNIT" - ] - } - ] + "fields": [ + { + "name": "surveyID", + "title": "SurveyID", + "description": "The identifier of the Survey that the Site is related to in this dataset.", + "example": "AR220-01", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "siteID", + "title": "Site ID", + "description": "A unique within dataset string identifier for the site. 
Valid values include strings that are used specifically for this survey or URIs from BDR Sites that have been established in previous surveys.", + "example": "P1", + "type": "string", + "format": "default", + "constraints": { + "required": true + } + }, + { + "name": "siteIDSource", + "title": "Site ID Source", + "description": "The organisation that assigned the SiteID to this Site", + "example": "TERN", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "siteVisitID", + "title": "Site Visit ID", + "description": "The unique key assigned to a visit. A visit is a time distinct assessment conducted within a survey at a designated site.", + "example": "CPXEI0000001", + "type": "string", + "format": "default", + "constraints": { + "required": true, + "unique": true + } + }, + { + "name": "siteVisitStart", + "title": "Site Visit Start", + "description": "The temporal start of when the Site was being used to collect data for the survey. Expected values include date, dateTime, dateTimeStamp.", + "example": "2016-02-28", + "type": "timestamp", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "siteVisitEnd", + "title": "Site Visit End", + "description": "The temporal end of when the Site was being used to collect data for the survey. 
Expected values include date, dateTime, dateTimeStamp.", + "example": "2016-02-28", + "type": "timestamp", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "visitOrgs", + "title": "Visit Orgs", + "description": "The names of the organisations responsible for recording the original Occurrence.", + "example": "NSW Dept of Planning, Industry and Environment.", + "type": "list", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "visitObservers", + "title": "Visit Observers", + "description": "A list (concatenated and separated using |) of names of people, groups, or organisations responsible for recording the original Occurrence.", + "example": "Oliver P. Pearson | Anita K. Pearson", + "type": "list", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "condition", + "title": "Condition", + "description": "The state of a patch of vegetation at the time of sampling relative to some specified standard or benchmark (where available).", + "example": "Burnt", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "targetTaxonomicScope", + "title": "Target Taxonomic Scope", + "description": "The taxonomic group targeted for sampling during the Site Visit", + "example": "Coleoptera", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "protocolName", + "title": "Protocol Name", + "description": "Categorical descriptive name for the method used during the Site Visit.", + "example": "harpTrapping", + "type": "string", + "format": "default", + "constraints": { + "required": false + }, + "vocabularies": [ + "VISIT_PROTOCOL_NAME" + ] + }, + { + "name": "protocolDescription", + "title": "Protocol Description", + "description": "A detailed description of the method used during the Site Visit. 
The description may include deviations from a protocol referred to in eco:protocolReferences. Recommended good practice is to provide information about instruments used, calibration, etc.", + "example": "Three conventional harp traps (3.2m ht x 2.2m w) were established in flight path zones for a period of 4 hrs at dawn and dusk for a total of 10 trap nights. Traps were visited on an hourly basis during each deployment period and the trap catch recorded for species, size, weight, sex, age and maternal status.", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "samplingEffortValue", + "title": "Sample Effort", + "description": "Similar to eco:samplingEffortValue. The total sampling effort value. A samplingEffortValue must have a corresponding samplingEffortUnit", + "example": "20 x 12", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "samplingEffortUnit", + "title": "Sampling Effort Units", + "description": "Similar to eco:samplingEffortUnit. 
The units associated with samplingEffortValue.", + "example": "trapDays", + "type": "string", + "format": "default", + "constraints": { + "required": false + }, + "vocabularies": [ + "SAMPLING_EFFORT_UNIT" + ] + } + ] } diff --git a/tests/plugins/test_logical_or.py b/tests/plugins/test_logical_or.py index 1053cf2a..ca3d5cbe 100644 --- a/tests/plugins/test_logical_or.py +++ b/tests/plugins/test_logical_or.py @@ -2,7 +2,6 @@ # Third-Party import frictionless -import pytest # Local from abis_mapping import plugins @@ -40,45 +39,3 @@ def test_check_logical_or() -> None: # Check assert not report.valid assert len(report.flatten()) == 1 - - -@pytest.mark.parametrize( - "fk_set,n_err", - [ - ({"d": {"D", "DD", "DDD"}}, 0), - ({"e": {"E"}, "d": {"D"}}, 0), - ({"e": {"D"}, "d": {"E"}}, 1), - ], -) -def test_check_logical_or_with_foreign_keys(fk_set: dict[str, set[str]], n_err: int) -> None: - """Tests the logical or checker with foreign keys provided.""" - # Construct fake resource - resource = frictionless.Resource( - source=[ - # Valid - {"a": "A", "b": None, "c": None, "d": "D", "e": "E"}, - {"a": None, "b": "B", "c": None, "d": "D", "e": "E"}, - {"a": None, "b": None, "c": "C", "d": "D", "e": "E"}, - {"a": "A", "b": "B", "c": None, "d": "D", "e": "E"}, - {"a": "A", "b": None, "c": "C", "d": "D", "e": "E"}, - {"a": None, "b": "B", "c": "C", "d": "D", "e": "E"}, - {"a": "A", "b": "B", "c": "C", "d": "D", "e": "E"}, - # Invalid - {"a": None, "b": None, "c": None, "d": "D", "e": "E"}, - ] - ) - - report = resource.validate( - checklist=frictionless.Checklist( - checks=[ - plugins.logical_or.LogicalOr( - field_names=["a", "b", "c"], - foreign_keys=fk_set, - ) - ] - ) - ) - - # Check - assert report.valid == (n_err == 0) - assert len(report.flatten()) == n_err diff --git a/tests/plugins/test_required.py b/tests/plugins/test_required.py new file mode 100644 index 00000000..6dd761ea --- /dev/null +++ b/tests/plugins/test_required.py @@ -0,0 +1,109 @@ +"""Provides Unit Tests for 
the `plugins.required` module""" + +# Third-party +import frictionless +import pytest + +# Local +from abis_mapping import plugins + + +def test_check_required_enhanced() -> None: + """Tests the RequiredEnhanced checker""" + # Construct Fake Resource + resource = frictionless.Resource( + source=[ + # Valid + {"a": "A", "b": "B", "c": "C"}, + {"a": "A", "b": "B", "c": None}, + # Invalid + {"a": None, "b": None, "c": "C"}, + {"a": None, "b": "B", "c": "C"}, + {"a": None, "b": "B", "c": None}, + {"a": "A", "b": None, "c": "C"}, + {"a": "A", "b": None, "c": None}, + ], + ) + + # Validate + report: frictionless.Report = resource.validate( + checklist=frictionless.Checklist( + checks=[ + plugins.required.RequiredEnhanced( + field_names=["a", "b"], + ), + ] + ) + ) + + # Check + assert not report.valid + assert len(report.flatten()) == 5 + + +@pytest.mark.parametrize( + "fk_set,n_err", + [ + ({"d": {"D", "DD", "DDD"}}, 0), + ({"e": {"E"}, "d": {"D"}}, 0), + ({"e": {"D"}, "d": {"E"}}, 7), + ], +) +def test_check_required_enhanced_with_whitelists(fk_set: dict[str, set[str]], n_err: int) -> None: + """Tests the enhanced required checker with whitelists provided.""" + # Construct fake resource + resource = frictionless.Resource( + source=[ + {"a": "A", "b": None, "c": None, "d": "D", "e": "E"}, + {"a": None, "b": "B", "c": None, "d": "D", "e": "E"}, + {"a": None, "b": None, "c": "C", "d": "D", "e": "E"}, + {"a": "A", "b": "B", "c": None, "d": "D", "e": "E"}, + {"a": "A", "b": None, "c": "C", "d": "D", "e": "E"}, + {"a": None, "b": "B", "c": "C", "d": "D", "e": "E"}, + {"a": "A", "b": "B", "c": "C", "d": "D", "e": "E"}, + {"a": None, "b": None, "c": None, "d": "D", "e": "E"}, + ] + ) + + report = resource.validate( + checklist=frictionless.Checklist( + checks=[ + plugins.required.RequiredEnhanced( + field_names=["a", "b", "c"], + whitelists=fk_set, + ) + ] + ) + ) + + # Check + assert report.valid == (n_err == 0) + assert len(report.flatten()) == n_err + assert
report.flatten(["type"]) == [["row-constraint"]] * n_err + + +def test_check_required_enhanced_with_invalid_whitelist() -> None: + """Tests the enhanced required checker with an invalid whitelist.""" + # Construct fake resource + resource = frictionless.Resource( + source=[ + {"a": "A", "b": None, "c": None, "d": "D", "e": "E"}, + {"a": None, "b": "B", "c": None, "d": "D", "e": "E"}, + {"a": None, "b": None, "c": "C", "d": "D", "e": "E"}, + {"a": "A", "b": "B", "c": None, "d": "D", "e": "E"}, + {"a": "A", "b": None, "c": "C", "d": "D", "e": "E"}, + {"a": None, "b": "B", "c": "C", "d": "D", "e": "E"}, + {"a": "A", "b": "B", "c": "C", "d": "D", "e": "E"}, + {"a": None, "b": None, "c": None, "d": "D", "e": "E"}, + ] + ) + + report = resource.validate( + checklist=frictionless.Checklist( + checks=[plugins.required.RequiredEnhanced(field_names=["d", "e"], whitelists={"fake": {"FAKE"}})] + ) + ) + + assert not report.valid + assert len(report.flatten()) == 1 + assert report.flatten(["type"]) == [["check-error"]] diff --git a/tests/templates/test_survey_site_visit_data_v2.py b/tests/templates/test_survey_site_visit_data_v2.py index 7782c5fe..f60fd354 100644 --- a/tests/templates/test_survey_site_visit_data_v2.py +++ b/tests/templates/test_survey_site_visit_data_v2.py @@ -5,9 +5,9 @@ import dataclasses import io import pathlib -import unittest # Third-party +import pandas as pd import pyshacl import pytest import pytest_mock @@ -15,14 +15,30 @@ # Local from abis_mapping import types -import abis_mapping.templates.survey_site_visit_data_v2.mapping +from abis_mapping.templates.survey_site_visit_data_v2 import mapping # Typing -from typing import Callable +from typing import Callable, Iterator -# Alias mapper -Mapper = abis_mapping.templates.survey_site_visit_data_v2.mapping.SurveySiteVisitMapper +@pytest.fixture +def mapper() -> Iterator[mapping.SurveySiteVisitMapper]: + """Provides site visit mapper for tests. + + Yields: + SurveySiteVisitMapper: site visit mapper instance. 
+ """ + # Create mapper + mapper = mapping.SurveySiteVisitMapper() + + # Clear schema cache + mapper.schema.cache_clear() + + # Yield mapper + yield mapper + + # Clear schema cache again + mapper.schema.cache_clear() @dataclasses.dataclass @@ -57,12 +73,15 @@ class Scenario: argvalues=scenarios, ids=[s.name for s in scenarios], ) -def test_add_temporal_coverage_node(graph_comparer: Callable, scenario: Scenario) -> None: +def test_add_temporal_coverage_node( + graph_comparer: Callable, scenario: Scenario, mapper: mapping.SurveySiteVisitMapper +) -> None: """Tests the graph output from add_temporal_coverage_node method. Args: scenario (Scenario): Data structure containing test parameters. graph_comparer (Callable): Graph comparer fixture. + mapper (SurveySiteVisitMapper): Site visit mapper instance fixture. """ # Parse dates date_fn = lambda x: types.temporal.parse_timestamp(x) if x is not None else None # noqa: E731 @@ -72,9 +91,6 @@ def test_add_temporal_coverage_node(graph_comparer: Callable, scenario: Scenario # Create graph graph = rdflib.Graph() - # Create mapper - mapper = Mapper() - # Invoke mapper.add_temporal_coverage_bnode( graph=graph, @@ -92,18 +108,19 @@ def test_add_temporal_coverage_node(graph_comparer: Callable, scenario: Scenario class TestExtractTemporalDefaults: - @pytest.fixture - def mocked_schema(self, mocker: pytest_mock.MockerFixture) -> unittest.mock.MagicMock: - """Patches and returns mock for schema method on mapper. + def test_extract_temporal_defaults( + self, + mapper: mapping.SurveySiteVisitMapper, + mocker: pytest_mock.MockerFixture, + ) -> None: + """Tests the extract_temporal_defaults method. Args: - mocker (pytest_mock.MockerFixture): Mocker fixture. - - Returns: - unittest.mock.MagicMock: Mocked schema. + mapper (SurveySiteVisitMapper): Site visit mapper instance fixture. 
+ mocker (pytest_mock.MockerFixture): The mocker fixture """ # Retrieve actual descriptor - descriptor = Mapper.schema() + descriptor = mapping.SurveySiteVisitMapper.schema() # Define fields of relevance for tests fieldnames = ["siteVisitID", "siteVisitStart", "siteVisitEnd"] @@ -111,15 +128,9 @@ def mocked_schema(self, mocker: pytest_mock.MockerFixture) -> unittest.mock.Magi # Make descriptor only include these fields descriptor["fields"] = [f for f in descriptor["fields"] if f["name"] in fieldnames] - # Patch and return - return mocker.patch.object(Mapper, "schema", return_value=descriptor) - - def test_extract_temporal_defaults(self, mocked_schema: unittest.mock.MagicMock) -> None: - """Tests the extract_temporal_defaults method. + # Patch schema + mocked_schema = mocker.patch.object(mapping.SurveySiteVisitMapper, "schema", return_value=descriptor) - Args: - mocked_schema (unittest.mock.MagicMock): Mocked schema method fixture. - """ # Declare some raw data rows = [ { @@ -131,19 +142,18 @@ def test_extract_temporal_defaults(self, mocked_schema: unittest.mock.MagicMock) "siteVisitID": "SV2", "siteVisitStart": "2024-10-14", }, + # The map should exclude these since there are no + # values for default temporal entity must have start date { "siteVisitID": "SV3", "siteVisitEnd": "2025-10-14", }, - # The map should exclude this since there are no - # values for temporal entity provided without error { "siteVisitID": "SV4", }, ] # Build elements for expected map - graphs = [rdflib.Graph() for _ in range(3)] - mapper = Mapper() + graphs = [rdflib.Graph() for _ in range(2)] for g, r in zip(graphs, rows, strict=False): raw_start = r.get("siteVisitStart") raw_end = r.get("siteVisitEnd") @@ -164,7 +174,99 @@ def test_extract_temporal_defaults(self, mocked_schema: unittest.mock.MagicMock) csv_data = output.getvalue().encode("utf-8") # Invoke - actual = Mapper().extract_temporal_defaults(csv_data) + actual = mapper.extract_temporal_defaults(csv_data) # Assert assert actual == 
expected + mocked_schema.assert_called_once() + + +class TestApplyValidation: + @pytest.fixture(scope="class") + def data(self) -> bytes: + """Takes an existing csv path and returns it unmodified. + + The csv returned is expected to have both start and end dates plus site visit id + included for all rows. + """ + # Create path object and return contents + return pathlib.Path("abis_mapping/templates/survey_site_visit_data_v2/examples/minimal.csv").read_bytes() + + def _nullify_columns(self, columns: list[str], data: bytes) -> bytes: + """Replaces any values in specified csv columns with null. + + Args: + columns (list[str]): Field names in supplied csv + to make null values. + data (bytes): Original csv data to modify. + + Returns: + bytes: Modified csv. + """ + # Create dataframe from existing csv + df = pd.read_csv(io.BytesIO(data)) + # Set all values for columns to null + for col in columns: + df[col].values[:] = pd.NA + # Return csv + result: bytes = df.to_csv(index=False).encode("utf-8") + return result + + @pytest.fixture(scope="class") + def data_no_start_date(self, data: bytes) -> bytes: + """Modifies existing csv and sets all start dates to null. + + Args: + data (bytes): The original data fixture + + Returns: + bytes: Modified csv. + """ + return self._nullify_columns(["siteVisitStart"], data) + + @pytest.fixture(scope="class") + def data_no_end_date(self, data: bytes) -> bytes: + """Modifies existing csv and sets all end dates to null. + + Args: + data (bytes): The original csv data fixture. + + Returns: + bytes: Modified csv. + """ + return self._nullify_columns(["siteVisitEnd"], data) + + def test_with_site_visit_id_map(self, mapper: mapping.SurveySiteVisitMapper, data_no_start_date: bytes) -> None: + """Tests the apply_validation method with site_visit_id map supplied and no start date. + + Args: + mapper (SurveySiteVisitMapper): Site visit mapper instance fixture. + data_no_start_date (bytes): Csv with no start dates.
+ """ + # Construct map + svid_map = {"VA-99": True, "FAKEID": True} + + # Invoke + report = mapper.apply_validation(data_no_start_date, site_visit_id_map=svid_map) + + # Assert + assert report.valid + + def test_with_site_visit_id_map_invalid( + self, mapper: mapping.SurveySiteVisitMapper, data_no_start_date: bytes + ) -> None: + """Tests the apply_validation method with site_visit_id_map supplied and no corresponding id in map. + + Args: + mapper (mapping.SurveySiteVisitMapper): Site visit mapper instances fixture. + data_no_start_date (bytes): Csv with no start dates. + """ + # Construct map + svid_map = {"FAKEID": True} + + # Invoke + report = mapper.apply_validation(data_no_start_date, site_visit_id_map=svid_map) + + # Assert and check errors + assert not report.valid + assert report.flatten(["type"]) == [["row-constraint"]]