Skip to content

Commit

Permalink
BDRSPS-913 Added site visit id cross validation input to site visit t…
Browse files Browse the repository at this point in the history
…emplate. (#264)

* Added site visit id cross validation input to site visit template. 
* Removed logical or foreign key logic.
* Created enhanced required custom check and used that within the site visit validation.
* Removed schema patching fixture from site visit tests.
  • Loading branch information
joecrowleygaia authored Oct 17, 2024
1 parent 3523f9f commit 4d9c9f6
Show file tree
Hide file tree
Showing 10 changed files with 536 additions and 282 deletions.
1 change: 1 addition & 0 deletions abis_mapping/plugins/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from . import logical_or
from . import mutual_exclusion
from . import mutual_inclusion
from . import required
from . import sites_geometry
from . import tabular
from . import timestamp
Expand Down
23 changes: 2 additions & 21 deletions abis_mapping/plugins/logical_or.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import attrs

# Typing
from typing import Iterator, Any
from typing import Iterator


@attrs.define(kw_only=True, repr=False)
Expand All @@ -21,11 +21,6 @@ class LogicalOr(frictionless.Check):
# Field names to perform check on
field_names: list[str]

# Special case check, occurs if value not provided in field_names
# fields then checks current row field provided as key to foreign_keys
# and ensures its value is provided in the corresponding set.
foreign_keys: dict[str, set[Any]] = dict()

def validate_row(self, row: frictionless.Row) -> Iterator[frictionless.Error]:
"""Called to validate the given row (on every row).
Expand All @@ -42,22 +37,8 @@ def validate_row(self, row: frictionless.Row) -> Iterator[frictionless.Error]:
if len(filtered) > 0:
return

# Perform special case check on foreign key sets
row_fk_map = {
field_name: row[field_name] for field_name, fk_set in self.foreign_keys.items() if row[field_name] in fk_set
}

# If there is at least one item in the dictionary then it is deemed valid.
if len(row_fk_map) > 0:
return

# Create error note base on values provided to the check
note = f"the fields {self.field_names}"
note += f" and foreign key fields {self.foreign_keys.keys()}" if len(self.foreign_keys) > 0 else ""
note += " are constrained by logical OR, one or more value must be provided"

# Yield Error
yield frictionless.errors.RowConstraintError.from_row(
row=row,
note=note,
note=f"the fields {self.field_names} are constrained by logical OR, one or more value must be provided",
)
10 changes: 6 additions & 4 deletions abis_mapping/plugins/mutual_inclusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,13 @@ def validate_row(self, row: frictionless.Row) -> Iterator[frictionless.Error]:
# Short-circuit
return

note = (
f"the columns {self.field_names} are mutually inclusive and values"
f" must be provided together (columns {missing} are missing values)"
)

# Yield Error
yield frictionless.errors.RowConstraintError.from_row(
row=row,
note=(
f"the columns {self.field_names} are mutually inclusive and must be provided together "
f"(columns {missing} are missing)"
),
note=note,
)
83 changes: 83 additions & 0 deletions abis_mapping/plugins/required.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""Provides extra frictionless bypassable row-wise required checks for the package"""

# Third-Party
import frictionless
import frictionless.errors
import attrs

# Typing
from typing import Iterable


@attrs.define(kw_only=True, repr=False)
class RequiredEnhanced(frictionless.Check):
    """Checks whether specified columns are all provided in a row-wise manner.

    It also allows the bypassing of the constraint through the provision
    of whitelists. Note: This check is only effective when the original
    schema for each field is `required = False`, otherwise the check
    does nothing.

    Attributes:
        field_names (list[str]): Names of fields in the row to be checked.
        whitelists (dict[str, set]): A dictionary with each key corresponding
            to a field name in the row, not necessarily provided in
            `field_names`, mapped to a set of values which will allow the
            check to be bypassed if encountered as the value of that field
            in the given row.
    """

    # Check Attributes
    type = "required-enhanced"
    Errors = [frictionless.errors.RowConstraintError]

    # Attributes specific to this check
    field_names: list[str]
    # A factory is used so that every instance receives its own dictionary;
    # a literal `{}` default would be one object shared by all instances.
    whitelists: dict[str, set] = attrs.field(factory=dict)

    def validate_start(self) -> Iterable[frictionless.Error]:
        """Called to validate the resource after opening.

        Yields:
            frictionless.Error: A check error for each whitelist key that
                does not correspond to a field in the resource schema.
        """
        # Look the schema field names up once rather than per whitelist key
        schema_field_names = self.resource.schema.field_names

        # Check whitelist keys correspond to fields
        for name in self.whitelists:
            if name not in schema_field_names:
                note = f"required enhanced value check requires field {name} to exist"
                yield frictionless.errors.CheckError(note=note)

    def validate_row(self, row: frictionless.Row) -> Iterable[frictionless.Error]:
        """Called to validate the given row (on every row).

        Args:
            row (frictionless.Row): The row to check the required enhanced of.

        Yields:
            frictionless.Error: For when the required enhanced is violated.
        """
        # Retrieve field names for missing cells; any falsy cell value
        # is treated as missing.
        missing = [name for name in self.field_names if not row[name]]

        # Nothing missing means the constraint is satisfied
        if not missing:
            return

        # The rule is bypassed as soon as any whitelisted field holds one of
        # its whitelisted values in this row.
        if any(row[name] in allowed for name, allowed in self.whitelists.items()):
            return

        note = f"the columns {self.field_names} are all required"
        if self.whitelists:
            # Render the keys as a plain list so the message does not leak
            # the `dict_keys(...)` repr.
            note += (
                f" unless the values provided in fields {list(self.whitelists)}"
                " match any of those in their supplied whitelists"
            )
        note += f" ({missing} are missing)."

        # Yield Error
        yield frictionless.errors.RowConstraintError.from_row(
            row=row,
            note=note,
        )
4 changes: 2 additions & 2 deletions abis_mapping/templates/survey_occurrence_data_v2/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -741,14 +741,14 @@
{
"fields": "siteID",
"reference": {
"resource": "survey_site_data.csv",
"resource": "survey_site_data",
"fields": "siteID"
}
},
{
"fields": "siteVisitID",
"reference": {
"resource": "survey_site_visit_data.csv",
"resource": "survey_site_visit_data",
"fields": "siteVisitID"
}
}
Expand Down
57 changes: 38 additions & 19 deletions abis_mapping/templates/survey_site_visit_data_v2/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,34 +43,50 @@ def apply_validation(
frictionless.Report: Validation report for the specified data.
"""
# Extract keyword arguments
# TODO: Uncomment
# site_visit_id_map: dict[str, bool] = kwargs.get("site_visit_id_map", {})
site_visit_id_map: dict[str, bool] = kwargs.get("site_visit_id_map", {})

# Construct schema
schema = self.extra_fields_schema(data=data, full_schema=True)

# Construct resource
resource = frictionless.Resource(
resource_site_visit_data = frictionless.Resource(
source=data,
format="csv",
schema=schema,
encoding="utf-8",
)

# Validate
report = resource.validate(
# Base extra custom checks
checks = [
plugins.tabular.IsTabular(),
plugins.empty.NotEmpty(),
plugins.chronological.ChronologicalOrder(
field_names=["siteVisitStart", "siteVisitEnd"],
),
plugins.logical_or.LogicalOr(
field_names=["siteVisitStart", "siteVisitEnd"],
),
]

# Check to see if site visit id map was provided or was empty
if site_visit_id_map:
# Construct foreign key map
fk_map = {"siteVisitID": set(site_visit_id_map)}

# Add custom check for temporal flexibility with whitelists. The check on
# any ids found in the occurrence template is deferred until validation
# occurs on that template, in line with temporal flexibility rules.
checks += [
plugins.required.RequiredEnhanced(
field_names=["siteVisitStart"],
whitelists=fk_map,
)
]

# Validate the site visit resource
report: frictionless.Report = resource_site_visit_data.validate(
checklist=frictionless.Checklist(
checks=[
# Extra custom checks
plugins.tabular.IsTabular(),
plugins.empty.NotEmpty(),
plugins.logical_or.LogicalOr(
field_names=["siteVisitStart", "siteVisitEnd"],
),
plugins.chronological.ChronologicalOrder(
field_names=["siteVisitStart", "siteVisitEnd"],
),
],
checks=checks,
),
)

Expand Down Expand Up @@ -109,7 +125,10 @@ def extract_temporal_defaults(
start_date: types.temporal.Timestamp = row["siteVisitStart"]
end_date: types.temporal.Timestamp = row["siteVisitEnd"]
site_visit_id: str = row["siteVisitID"]
if not start_date and not end_date:

# Temporal flexibility depends only on a start_date being
# present.
if not start_date:
continue

# Create new graph
Expand Down Expand Up @@ -146,12 +165,12 @@ def add_temporal_coverage_bnode(
# Create temporal coverage node
temporal_coverage = rdflib.BNode()
graph.add((temporal_coverage, a, rdflib.TIME.TemporalEntity))
if start_date:
if start_date is not None:
begin = rdflib.BNode()
graph.add((temporal_coverage, rdflib.TIME.hasBeginning, begin))
graph.add((begin, a, rdflib.TIME.Instant))
graph.add((begin, start_date.rdf_in_xsd, start_date.to_rdf_literal()))
if end_date:
if end_date is not None:
end = rdflib.BNode()
graph.add((temporal_coverage, rdflib.TIME.hasEnd, end))
graph.add((end, a, rdflib.TIME.Instant))
Expand Down
Loading

0 comments on commit 4d9c9f6

Please sign in to comment.