Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BDRSPS-913 Added site visit id cross validation input to site visit template. #264

Merged
merged 7 commits into from
Oct 17, 2024
1 change: 1 addition & 0 deletions abis_mapping/plugins/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from . import logical_or
from . import mutual_exclusion
from . import mutual_inclusion
from . import required
from . import sites_geometry
from . import tabular
from . import timestamp
Expand Down
23 changes: 2 additions & 21 deletions abis_mapping/plugins/logical_or.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import attrs

# Typing
from typing import Iterator, Any
from typing import Iterator


@attrs.define(kw_only=True, repr=False)
Expand All @@ -21,11 +21,6 @@ class LogicalOr(frictionless.Check):
# Field names to perform check on
field_names: list[str]

# Special case check, occurs if value not provided in field_names
# fields then checks current row field provided as key to foreign_keys
# and ensures its value is provided in the corresponding set.
foreign_keys: dict[str, set[Any]] = dict()

def validate_row(self, row: frictionless.Row) -> Iterator[frictionless.Error]:
"""Called to validate the given row (on every row).

Expand All @@ -42,22 +37,8 @@ def validate_row(self, row: frictionless.Row) -> Iterator[frictionless.Error]:
if len(filtered) > 0:
return

# Perform special case check on foreign key sets
row_fk_map = {
field_name: row[field_name] for field_name, fk_set in self.foreign_keys.items() if row[field_name] in fk_set
}

# If there is at least one item in the dictionary then it is deemed valid.
if len(row_fk_map) > 0:
return

# Create error note base on values provided to the check
note = f"the fields {self.field_names}"
note += f" and foreign key fields {self.foreign_keys.keys()}" if len(self.foreign_keys) > 0 else ""
note += " are constrained by logical OR, one or more value must be provided"

# Yield Error
yield frictionless.errors.RowConstraintError.from_row(
row=row,
note=note,
note=f"the fields {self.field_names} are constrained by logical OR, one or more value must be provided",
)
10 changes: 6 additions & 4 deletions abis_mapping/plugins/mutual_inclusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,13 @@ def validate_row(self, row: frictionless.Row) -> Iterator[frictionless.Error]:
# Short-circuit
return

note = (
f"the columns {self.field_names} are mutually inclusive and values"
f" must be provided together (columns {missing} are missing values)"
)

# Yield Error
yield frictionless.errors.RowConstraintError.from_row(
row=row,
note=(
f"the columns {self.field_names} are mutually inclusive and must be provided together "
f"(columns {missing} are missing)"
),
note=note,
)
83 changes: 83 additions & 0 deletions abis_mapping/plugins/required.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""Provides extra frictionless bypassable row-wise required checks for the package"""

# Third-Party
import frictionless
import frictionless.errors
import attrs

# Typing
from typing import Iterable


@attrs.define(kw_only=True, repr=False)
class RequiredEnhanced(frictionless.Check):
    """Checks whether specified columns are all provided in a row-wise manner.

    It also allows bypassing of the constraint through the provision
    of whitelists. Note: This check is only effective when the original
    schema for each field is `required = False`; otherwise the check
    does nothing.

    Attributes:
        field_names (list[str]): Names of fields in the row to be checked.
        whitelists (dict[str, set]): A dictionary with each key corresponding
            to a fieldname in the row (not necessarily provided in
            `field_names`), mapped to a set of values which, if encountered
            as the value of that field's cell in a given row, allow the
            check to be bypassed for that row.
    """

    # Check Attributes
    type = "required-enhanced"
    Errors = [frictionless.errors.RowConstraintError]

    # Attributes specific to this check
    field_names: list[str]
    # NOTE: attrs' `define` API wraps mutable defaults in a factory,
    # so instances do not share this dict.
    whitelists: dict[str, set] = {}

    def validate_start(self) -> Iterable[frictionless.Error]:
        """Called to validate the resource after opening.

        Yields:
            frictionless.Error: For each whitelist key that does not
                correspond to a field in the resource's schema.
        """
        # Every whitelist key must name a real field in the schema.
        for field_name in self.whitelists:
            if field_name not in self.resource.schema.field_names:
                note = f"required enhanced value check requires field {field_name} to exist"
                yield frictionless.errors.CheckError(note=note)

    def validate_row(self, row: frictionless.Row) -> Iterable[frictionless.Error]:
        """Called to validate the given row (on every row).

        Args:
            row (frictionless.Row): The row to check the required
                constraint against.

        Yields:
            frictionless.Error: When one or more required fields are
                missing and no whitelisted value bypasses the check.
        """
        # Retrieve field names for missing (falsy) cells
        missing = [f for f in self.field_names if not row[f]]

        # All required fields provided - nothing to report
        if not missing:
            return

        # Determine if the rule is bypassable: any whitelist field whose
        # cell value appears in its corresponding whitelist set.
        bypassable_values = [row[k] for k, v in self.whitelists.items() if row[k] in v]
        if bypassable_values:
            return

        # Build the error note. Render whitelist fields as a plain list
        # (not the `dict_keys(...)` repr) for consistency with field_names.
        note = f"the columns {self.field_names} are all required"
        if self.whitelists:
            note += (
                f" unless the values provided in fields {list(self.whitelists.keys())}"
                " match any of those in their supplied whitelists"
            )
        note += f" ({missing} are missing)."

        # Yield Error
        yield frictionless.errors.RowConstraintError.from_row(
            row=row,
            note=note,
        )
4 changes: 2 additions & 2 deletions abis_mapping/templates/survey_occurrence_data_v2/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -741,14 +741,14 @@
{
"fields": "siteID",
"reference": {
"resource": "survey_site_data.csv",
"resource": "survey_site_data",
"fields": "siteID"
}
},
{
"fields": "siteVisitID",
"reference": {
"resource": "survey_site_visit_data.csv",
"resource": "survey_site_visit_data",
"fields": "siteVisitID"
}
}
Expand Down
57 changes: 38 additions & 19 deletions abis_mapping/templates/survey_site_visit_data_v2/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,34 +43,50 @@ def apply_validation(
frictionless.Report: Validation report for the specified data.
"""
# Extract keyword arguments
# TODO: Uncomment
# site_visit_id_map: dict[str, bool] = kwargs.get("site_visit_id_map", {})
site_visit_id_map: dict[str, bool] = kwargs.get("site_visit_id_map", {})
Lincoln-GR marked this conversation as resolved.
Show resolved Hide resolved

# Construct schema
schema = self.extra_fields_schema(data=data, full_schema=True)

# Construct resource
resource = frictionless.Resource(
resource_site_visit_data = frictionless.Resource(
source=data,
format="csv",
schema=schema,
encoding="utf-8",
)

# Validate
report = resource.validate(
# Base extra custom checks
checks = [
plugins.tabular.IsTabular(),
plugins.empty.NotEmpty(),
plugins.chronological.ChronologicalOrder(
field_names=["siteVisitStart", "siteVisitEnd"],
),
plugins.logical_or.LogicalOr(
field_names=["siteVisitStart", "siteVisitEnd"],
),
]

# Check to see if site visit id map was provided or was empty
if site_visit_id_map:
Lincoln-GR marked this conversation as resolved.
Show resolved Hide resolved
# Construct foreign key map
fk_map = {"siteVisitID": set(site_visit_id_map)}

# Add custom check for temporal flexibility with whitelists. Here deferring
the check on any ids found in the occurrence template, to when validation
# occurs on it in line with temporal flexibility rules.
checks += [
plugins.required.RequiredEnhanced(
field_names=["siteVisitStart"],
whitelists=fk_map,
)
]

# Validate the site visit resource
report: frictionless.Report = resource_site_visit_data.validate(
checklist=frictionless.Checklist(
checks=[
# Extra custom checks
plugins.tabular.IsTabular(),
plugins.empty.NotEmpty(),
plugins.logical_or.LogicalOr(
field_names=["siteVisitStart", "siteVisitEnd"],
),
plugins.chronological.ChronologicalOrder(
field_names=["siteVisitStart", "siteVisitEnd"],
),
],
checks=checks,
),
)

Expand Down Expand Up @@ -109,7 +125,10 @@ def extract_temporal_defaults(
start_date: types.temporal.Timestamp = row["siteVisitStart"]
end_date: types.temporal.Timestamp = row["siteVisitEnd"]
site_visit_id: str = row["siteVisitID"]
if not start_date and not end_date:


# Temporal flexibility depends solely on a start_date being present.
if not start_date:
continue

# Create new graph
Expand Down Expand Up @@ -146,12 +165,12 @@ def add_temporal_coverage_bnode(
# Create temporal coverage node
temporal_coverage = rdflib.BNode()
graph.add((temporal_coverage, a, rdflib.TIME.TemporalEntity))
if start_date:
if start_date is not None:
begin = rdflib.BNode()
graph.add((temporal_coverage, rdflib.TIME.hasBeginning, begin))
graph.add((begin, a, rdflib.TIME.Instant))
graph.add((begin, start_date.rdf_in_xsd, start_date.to_rdf_literal()))
if end_date:
if end_date is not None:
end = rdflib.BNode()
graph.add((temporal_coverage, rdflib.TIME.hasEnd, end))
graph.add((end, a, rdflib.TIME.Instant))
Expand Down
Loading