-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #380 from gaiaresources/BDRSPS-1144-resusable-sites
BDRSPS-1144 Re-usable Sites schema and validation changes
- Loading branch information
Showing
37 changed files
with
1,530 additions
and
240 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
"""Provides models related to "identifiers" in the template data.""" | ||
|
||
# Standard library | ||
import dataclasses | ||
|
||
# Third-party | ||
import frictionless | ||
|
||
# Typing | ||
from typing import Self | ||
|
||
|
||
@dataclasses.dataclass(eq=True, frozen=True, kw_only=True) | ||
class SiteIdentifier: | ||
"""A class to represent how a Site is identified in a row from a template. | ||
This is effectively either the existingBDRSiteIRI field, | ||
or a combination of the siteID and siteIDSource fields. | ||
These are the two ways Sites can be identified in a template. | ||
""" | ||
|
||
site_id: str | None | ||
site_id_source: str | None | ||
existing_bdr_site_iri: str | None | ||
|
||
@classmethod | ||
def from_row(cls, row: frictionless.Row) -> Self | None: | ||
"""Given a row in a template, return a SiteIdentifier for the site-id related fields. | ||
Args: | ||
row: The row of data. | ||
Returns: | ||
The SiteIdentifier for the siteID-related fields. | ||
None when the siteID-related fields are not in the row. | ||
""" | ||
# "existingBDRSiteIRI" is considered a higher "source of truth", | ||
# if a row has that, only use that as the identifier. | ||
# This means that two sets of identifier fields will compare equal if their | ||
# existingBDRSiteIRI matches, even if the others fields do not match. | ||
existing_bdr_site_iri: str | None = row["existingBDRSiteIRI"] | ||
if existing_bdr_site_iri: | ||
return cls( | ||
site_id=None, | ||
site_id_source=None, | ||
existing_bdr_site_iri=existing_bdr_site_iri, | ||
) | ||
|
||
# Otherwise try to use siteID and siteIDSource. | ||
site_id: str | None = row["siteID"] | ||
site_id_source: str | None = row["siteIDSource"] | ||
if site_id and site_id_source: | ||
return cls( | ||
site_id=site_id, | ||
site_id_source=site_id_source, | ||
existing_bdr_site_iri=None, | ||
) | ||
|
||
# Otherwise return None. | ||
return None | ||
|
||
def __format__(self, format_spec: str) -> str: | ||
"""Format the SiteIdentifier how it should be represented in error messages.""" | ||
if self.existing_bdr_site_iri: | ||
return f'existingBDRSiteIRI "{self.existing_bdr_site_iri}"' | ||
else: | ||
return f'siteID "{self.site_id}" and siteIDSource "{self.site_id_source}"' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
"""Provides extra frictionless check""" | ||
|
||
# Third-Party | ||
import attrs | ||
import frictionless | ||
import frictionless.errors | ||
|
||
# Typing | ||
from collections.abc import Iterator | ||
|
||
|
||
@attrs.define(kw_only=True, repr=False)
class SiteIdentifierCheck(frictionless.Check):
    """Row check requiring either (siteID + siteIDSource) or existingBDRSiteIRI to have a value."""

    # Check Attributes
    type = "site-identifier"
    Errors = [frictionless.errors.RowConstraintError]

    # Name of a field that gates this check: when set, rows where that field
    # is None are not checked at all.
    skip_when_missing: str | None = None

    def validate_row(self, row: frictionless.Row) -> Iterator[frictionless.Error]:
        """Validate a single row (called for every row).

        Args:
            row: The row to check.

        Yields:
            A RowConstraintError when the row has neither an
            existingBDRSiteIRI value nor both siteID and siteIDSource values.
        """
        gate_field = self.skip_when_missing

        # The check only applies when the gating field (if configured) has a value.
        if gate_field is not None and row[gate_field] is None:
            return

        # Read the three identifier-related cells.
        site_id: str | None = row["siteID"]
        site_id_source: str | None = row["siteIDSource"]
        existing_bdr_site_iri: str | None = row["existingBDRSiteIRI"]

        # Either way of identifying the site satisfies the check.
        if existing_bdr_site_iri or (site_id and site_id_source):
            return

        # Assemble the message, mentioning the gating field when one is configured.
        note_parts = ["Either siteID and siteIDSource, or existingBDRSiteIRI must be provided"]
        if gate_field is not None:
            note_parts.append(f", when {gate_field} is provided")
        note_parts.append(".")

        yield frictionless.errors.RowConstraintError.from_row(
            row=row,
            note="".join(note_parts),
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
"""Provides extra frictionless check""" | ||
|
||
# Third-Party | ||
import attrs | ||
import frictionless | ||
import frictionless.errors | ||
|
||
# Local | ||
from abis_mapping import models | ||
|
||
# Typing | ||
from collections.abc import Iterator, Mapping | ||
|
||
|
||
@attrs.define(kw_only=True, repr=False)
class SiteIdentifierMatches(frictionless.Check):
    """Checks that the row's siteVisitID + SiteIdentifier agree with another template.

    This is used by the survey_occurrence_data template to check that each occurrence
    with a siteVisitID has a SiteIdentifier that matches the SiteIdentifier recorded
    for that siteVisitID in the survey_site_visit_data template.

    i.e. The 'source of truth' FKs linking an Occurrence to a Site (when there is a Visit) are;
        occurrence.siteVisitID --> site_visit.siteVisitID && site_visit.SiteIdentifier --> site.SiteIdentifier
    There is also a 'short-cut' FK directly from Occurrence to Site;
        occurrence.SiteIdentifier --> site.SiteIdentifier
    This Check ensures the 'short-cut' FK agrees with the 'source of truth' ones.
    """

    # Check Attributes
    type = "site-identifier-matches"
    Errors = [frictionless.errors.RowConstraintError, frictionless.errors.ConstraintError]

    # Map from siteVisitID to SiteIdentifier, from the other template (typically survey_site_visit_data).
    # A value of None means that template has no usable identifier for the visit.
    site_visit_id_site_id_map: Mapping[str, models.identifier.SiteIdentifier | None]

    def validate_row(self, row: frictionless.Row) -> Iterator[frictionless.Error]:
        """Validate a single row (called for every row).

        Args:
            row: The row to check.

        Yields:
            A ConstraintError on siteVisitID when it is not a key of the map,
            or a RowConstraintError when the row's SiteIdentifier differs
            from the one the map holds for that siteVisitID.
        """
        # Rows without a siteVisitID are out of scope for this check.
        # If siteVisitID should be compulsory, enforce that with a required constraint or similar.
        visit_id: str | None = row["siteVisitID"]
        if not visit_id:
            return

        # Rows without an identifier are out of scope as well.
        # If the identifier must be provided, enforce that with the SiteIdentifierCheck plugin.
        row_identifier = models.identifier.SiteIdentifier.from_row(row)
        if not row_identifier:
            return

        # A siteVisitID that is absent from the map was not in the site visit data
        # template; that is reported as an error against this template.
        visit_lookup = self.site_visit_id_site_id_map
        if visit_id not in visit_lookup:
            yield frictionless.errors.ConstraintError.from_row(
                row=row,
                note="siteVisitID must match a siteVisitID in the survey_site_visit_data template",
                field_name="siteVisitID",
            )
            return

        # The site visit data template is missing the site identifier;
        # that will be an error in that template, so nothing is raised here.
        visit_identifier = visit_lookup[visit_id]
        if not visit_identifier:
            return

        # Both templates have SiteIdentifiers; matching ones need no error.
        if visit_identifier == row_identifier:
            return

        # Name the field(s) the other template uses to identify the site.
        fields = "existingBDRSiteIRI" if visit_identifier.existing_bdr_site_iri else "siteID and siteIDSource"
        yield frictionless.errors.RowConstraintError.from_row(
            row=row,
            note=(
                f'{fields} must match their values in the survey_site_visit_data template at the row with siteVisitID "{visit_id}".'
            ),
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.