From 82083280809762a471cc2187610da53be461bacb Mon Sep 17 00:00:00 2001 From: Lincoln Puzey Date: Tue, 5 Nov 2024 10:43:29 +0800 Subject: [PATCH 1/8] Add uri_quoted rdf util function --- abis_mapping/utils/rdf.py | 48 +++++++++++++++++++++++++++++++++++++++ tests/utils/test_rdf.py | 35 ++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/abis_mapping/utils/rdf.py b/abis_mapping/utils/rdf.py index 4795992b..bb3ed89d 100644 --- a/abis_mapping/utils/rdf.py +++ b/abis_mapping/utils/rdf.py @@ -87,6 +87,54 @@ def uri( return namespace[internal_id] +def uri_quoted( + namespace: Optional[rdflib.Namespace], + path: str, + /, + **kwargs: str, +) -> rdflib.URIRef: + """Generates a rdflib.URIRef using the supplied namespace and path. + + The path string can contain fields to be replaced in braces, e.g "survey/{survey_id}" + These fields are replaced by the contents of kwargs. + Each kwarg value is sanitised by being url-quoted before being inserted in the path. + Then the resulting path is then appended to the namespace to get the URI. + + Some examples: + >>> uri_quoted(namespaces.EXAMPLE, "someField/{the_value}", the_value="foo") + rdflib.URIRef("http://example.com/someField/foo") + >>> uri_quoted(namespaces.EXAMPLE, "Type/{field}/{value}", field="foo", value="Hello There!") + rdflib.URIRef("http://example.com/Type/foo/Hello%20There%21") + + url-quoting is used instead of slugify-ing; + 1. So that the exact original value can be determined from the URI. + This is not possible with slugify-ing which will remove or replace chars with "-". + 2. Different input values always result in different final URIs. + This is not always the case with slugify-ing, since chars can be dropped or replaced with "-". + + Args: + namespace: Namespace for the uri. + path: Path to append to the namespace. + Can contain fields to be replaced, e.g. "survey/{survey_id}". + kwargs: Fields and values to replace in the path string. + + Returns: + Generated URI. 
+ Raises: + KeyError: When the path string contains a field not specified in kwargs. + """ + # Check for namespace + if namespace is None: + # Set Default Namespace + namespace = namespaces.CREATEME + + # fill in any {field} names in path with url-quoted kwargs. + path = path.format_map({field: urllib.parse.quote(value, safe="") for field, value in kwargs.items()}) + + # Create URIRef and Return + return namespace[path] + + def extend_uri( base: rdflib.URIRef, *parts: str, diff --git a/tests/utils/test_rdf.py b/tests/utils/test_rdf.py index c7e3b44c..65afa442 100644 --- a/tests/utils/test_rdf.py +++ b/tests/utils/test_rdf.py @@ -41,6 +41,41 @@ def test_rdf_uri() -> None: assert isinstance(d, rdflib.URIRef) +@pytest.mark.parametrize( + ("namespace", "path", "fields", "expected"), + [ + (rdflib.Namespace("https://test.com/foo/"), "", {}, rdflib.URIRef("https://test.com/foo/")), + (rdflib.Namespace("https://test.com/foo/"), "bar", {}, rdflib.URIRef("https://test.com/foo/bar")), + ( + rdflib.Namespace("https://test.com/foo/"), + "bar/{v}", + {"v": "123"}, + rdflib.URIRef("https://test.com/foo/bar/123"), + ), + ( + rdflib.Namespace("https://test.com/foo/"), + "bar/{v1}/cat/{v2}", + {"v1": "123", "v2": "A B C?!"}, + rdflib.URIRef("https://test.com/foo/bar/123/cat/A%20B%20C%3F%21"), + ), + ], +) +def test_uri_quoted( + namespace: rdflib.Namespace, + path: str, + fields: dict[str, str], + expected: rdflib.URIRef, +) -> None: + """Test the uri_quoted function. 
+ + * different length paths + * with/without special chars + * with/without replacement fields + """ + result = utils.rdf.uri_quoted(namespace, path, **fields) + assert result == expected + + @pytest.mark.parametrize( ("base", "extension", "expected"), [ From c201bfc981b7f04bf0aa816c28a6ed876d8032dc Mon Sep 17 00:00:00 2001 From: Lincoln Puzey Date: Thu, 7 Nov 2024 08:30:55 +0800 Subject: [PATCH 2/8] BDRSPS-966 Update tern:Survey IRI construction Put in helper function to ensure it stays consistent between templates --- .../survey_metadata_v2/examples/minimal.ttl | 6 ++-- .../templates/survey_metadata_v2/mapping.py | 7 ++-- .../margaret_river_flora.ttl | 32 ++++++++--------- .../examples/organism_qty.ttl | 2 +- .../survey_occurrence_data_v2/mapping.py | 10 ++---- .../examples/minimal.ttl | 6 ++-- .../survey_site_visit_data_v2/mapping.py | 10 ++---- abis_mapping/utils/__init__.py | 1 + abis_mapping/utils/iri_patterns.py | 36 +++++++++++++++++++ 9 files changed, 68 insertions(+), 42 deletions(-) create mode 100644 abis_mapping/utils/iri_patterns.py diff --git a/abis_mapping/templates/survey_metadata_v2/examples/minimal.ttl b/abis_mapping/templates/survey_metadata_v2/examples/minimal.ttl index b930679b..f4c7dfd4 100644 --- a/abis_mapping/templates/survey_metadata_v2/examples/minimal.ttl +++ b/abis_mapping/templates/survey_metadata_v2/examples/minimal.ttl @@ -53,7 +53,7 @@ a bdr:Project ; void:inDataset ; - schema:hasPart ; + schema:hasPart ; schema:identifier "COL1" ; schema:name "Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Summer" . @@ -121,7 +121,7 @@ rdfs:label "Insecta" ; rdf:value . - a tern:Survey ; + a tern:Survey ; bdr:purpose "Summer sampling for peak insect diversity." 
; bdr:target "Coleoptera", "Insecta" ; @@ -161,7 +161,7 @@ geo:asWKT " POLYGON ((-33.826 146.363, -33.826 148.499, -34.411 148.499, -33.826 146.363))"^^geo:wktLiteral ] ; rdf:object _:N52d3d4f338b894a95032d27200000000 ; rdf:predicate geo:hasGeometry ; - rdf:subject ; + rdf:subject ; rdfs:comment "supplied as" . _:N52d3d4f338b894a95032d27200000000 a geo:Geometry ; diff --git a/abis_mapping/templates/survey_metadata_v2/mapping.py b/abis_mapping/templates/survey_metadata_v2/mapping.py index f8de8f8d..152fcf88 100644 --- a/abis_mapping/templates/survey_metadata_v2/mapping.py +++ b/abis_mapping/templates/survey_metadata_v2/mapping.py @@ -2,7 +2,6 @@ # Standard import dataclasses -import urllib.parse # Third-party import frictionless @@ -193,9 +192,9 @@ def apply_mapping_row( # Create BDR project IRI project = utils.rdf.uri(f"project/SSD-Survey-Project/{row_num}", base_iri) - # Create TERN survey IRI - Note this needs to match the iri construction of the - # survey occurrence and site vist template mapping, ensuring they will resolve properly. 
- survey = utils.rdf.uri("survey/", base_iri) + urllib.parse.quote(row["surveyID"], safe="") + # Create TERN survey IRI from surveyID field + survey_id: str | None = row["surveyID"] + survey = utils.iri_patterns.survey_iri(base_iri, survey_id=survey_id) # Create survey plan IRI survey_plan = utils.rdf.uri(f"survey/SSD-survey/{row_num}/plan") diff --git a/abis_mapping/templates/survey_occurrence_data_v2/examples/margaret_river_flora/margaret_river_flora.ttl b/abis_mapping/templates/survey_occurrence_data_v2/examples/margaret_river_flora/margaret_river_flora.ttl index c18b7d95..df034b65 100644 --- a/abis_mapping/templates/survey_occurrence_data_v2/examples/margaret_river_flora/margaret_river_flora.ttl +++ b/abis_mapping/templates/survey_occurrence_data_v2/examples/margaret_river_flora/margaret_river_flora.ttl @@ -1634,7 +1634,7 @@ tern:FeatureOfInterest ; void:inDataset ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f00000021 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1655,7 +1655,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f00000000 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1668,7 +1668,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f0000001b ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1681,7 +1681,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f0000001e ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1694,7 +1694,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial 
_:N1dddcc66c58fa593aae7f72f00000024 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1707,7 +1707,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f00000003 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1720,7 +1720,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f00000006 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1733,7 +1733,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f00000009 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1746,7 +1746,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f0000000c ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1759,7 +1759,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f0000000f ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1772,7 +1772,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f00000012 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1785,7 +1785,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f00000015 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1798,7 +1798,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure 
; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f00000018 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1820,7 +1820,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f00000027 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1837,7 +1837,7 @@ "M12378"^^, "MR-456"^^, "PE:12:8832"^^ ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f0000002a ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1856,7 +1856,7 @@ "M12379"^^, "MR-457"^^, "PE:12:8833"^^ ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f0000002d ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; diff --git a/abis_mapping/templates/survey_occurrence_data_v2/examples/organism_qty.ttl b/abis_mapping/templates/survey_occurrence_data_v2/examples/organism_qty.ttl index d546254a..00858a35 100644 --- a/abis_mapping/templates/survey_occurrence_data_v2/examples/organism_qty.ttl +++ b/abis_mapping/templates/survey_occurrence_data_v2/examples/organism_qty.ttl @@ -84,7 +84,7 @@ tern:FeatureOfInterest ; void:inDataset ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:Nbf8d3720a20d4c0a80c10b6800000000 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; diff --git a/abis_mapping/templates/survey_occurrence_data_v2/mapping.py b/abis_mapping/templates/survey_occurrence_data_v2/mapping.py index d6249a9f..c741f16e 100644 --- a/abis_mapping/templates/survey_occurrence_data_v2/mapping.py +++ b/abis_mapping/templates/survey_occurrence_data_v2/mapping.py @@ -601,13 +601,9 @@ def apply_mapping_row( sensitivity_category_value = None sensitivity_category_collection = None - # Conditionally create uri dependent on surveyID field. 
- if survey_id := row["surveyID"]: - # Create TERN.Survey subject IRI - Note this needs to match the iri construction of the - # survey metadata and site vist template mapping, ensuring they will resolve properly. - survey = utils.rdf.uri("survey/", base_iri) + urllib.parse.quote(survey_id, safe="") - else: - survey = utils.rdf.uri("survey/1", base_iri) + # Create TERN survey IRI from surveyID field + survey_id: str | None = row["surveyID"] + survey = utils.iri_patterns.survey_iri(base_iri, survey_id=survey_id) # Conditionally create uri dependent on siteVisitID field. if site_visit_id := row["siteVisitID"]: diff --git a/abis_mapping/templates/survey_site_visit_data_v2/examples/minimal.ttl b/abis_mapping/templates/survey_site_visit_data_v2/examples/minimal.ttl index 394e654c..7f62b677 100644 --- a/abis_mapping/templates/survey_site_visit_data_v2/examples/minimal.ttl +++ b/abis_mapping/templates/survey_site_visit_data_v2/examples/minimal.ttl @@ -157,7 +157,7 @@ prov:wasAssociatedWith , ; schema:identifier "TIS-24-03-P1-03" ; - schema:isPartOf ; + schema:isPartOf ; tern:hasSite , "P1"^^ . @@ -178,7 +178,7 @@ , ; schema:identifier "TIS-24-03-P1-01" ; - schema:isPartOf ; + schema:isPartOf ; tern:hasSite , "P1"^^ ; tern:siteDescription "dry" . @@ -192,7 +192,7 @@ prov:wasAssociatedWith , ; schema:identifier "TIS-24-03-P1-02" ; - schema:isPartOf ; + schema:isPartOf ; tern:hasSite , "P1"^^ ; tern:siteDescription "moist leaf litter after recent rain" . diff --git a/abis_mapping/templates/survey_site_visit_data_v2/mapping.py b/abis_mapping/templates/survey_site_visit_data_v2/mapping.py index d7628893..e6d0a98f 100644 --- a/abis_mapping/templates/survey_site_visit_data_v2/mapping.py +++ b/abis_mapping/templates/survey_site_visit_data_v2/mapping.py @@ -350,15 +350,9 @@ def apply_mapping_row( # survey site and occurrence template mapping, ensuring they will resolve properly. 
uri_site = utils.rdf.uri("site/", base_iri) + urllib.parse.quote(row_site_id, safe="") - # URI for the Survey + # Create TERN survey IRI from surveyID field row_survey_id: str | None = row["surveyID"] - if row_survey_id: - # Create TERN survey IRI - Note this needs to match the iri construction of the - # survey occurrence and metadata template mapping, ensuring they will resolve properly. - uri_survey = utils.rdf.uri("survey/", base_iri) + urllib.parse.quote(row_survey_id, safe="") - else: - # Default IRI - uri_survey = utils.rdf.uri("survey/1", base_iri) + uri_survey = utils.iri_patterns.survey_iri(base_iri, survey_id=row_survey_id) # URI for the Site Visit Plan uri_site_visit_plan = utils.rdf.extend_uri_quoted(dataset, "visit", "plan", row_site_visit_id) diff --git a/abis_mapping/utils/__init__.py b/abis_mapping/utils/__init__.py index 1a750db8..ad0d0e52 100644 --- a/abis_mapping/utils/__init__.py +++ b/abis_mapping/utils/__init__.py @@ -2,6 +2,7 @@ # Local from . import coords +from . import iri_patterns from . import namespaces from . import rdf from . import strings diff --git a/abis_mapping/utils/iri_patterns.py b/abis_mapping/utils/iri_patterns.py new file mode 100644 index 00000000..99884cdd --- /dev/null +++ b/abis_mapping/utils/iri_patterns.py @@ -0,0 +1,36 @@ +"""This module contains functions for constructing IRIs according to common patterns. + +This is important when the exact same IRI needs to be constructed from multiple template +mappings so that the output RDF links together on these IRIs.""" + +# third party +import rdflib + +# local +from abis_mapping import utils + + +def survey_iri( + base_iri: rdflib.Namespace | None, + *, + survey_id: str | None, +) -> rdflib.URIRef: + """Get the IRI for the tern:Survey node, constructed from the surveyID field. + + This IRI is used in mapping multiple Systematic Survey templates, + and needs to be the same for all of them. + + Args: + base_iri: The namespace to construct the IRI from. 
+ survey_id: The surveyID field from the template. + + Returns: + The IRI for the tern:Survey node. + """ + return utils.rdf.uri_quoted( + base_iri, + "Survey/{survey_id}", + # surveyID is an optional field. When missing fallback to the row number. + # (which is always 1 for a Survey, since metadata template must have 1 row) + survey_id=(survey_id or "1"), + ) From 4658f02832934a3ffb9342a88be13e002a09ebc2 Mon Sep 17 00:00:00 2001 From: Lincoln Puzey Date: Tue, 5 Nov 2024 11:43:59 +0800 Subject: [PATCH 3/8] BDRSPS-966 Construct prov:Plan IRI Pattern for the survey plan from the base_iri --- .../survey_metadata_v2/examples/minimal.ttl | 12 ++++++------ abis_mapping/templates/survey_metadata_v2/mapping.py | 6 +++++- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/abis_mapping/templates/survey_metadata_v2/examples/minimal.ttl b/abis_mapping/templates/survey_metadata_v2/examples/minimal.ttl index f4c7dfd4..96cda9bc 100644 --- a/abis_mapping/templates/survey_metadata_v2/examples/minimal.ttl +++ b/abis_mapping/templates/survey_metadata_v2/examples/minimal.ttl @@ -13,25 +13,25 @@ a schema:Collection ; void:inDataset ; schema:identifier "Survey Collection - Survey Type - Wet pitfall trapping" ; - schema:member ; + schema:member ; tern:hasAttribute . a schema:Collection ; void:inDataset ; schema:identifier "Survey Collection - Target Habitat Scope - Woodland" ; - schema:member ; + schema:member ; tern:hasAttribute . a schema:Collection ; void:inDataset ; schema:identifier "Survey Collection - Target Taxonomic Scope - Coleoptera" ; - schema:member ; + schema:member ; tern:hasAttribute . a schema:Collection ; void:inDataset ; schema:identifier "Survey Collection - Target Taxonomic Scope - Insecta" ; - schema:member ; + schema:member ; tern:hasAttribute . 
a rdfs:Datatype ; @@ -131,7 +131,7 @@ time:inXSDDate "2015-01-21"^^xsd:date ] ; time:hasEnd [ a time:Instant ; time:inXSDDate "2015-02-03"^^xsd:date ] ] ; - prov:hadPlan ; + prov:hadPlan ; schema:identifier "COL1"^^, "COL1"^^ ; schema:keywords "farmland", @@ -144,7 +144,7 @@ "woodland" ; schema:name "Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Summer" . - a prov:Plan ; + a prov:Plan ; schema:citation "Ng, K., Barton, P.S., Blanchard, W. et al. Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits. Oecologia 188, 645–657 (2018). https://doi.org/10.1007/s00442-018-4180-9\"" ; schema:description "Our experimental design consisted of four 400 m transects running from inside each woodland patch out into four adjoining farmland uses (crop, rested, woody debris application, revegetation plantings). To quantify potential edge efects on beetle species traits, we sampled beetles at five locations along each transect: 200 and 20 m inside woodlands, 200 and 20 m inside farmlands, and at the woodland–farmland edge (0 m). Each sampling location comprised a pair of wet invertebrate pitfall traps. separated by a drift fence (60 cm long x 10 cm high) to help direct arthropods into traps. We opened a total of 220 pairs of traps for 14 days during spring (Oct–Nov 2014), and repeated sampling during summer (January–February 2015). Beetle samples from each pitfall trap pair, and across the two time periods, were pooled to provide one sample per sampling location." 
; schema:url "https://biocollect.ala.org.au/document/download/2022-01/202201%20CBR%20Flora%20and%20Vegetation%20report_draftv1.pdf"^^xsd:anyURI, diff --git a/abis_mapping/templates/survey_metadata_v2/mapping.py b/abis_mapping/templates/survey_metadata_v2/mapping.py index 152fcf88..ae3d0f0c 100644 --- a/abis_mapping/templates/survey_metadata_v2/mapping.py +++ b/abis_mapping/templates/survey_metadata_v2/mapping.py @@ -197,7 +197,11 @@ def apply_mapping_row( survey = utils.iri_patterns.survey_iri(base_iri, survey_id=survey_id) # Create survey plan IRI - survey_plan = utils.rdf.uri(f"survey/SSD-survey/{row_num}/plan") + survey_plan = utils.rdf.uri_quoted( + base_iri, + "Plan/{survey_id}", + survey_id=(survey_id or str(row_num)), # fallback to row number when surveyID not available. + ) # Conditionally create survey type attribute, value and collection IRIs row_survey_type: str | None = row["surveyType"] From 86da1a72b5d7c2a1cb4748d173b1d674f9afddb0 Mon Sep 17 00:00:00 2001 From: Lincoln Puzey Date: Thu, 7 Nov 2024 10:57:30 +0800 Subject: [PATCH 4/8] BDRSPS-966 Use base_iri to construct IRIs for attribute, value and collections nodes Add helper functions to make these consistent --- .../survey_metadata_v2/examples/minimal.ttl | 68 +++++++++---------- .../templates/survey_metadata_v2/mapping.py | 24 ++++--- abis_mapping/utils/iri_patterns.py | 62 +++++++++++++++++ 3 files changed, 109 insertions(+), 45 deletions(-) diff --git a/abis_mapping/templates/survey_metadata_v2/examples/minimal.ttl b/abis_mapping/templates/survey_metadata_v2/examples/minimal.ttl index 96cda9bc..66d5fced 100644 --- a/abis_mapping/templates/survey_metadata_v2/examples/minimal.ttl +++ b/abis_mapping/templates/survey_metadata_v2/examples/minimal.ttl @@ -10,29 +10,29 @@ @prefix void: . @prefix xsd: . - a schema:Collection ; + a schema:Collection ; void:inDataset ; schema:identifier "Survey Collection - Survey Type - Wet pitfall trapping" ; schema:member ; - tern:hasAttribute . + tern:hasAttribute . 
- a schema:Collection ; + a schema:Collection ; void:inDataset ; schema:identifier "Survey Collection - Target Habitat Scope - Woodland" ; schema:member ; - tern:hasAttribute . + tern:hasAttribute . - a schema:Collection ; + a schema:Collection ; void:inDataset ; schema:identifier "Survey Collection - Target Taxonomic Scope - Coleoptera" ; schema:member ; - tern:hasAttribute . + tern:hasAttribute . - a schema:Collection ; + a schema:Collection ; void:inDataset ; schema:identifier "Survey Collection - Target Taxonomic Scope - Insecta" ; schema:member ; - tern:hasAttribute . + tern:hasAttribute . a rdfs:Datatype ; skos:definition "An identifier for the dataset" ; @@ -63,60 +63,60 @@ a prov:Agent ; schema:name "NSW Department of Planning, Industry and Environment" . - a skos:Concept ; - skos:broader ; - skos:definition "A type of targetTaxonomicScope" ; - skos:inScheme ; - skos:prefLabel "Coleoptera" ; - schema:citation "http://createme.org/dataset/Example-Systematic-Survey-Metadata-Dataset"^^xsd:anyURI . - - a skos:Concept ; - skos:broader ; - skos:definition "A type of targetTaxonomicScope" ; - skos:inScheme ; - skos:prefLabel "Insecta" ; - schema:citation "http://createme.org/dataset/Example-Systematic-Survey-Metadata-Dataset"^^xsd:anyURI . - - a tern:Attribute ; + a tern:Attribute ; void:inDataset ; tern:attribute ; tern:hasSimpleValue "Wet pitfall trapping" ; - tern:hasValue . + tern:hasValue . - a tern:Attribute ; + a tern:Attribute ; void:inDataset ; tern:attribute ; tern:hasSimpleValue "Woodland" ; - tern:hasValue . + tern:hasValue . - a tern:Attribute ; + a tern:Attribute ; void:inDataset ; tern:attribute ; tern:hasSimpleValue "Coleoptera" ; - tern:hasValue . + tern:hasValue . - a tern:Attribute ; + a tern:Attribute ; void:inDataset ; tern:attribute ; tern:hasSimpleValue "Insecta" ; - tern:hasValue . + tern:hasValue . 
+ + a skos:Concept ; + skos:broader ; + skos:definition "A type of targetTaxonomicScope" ; + skos:inScheme ; + skos:prefLabel "Coleoptera" ; + schema:citation "http://createme.org/dataset/Example-Systematic-Survey-Metadata-Dataset"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of targetTaxonomicScope" ; + skos:inScheme ; + skos:prefLabel "Insecta" ; + schema:citation "http://createme.org/dataset/Example-Systematic-Survey-Metadata-Dataset"^^xsd:anyURI . - a tern:IRI, + a tern:IRI, tern:Value ; rdfs:label "Wet pitfall trapping" ; rdf:value . - a tern:IRI, + a tern:IRI, tern:Value ; rdfs:label "Woodland" ; rdf:value . - a tern:IRI, + a tern:IRI, tern:Value ; rdfs:label "Coleoptera" ; rdf:value . - a tern:IRI, + a tern:IRI, tern:Value ; rdfs:label "Insecta" ; rdf:value . diff --git a/abis_mapping/templates/survey_metadata_v2/mapping.py b/abis_mapping/templates/survey_metadata_v2/mapping.py index ae3d0f0c..7dc15536 100644 --- a/abis_mapping/templates/survey_metadata_v2/mapping.py +++ b/abis_mapping/templates/survey_metadata_v2/mapping.py @@ -206,9 +206,11 @@ def apply_mapping_row( # Conditionally create survey type attribute, value and collection IRIs row_survey_type: str | None = row["surveyType"] if row_survey_type: - survey_type_attribute = utils.rdf.extend_uri(dataset, "attribute", "surveyType", row_survey_type) - survey_type_value = utils.rdf.extend_uri(dataset, "value", "surveyType", row_survey_type) - survey_type_collection = utils.rdf.extend_uri(dataset, "SurveyCollection", "surveyType", row_survey_type) + survey_type_attribute = utils.iri_patterns.attribute_iri(base_iri, "surveyType", row_survey_type) + survey_type_value = utils.iri_patterns.attribute_value_iri(base_iri, "surveyType", row_survey_type) + survey_type_collection = utils.iri_patterns.attribute_collection_iri( + base_iri, "Survey", "surveyType", row_survey_type + ) else: survey_type_attribute = None survey_type_value = None @@ -221,10 +223,10 @@ def apply_mapping_row( 
target_habitat_objects.append( AttributeValue( raw=target_habitat, - attribute=utils.rdf.extend_uri(dataset, "attribute", "targetHabitatScope", target_habitat), - value=utils.rdf.extend_uri(dataset, "value", "targetHabitatScope", target_habitat), - collection=utils.rdf.extend_uri( - dataset, "SurveyCollection", "targetHabitatScope", target_habitat + attribute=utils.iri_patterns.attribute_iri(base_iri, "targetHabitatScope", target_habitat), + value=utils.iri_patterns.attribute_value_iri(base_iri, "targetHabitatScope", target_habitat), + collection=utils.iri_patterns.attribute_collection_iri( + base_iri, "Survey", "targetHabitatScope", target_habitat ), ), ) @@ -236,10 +238,10 @@ def apply_mapping_row( target_taxonomic_objects.append( AttributeValue( raw=target_taxon, - attribute=utils.rdf.extend_uri(dataset, "attribute", "targetTaxonomicScope", target_taxon), - value=utils.rdf.extend_uri(dataset, "value", "targetTaxonomicScope", target_taxon), - collection=utils.rdf.extend_uri( - dataset, "SurveyCollection", "targetTaxonomicScope", target_taxon + attribute=utils.iri_patterns.attribute_iri(base_iri, "targetTaxonomicScope", target_taxon), + value=utils.iri_patterns.attribute_value_iri(base_iri, "targetTaxonomicScope", target_taxon), + collection=utils.iri_patterns.attribute_collection_iri( + base_iri, "Survey", "targetTaxonomicScope", target_taxon ), ) ) diff --git a/abis_mapping/utils/iri_patterns.py b/abis_mapping/utils/iri_patterns.py index 99884cdd..c3f1efed 100644 --- a/abis_mapping/utils/iri_patterns.py +++ b/abis_mapping/utils/iri_patterns.py @@ -9,6 +9,9 @@ # local from abis_mapping import utils +# typing +from typing import Literal + def survey_iri( base_iri: rdflib.Namespace | None, @@ -34,3 +37,62 @@ def survey_iri( # (which is always 1 for a Survey, since metadata template must have 1 row) survey_id=(survey_id or "1"), ) + + +def attribute_iri( + base_iri: rdflib.Namespace | None, + attribute: str, + value: str, + /, +) -> rdflib.URIRef: + """Get the IRI 
to use for a tern:Attribute node. + + Args: + base_iri: The namespace to construct the IRI from. + attribute: Typically the name of the CSV field. + value: The value of the attribute. + + Returns: + IRI for the tern:Attribute node. + """ + return utils.rdf.uri(f"attribute/{attribute}/{value}", namespace=base_iri) + + +def attribute_value_iri( + base_iri: rdflib.Namespace | None, + attribute: str, + value: str, + /, +) -> rdflib.URIRef: + """Get the IRI to use for a tern:Value node associated with a tern:Attribute node. + + Args: + base_iri: The namespace to construct the IRI from. + attribute: Typically the name of the CSV field. + value: The value of the attribute. + + Returns: + IRI for the tern:Value node. + """ + return utils.rdf.uri(f"value/{attribute}/{value}", namespace=base_iri) + + +def attribute_collection_iri( + base_iri: rdflib.Namespace | None, + collection_type: Literal["Survey", "Occurrence", "Site", "SiteVisit"], + attribute: str, + value: str, + /, +) -> rdflib.URIRef: + """Get the IRI to use for a schema:Collection node associated with a tern:Attribute node. + + Args: + base_iri: The namespace to construct the IRI from. + collection_type: What 'type' of Collection this is. Corresponds to the template being mapped. + attribute: Typically the name of the CSV field. + value: The value of the attribute. + + Returns: + IRI for the schema:Collection node. 
+ """ + return utils.rdf.uri(f"{collection_type}Collection/{attribute}/{value}", namespace=base_iri) From 07936a034e6548b12cde0e1f940db781a4029eeb Mon Sep 17 00:00:00 2001 From: Lincoln Puzey Date: Thu, 7 Nov 2024 14:00:21 +0800 Subject: [PATCH 5/8] BDRSPS-966 Update IRIs for surveyID datatype and agent to use new namespaces --- .../survey_metadata_v2/examples/minimal.ttl | 36 ++++++++-------- .../templates/survey_metadata_v2/mapping.py | 4 +- abis_mapping/utils/iri_patterns.py | 41 +++++++++++++++++++ abis_mapping/utils/namespaces.py | 2 + 4 files changed, 63 insertions(+), 20 deletions(-) diff --git a/abis_mapping/templates/survey_metadata_v2/examples/minimal.ttl b/abis_mapping/templates/survey_metadata_v2/examples/minimal.ttl index 66d5fced..b4297a93 100644 --- a/abis_mapping/templates/survey_metadata_v2/examples/minimal.ttl +++ b/abis_mapping/templates/survey_metadata_v2/examples/minimal.ttl @@ -39,29 +39,23 @@ skos:prefLabel "Gaia Resources datasetID" ; prov:wasAttributedTo . - a rdfs:Datatype ; - skos:prefLabel "surveyID source" ; - prov:qualifiedAttribution [ a prov:Attribution ; - prov:agent ; - prov:hadRole ] . - - a rdfs:Datatype ; - skos:prefLabel "surveyID source" ; - prov:qualifiedAttribution [ a prov:Attribution ; - prov:agent ; - prov:hadRole ] . - a bdr:Project ; void:inDataset ; schema:hasPart ; schema:identifier "COL1" ; schema:name "Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Summer" . - a prov:Agent ; - schema:name "CSIRO" . + a rdfs:Datatype ; + skos:prefLabel "surveyID source" ; + prov:qualifiedAttribution [ a prov:Attribution ; + prov:agent ; + prov:hadRole ] . - a prov:Agent ; - schema:name "NSW Department of Planning, Industry and Environment" . + a rdfs:Datatype ; + skos:prefLabel "surveyID source" ; + prov:qualifiedAttribution [ a prov:Attribution ; + prov:agent ; + prov:hadRole ] . 
a tern:Attribute ; void:inDataset ; @@ -121,6 +115,12 @@ rdfs:label "Insecta" ; rdf:value . + a prov:Agent ; + schema:name "CSIRO" . + + a prov:Agent ; + schema:name "NSW Department of Planning, Industry and Environment" . + a tern:Survey ; bdr:purpose "Summer sampling for peak insect diversity." ; bdr:target "Coleoptera", @@ -132,8 +132,8 @@ time:hasEnd [ a time:Instant ; time:inXSDDate "2015-02-03"^^xsd:date ] ] ; prov:hadPlan ; - schema:identifier "COL1"^^, - "COL1"^^ ; + schema:identifier "COL1"^^, + "COL1"^^ ; schema:keywords "farmland", "ground beetle", "habitat", diff --git a/abis_mapping/templates/survey_metadata_v2/mapping.py b/abis_mapping/templates/survey_metadata_v2/mapping.py index 7dc15536..45c93a4c 100644 --- a/abis_mapping/templates/survey_metadata_v2/mapping.py +++ b/abis_mapping/templates/survey_metadata_v2/mapping.py @@ -253,8 +253,8 @@ def apply_mapping_row( survey_org_objects.append( SurveyIDDatatype( name=raw_org, - datatype=utils.rdf.uri(f"datatype/surveyID/{raw_org}", base_iri), - agent=utils.rdf.uri(f"agent/{raw_org}"), + datatype=utils.iri_patterns.datatype_iri("surveyID", raw_org), + agent=utils.iri_patterns.agent_iri(raw_org), ) ) diff --git a/abis_mapping/utils/iri_patterns.py b/abis_mapping/utils/iri_patterns.py index c3f1efed..b71f936e 100644 --- a/abis_mapping/utils/iri_patterns.py +++ b/abis_mapping/utils/iri_patterns.py @@ -96,3 +96,44 @@ def attribute_collection_iri( IRI for the schema:Collection node. """ return utils.rdf.uri(f"{collection_type}Collection/{attribute}/{value}", namespace=base_iri) + + +def datatype_iri( + identifier_type: str, + identifier_source: str, + /, +) -> rdflib.URIRef: + """Get the IRI to use for a rdfs:Datatype node. + + This node typically represents the source of an identifier. + + Args: + identifier_type: What identifier the datatype is for. e.g. "surveyID", "catalogNumber" + identifier_source: The source of the identifier. + e.g. 
the value for the "surveyOrgs" or "catalogNumberSource" template fields. + + Returns: + URIRef for the rdfs:Datatype node. + """ + return utils.rdf.uri( + f"{identifier_type}/{identifier_source}", + namespace=utils.namespaces.BDR_DATATYPES, + ) + + +def agent_iri( + org: str, + /, +) -> rdflib.URIRef: + """Get the IRI to use for a prov:Agent node. + + Args: + org: The org this agent node represents. + + Returns: + URIRef for the prov:Agent node. + """ + return utils.rdf.uri( + f"{org}", + namespace=utils.namespaces.BDR_ORGS, + ) diff --git a/abis_mapping/utils/namespaces.py b/abis_mapping/utils/namespaces.py index a4f297bf..40f0d66e 100644 --- a/abis_mapping/utils/namespaces.py +++ b/abis_mapping/utils/namespaces.py @@ -14,3 +14,5 @@ DWC = rdflib.Namespace("http://rs.tdwg.org/dwc/terms/") BDR = rdflib.Namespace("http://example.com/bdr-schema/") ABIS = rdflib.Namespace("https://linked.data.gov.au/def/abis/") +BDR_DATATYPES = rdflib.Namespace("https://linked.data.gov.au/dataset/bdr/datatypes/") +BDR_ORGS = rdflib.Namespace("https://linked.data.gov.au/dataset/bdr/orgs/") From 1263ad7003f9a857caccfd06ba7a0d09c49285b3 Mon Sep 17 00:00:00 2001 From: Lincoln Puzey Date: Fri, 8 Nov 2024 12:02:16 +0800 Subject: [PATCH 6/8] Add args to test_uri_quoted docstring --- tests/utils/test_rdf.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/utils/test_rdf.py b/tests/utils/test_rdf.py index 65afa442..02a6be40 100644 --- a/tests/utils/test_rdf.py +++ b/tests/utils/test_rdf.py @@ -71,6 +71,12 @@ def test_uri_quoted( * different length paths * with/without special chars * with/without replacement fields + + Args: + namespace: The namespace for the uri_quoted function + path: The path for the uri_quoted function + fields: The fields to pass as kwargs to uri_quoted + expected: The expected return for the uri_quoted function """ result = utils.rdf.uri_quoted(namespace, path, **fields) assert result == expected From 75197aba23e02cd17bd0c890f55de6da7c39daea Mon Sep 17 
00:00:00 2001 From: Lincoln Puzey Date: Fri, 8 Nov 2024 13:07:44 +0800 Subject: [PATCH 7/8] Make survey_iri take position-only args for consistency --- abis_mapping/templates/survey_metadata_v2/mapping.py | 2 +- abis_mapping/templates/survey_occurrence_data_v2/mapping.py | 2 +- abis_mapping/templates/survey_site_visit_data_v2/mapping.py | 2 +- abis_mapping/utils/iri_patterns.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/abis_mapping/templates/survey_metadata_v2/mapping.py b/abis_mapping/templates/survey_metadata_v2/mapping.py index 45c93a4c..52be2d0f 100644 --- a/abis_mapping/templates/survey_metadata_v2/mapping.py +++ b/abis_mapping/templates/survey_metadata_v2/mapping.py @@ -194,7 +194,7 @@ def apply_mapping_row( # Create TERN survey IRI from surveyID field survey_id: str | None = row["surveyID"] - survey = utils.iri_patterns.survey_iri(base_iri, survey_id=survey_id) + survey = utils.iri_patterns.survey_iri(base_iri, survey_id) # Create survey plan IRI survey_plan = utils.rdf.uri_quoted( diff --git a/abis_mapping/templates/survey_occurrence_data_v2/mapping.py b/abis_mapping/templates/survey_occurrence_data_v2/mapping.py index c741f16e..92f48144 100644 --- a/abis_mapping/templates/survey_occurrence_data_v2/mapping.py +++ b/abis_mapping/templates/survey_occurrence_data_v2/mapping.py @@ -603,7 +603,7 @@ def apply_mapping_row( # Create TERN survey IRI from surveyID field survey_id: str | None = row["surveyID"] - survey = utils.iri_patterns.survey_iri(base_iri, survey_id=survey_id) + survey = utils.iri_patterns.survey_iri(base_iri, survey_id) # Conditionally create uri dependent on siteVisitID field. 
if site_visit_id := row["siteVisitID"]: diff --git a/abis_mapping/templates/survey_site_visit_data_v2/mapping.py b/abis_mapping/templates/survey_site_visit_data_v2/mapping.py index e6d0a98f..8d5fd2ae 100644 --- a/abis_mapping/templates/survey_site_visit_data_v2/mapping.py +++ b/abis_mapping/templates/survey_site_visit_data_v2/mapping.py @@ -352,7 +352,7 @@ def apply_mapping_row( # Create TERN survey IRI from surveyID field row_survey_id: str | None = row["surveyID"] - uri_survey = utils.iri_patterns.survey_iri(base_iri, survey_id=row_survey_id) + uri_survey = utils.iri_patterns.survey_iri(base_iri, row_survey_id) # URI for the Site Visit Plan uri_site_visit_plan = utils.rdf.extend_uri_quoted(dataset, "visit", "plan", row_site_visit_id) diff --git a/abis_mapping/utils/iri_patterns.py b/abis_mapping/utils/iri_patterns.py index b71f936e..0a41cdeb 100644 --- a/abis_mapping/utils/iri_patterns.py +++ b/abis_mapping/utils/iri_patterns.py @@ -15,8 +15,8 @@ def survey_iri( base_iri: rdflib.Namespace | None, - *, survey_id: str | None, + /, ) -> rdflib.URIRef: """Get the IRI for the tern:Survey node, constructed from the surveyID field. 
From fea6451a8b7c14ab453f55438e1791f43cdfa0ef Mon Sep 17 00:00:00 2001 From: Lincoln Puzey Date: Fri, 8 Nov 2024 13:10:55 +0800 Subject: [PATCH 8/8] Add ids to test_uri_quoted parameters --- tests/utils/test_rdf.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/tests/utils/test_rdf.py b/tests/utils/test_rdf.py index 02a6be40..ca8fea9f 100644 --- a/tests/utils/test_rdf.py +++ b/tests/utils/test_rdf.py @@ -44,19 +44,33 @@ def test_rdf_uri() -> None: @pytest.mark.parametrize( ("namespace", "path", "fields", "expected"), [ - (rdflib.Namespace("https://test.com/foo/"), "", {}, rdflib.URIRef("https://test.com/foo/")), - (rdflib.Namespace("https://test.com/foo/"), "bar", {}, rdflib.URIRef("https://test.com/foo/bar")), - ( + pytest.param( + rdflib.Namespace("https://test.com/foo/"), + "", + {}, + rdflib.URIRef("https://test.com/foo/"), + id="empty path", + ), + pytest.param( + rdflib.Namespace("https://test.com/foo/"), + "bar", + {}, + rdflib.URIRef("https://test.com/foo/bar"), + id="static path", + ), + pytest.param( rdflib.Namespace("https://test.com/foo/"), "bar/{v}", {"v": "123"}, rdflib.URIRef("https://test.com/foo/bar/123"), + id="path with field to replace", ), - ( + pytest.param( rdflib.Namespace("https://test.com/foo/"), "bar/{v1}/cat/{v2}", {"v1": "123", "v2": "A B C?!"}, rdflib.URIRef("https://test.com/foo/bar/123/cat/A%20B%20C%3F%21"), + id="path with fields to replace with special chars", ), ], ) @@ -68,10 +82,6 @@ def test_uri_quoted( ) -> None: """Test the uri_quoted function. - * different length paths - * with/without special chars - * with/without replacement fields - Args: namespace: The namespace for the uri_quoted function path: The path for the uri_quoted function