diff --git a/abis_mapping/templates/survey_metadata_v2/examples/minimal.ttl b/abis_mapping/templates/survey_metadata_v2/examples/minimal.ttl index b930679b..b4297a93 100644 --- a/abis_mapping/templates/survey_metadata_v2/examples/minimal.ttl +++ b/abis_mapping/templates/survey_metadata_v2/examples/minimal.ttl @@ -10,58 +10,76 @@ @prefix void: . @prefix xsd: . - a schema:Collection ; + a schema:Collection ; void:inDataset ; schema:identifier "Survey Collection - Survey Type - Wet pitfall trapping" ; - schema:member ; - tern:hasAttribute . + schema:member ; + tern:hasAttribute . - a schema:Collection ; + a schema:Collection ; void:inDataset ; schema:identifier "Survey Collection - Target Habitat Scope - Woodland" ; - schema:member ; - tern:hasAttribute . + schema:member ; + tern:hasAttribute . - a schema:Collection ; + a schema:Collection ; void:inDataset ; schema:identifier "Survey Collection - Target Taxonomic Scope - Coleoptera" ; - schema:member ; - tern:hasAttribute . + schema:member ; + tern:hasAttribute . - a schema:Collection ; + a schema:Collection ; void:inDataset ; schema:identifier "Survey Collection - Target Taxonomic Scope - Insecta" ; - schema:member ; - tern:hasAttribute . + schema:member ; + tern:hasAttribute . a rdfs:Datatype ; skos:definition "An identifier for the dataset" ; skos:prefLabel "Gaia Resources datasetID" ; prov:wasAttributedTo . - a rdfs:Datatype ; + a bdr:Project ; + void:inDataset ; + schema:hasPart ; + schema:identifier "COL1" ; + schema:name "Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Summer" . + + a rdfs:Datatype ; skos:prefLabel "surveyID source" ; prov:qualifiedAttribution [ a prov:Attribution ; - prov:agent ; + prov:agent ; prov:hadRole ] . - a rdfs:Datatype ; + a rdfs:Datatype ; skos:prefLabel "surveyID source" ; prov:qualifiedAttribution [ a prov:Attribution ; - prov:agent ; + prov:agent ; prov:hadRole ] . - a bdr:Project ; + a tern:Attribute ; void:inDataset ; - schema:hasPart ; - schema:identifier "COL1" ; - schema:name "Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Summer" . + tern:attribute ; + tern:hasSimpleValue "Wet pitfall trapping" ; + tern:hasValue . - a prov:Agent ; - schema:name "CSIRO" . + a tern:Attribute ; + void:inDataset ; + tern:attribute ; + tern:hasSimpleValue "Woodland" ; + tern:hasValue . - a prov:Agent ; - schema:name "NSW Department of Planning, Industry and Environment" . + a tern:Attribute ; + void:inDataset ; + tern:attribute ; + tern:hasSimpleValue "Coleoptera" ; + tern:hasValue . + + a tern:Attribute ; + void:inDataset ; + tern:attribute ; + tern:hasSimpleValue "Insecta" ; + tern:hasValue . a skos:Concept ; skos:broader ; @@ -77,51 +95,33 @@ skos:prefLabel "Insecta" ; schema:citation "http://createme.org/dataset/Example-Systematic-Survey-Metadata-Dataset"^^xsd:anyURI . - a tern:Attribute ; - void:inDataset ; - tern:attribute ; - tern:hasSimpleValue "Wet pitfall trapping" ; - tern:hasValue . - - a tern:Attribute ; - void:inDataset ; - tern:attribute ; - tern:hasSimpleValue "Woodland" ; - tern:hasValue . - - a tern:Attribute ; - void:inDataset ; - tern:attribute ; - tern:hasSimpleValue "Coleoptera" ; - tern:hasValue . - - a tern:Attribute ; - void:inDataset ; - tern:attribute ; - tern:hasSimpleValue "Insecta" ; - tern:hasValue . - - a tern:IRI, + a tern:IRI, tern:Value ; rdfs:label "Wet pitfall trapping" ; rdf:value . - a tern:IRI, + a tern:IRI, tern:Value ; rdfs:label "Woodland" ; rdf:value . - a tern:IRI, + a tern:IRI, tern:Value ; rdfs:label "Coleoptera" ; rdf:value . - a tern:IRI, + a tern:IRI, tern:Value ; rdfs:label "Insecta" ; rdf:value . - a tern:Survey ; + a prov:Agent ; + schema:name "CSIRO" . + + a prov:Agent ; + schema:name "NSW Department of Planning, Industry and Environment" . + + a tern:Survey ; bdr:purpose "Summer sampling for peak insect diversity." ; bdr:target "Coleoptera", "Insecta" ; @@ -131,9 +131,9 @@ time:inXSDDate "2015-01-21"^^xsd:date ] ; time:hasEnd [ a time:Instant ; time:inXSDDate "2015-02-03"^^xsd:date ] ] ; - prov:hadPlan ; - schema:identifier "COL1"^^, - "COL1"^^ ; + prov:hadPlan ; + schema:identifier "COL1"^^, + "COL1"^^ ; schema:keywords "farmland", "ground beetle", "habitat", @@ -144,7 +144,7 @@ "woodland" ; schema:name "Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Summer" . - a prov:Plan ; + a prov:Plan ; schema:citation "Ng, K., Barton, P.S., Blanchard, W. et al. Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits. Oecologia 188, 645–657 (2018). https://doi.org/10.1007/s00442-018-4180-9\"" ; schema:description "Our experimental design consisted of four 400 m transects running from inside each woodland patch out into four adjoining farmland uses (crop, rested, woody debris application, revegetation plantings). To quantify potential edge efects on beetle species traits, we sampled beetles at five locations along each transect: 200 and 20 m inside woodlands, 200 and 20 m inside farmlands, and at the woodland–farmland edge (0 m). Each sampling location comprised a pair of wet invertebrate pitfall traps. separated by a drift fence (60 cm long x 10 cm high) to help direct arthropods into traps. We opened a total of 220 pairs of traps for 14 days during spring (Oct–Nov 2014), and repeated sampling during summer (January–February 2015). Beetle samples from each pitfall trap pair, and across the two time periods, were pooled to provide one sample per sampling location." ; schema:url "https://biocollect.ala.org.au/document/download/2022-01/202201%20CBR%20Flora%20and%20Vegetation%20report_draftv1.pdf"^^xsd:anyURI, @@ -161,7 +161,7 @@ geo:asWKT " POLYGON ((-33.826 146.363, -33.826 148.499, -34.411 148.499, -33.826 146.363))"^^geo:wktLiteral ] ; rdf:object _:N52d3d4f338b894a95032d27200000000 ; rdf:predicate geo:hasGeometry ; - rdf:subject ; + rdf:subject ; rdfs:comment "supplied as" . _:N52d3d4f338b894a95032d27200000000 a geo:Geometry ; diff --git a/abis_mapping/templates/survey_metadata_v2/mapping.py b/abis_mapping/templates/survey_metadata_v2/mapping.py index f8de8f8d..52be2d0f 100644 --- a/abis_mapping/templates/survey_metadata_v2/mapping.py +++ b/abis_mapping/templates/survey_metadata_v2/mapping.py @@ -2,7 +2,6 @@ # Standard import dataclasses -import urllib.parse # Third-party import frictionless @@ -193,19 +192,25 @@ def apply_mapping_row( # Create BDR project IRI project = utils.rdf.uri(f"project/SSD-Survey-Project/{row_num}", base_iri) - # Create TERN survey IRI - Note this needs to match the iri construction of the - # survey occurrence and site vist template mapping, ensuring they will resolve properly. - survey = utils.rdf.uri("survey/", base_iri) + urllib.parse.quote(row["surveyID"], safe="") + # Create TERN survey IRI from surveyID field + survey_id: str | None = row["surveyID"] + survey = utils.iri_patterns.survey_iri(base_iri, survey_id) # Create survey plan IRI - survey_plan = utils.rdf.uri(f"survey/SSD-survey/{row_num}/plan") + survey_plan = utils.rdf.uri_quoted( + base_iri, + "Plan/{survey_id}", + survey_id=(survey_id or str(row_num)), # fallback to row number when surveyID not available. + ) # Conditionally create survey type attribute, value and collection IRIs row_survey_type: str | None = row["surveyType"] if row_survey_type: - survey_type_attribute = utils.rdf.extend_uri(dataset, "attribute", "surveyType", row_survey_type) - survey_type_value = utils.rdf.extend_uri(dataset, "value", "surveyType", row_survey_type) - survey_type_collection = utils.rdf.extend_uri(dataset, "SurveyCollection", "surveyType", row_survey_type) + survey_type_attribute = utils.iri_patterns.attribute_iri(base_iri, "surveyType", row_survey_type) + survey_type_value = utils.iri_patterns.attribute_value_iri(base_iri, "surveyType", row_survey_type) + survey_type_collection = utils.iri_patterns.attribute_collection_iri( + base_iri, "Survey", "surveyType", row_survey_type + ) else: survey_type_attribute = None survey_type_value = None @@ -218,10 +223,10 @@ def apply_mapping_row( target_habitat_objects.append( AttributeValue( raw=target_habitat, - attribute=utils.rdf.extend_uri(dataset, "attribute", "targetHabitatScope", target_habitat), - value=utils.rdf.extend_uri(dataset, "value", "targetHabitatScope", target_habitat), - collection=utils.rdf.extend_uri( - dataset, "SurveyCollection", "targetHabitatScope", target_habitat + attribute=utils.iri_patterns.attribute_iri(base_iri, "targetHabitatScope", target_habitat), + value=utils.iri_patterns.attribute_value_iri(base_iri, "targetHabitatScope", target_habitat), + collection=utils.iri_patterns.attribute_collection_iri( + base_iri, "Survey", "targetHabitatScope", target_habitat ), ), ) @@ -233,10 +238,10 @@ def apply_mapping_row( target_taxonomic_objects.append( AttributeValue( raw=target_taxon, - attribute=utils.rdf.extend_uri(dataset, "attribute", "targetTaxonomicScope", target_taxon), - value=utils.rdf.extend_uri(dataset, "value", "targetTaxonomicScope", target_taxon), - collection=utils.rdf.extend_uri( - dataset, "SurveyCollection", "targetTaxonomicScope", target_taxon + attribute=utils.iri_patterns.attribute_iri(base_iri, "targetTaxonomicScope", target_taxon), + value=utils.iri_patterns.attribute_value_iri(base_iri, "targetTaxonomicScope", target_taxon), + collection=utils.iri_patterns.attribute_collection_iri( + base_iri, "Survey", "targetTaxonomicScope", target_taxon ), ) ) @@ -248,8 +253,8 @@ def apply_mapping_row( survey_org_objects.append( SurveyIDDatatype( name=raw_org, - datatype=utils.rdf.uri(f"datatype/surveyID/{raw_org}", base_iri), - agent=utils.rdf.uri(f"agent/{raw_org}"), + datatype=utils.iri_patterns.datatype_iri("surveyID", raw_org), + agent=utils.iri_patterns.agent_iri(raw_org), ) ) diff --git a/abis_mapping/templates/survey_occurrence_data_v2/examples/margaret_river_flora/margaret_river_flora.ttl b/abis_mapping/templates/survey_occurrence_data_v2/examples/margaret_river_flora/margaret_river_flora.ttl index c18b7d95..df034b65 100644 --- a/abis_mapping/templates/survey_occurrence_data_v2/examples/margaret_river_flora/margaret_river_flora.ttl +++ b/abis_mapping/templates/survey_occurrence_data_v2/examples/margaret_river_flora/margaret_river_flora.ttl @@ -1634,7 +1634,7 @@ tern:FeatureOfInterest ; void:inDataset ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f00000021 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1655,7 +1655,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f00000000 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1668,7 +1668,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f0000001b ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1681,7 +1681,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f0000001e ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1694,7 +1694,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f00000024 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1707,7 +1707,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f00000003 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1720,7 +1720,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f00000006 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1733,7 +1733,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f00000009 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1746,7 +1746,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f0000000c ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1759,7 +1759,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f0000000f ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1772,7 +1772,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f00000012 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1785,7 +1785,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f00000015 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1798,7 +1798,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f00000018 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1820,7 +1820,7 @@ void:inDataset ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f00000027 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1837,7 +1837,7 @@ "M12378"^^, "MR-456"^^, "PE:12:8832"^^ ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f0000002a ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1856,7 +1856,7 @@ "M12379"^^, "MR-457"^^, "PE:12:8833"^^ ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:N1dddcc66c58fa593aae7f72f0000002d ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; diff --git a/abis_mapping/templates/survey_occurrence_data_v2/examples/organism_qty.ttl b/abis_mapping/templates/survey_occurrence_data_v2/examples/organism_qty.ttl index d546254a..00858a35 100644 --- a/abis_mapping/templates/survey_occurrence_data_v2/examples/organism_qty.ttl +++ b/abis_mapping/templates/survey_occurrence_data_v2/examples/organism_qty.ttl @@ -84,7 +84,7 @@ tern:FeatureOfInterest ; void:inDataset ; sosa:usedProcedure ; - schema:isPartOf ; + schema:isPartOf ; schema:spatial _:Nbf8d3720a20d4c0a80c10b6800000000 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; diff --git a/abis_mapping/templates/survey_occurrence_data_v2/mapping.py b/abis_mapping/templates/survey_occurrence_data_v2/mapping.py index d6249a9f..92f48144 100644 --- a/abis_mapping/templates/survey_occurrence_data_v2/mapping.py +++ b/abis_mapping/templates/survey_occurrence_data_v2/mapping.py @@ -601,13 +601,9 @@ def apply_mapping_row( sensitivity_category_value = None sensitivity_category_collection = None - # Conditionally create uri dependent on surveyID field. - if survey_id := row["surveyID"]: - # Create TERN.Survey subject IRI - Note this needs to match the iri construction of the - # survey metadata and site vist template mapping, ensuring they will resolve properly. - survey = utils.rdf.uri("survey/", base_iri) + urllib.parse.quote(survey_id, safe="") - else: - survey = utils.rdf.uri("survey/1", base_iri) + # Create TERN survey IRI from surveyID field + survey_id: str | None = row["surveyID"] + survey = utils.iri_patterns.survey_iri(base_iri, survey_id) # Conditionally create uri dependent on siteVisitID field. if site_visit_id := row["siteVisitID"]: diff --git a/abis_mapping/templates/survey_site_visit_data_v2/examples/minimal.ttl b/abis_mapping/templates/survey_site_visit_data_v2/examples/minimal.ttl index 394e654c..7f62b677 100644 --- a/abis_mapping/templates/survey_site_visit_data_v2/examples/minimal.ttl +++ b/abis_mapping/templates/survey_site_visit_data_v2/examples/minimal.ttl @@ -157,7 +157,7 @@ prov:wasAssociatedWith , ; schema:identifier "TIS-24-03-P1-03" ; - schema:isPartOf ; + schema:isPartOf ; tern:hasSite , "P1"^^ . @@ -178,7 +178,7 @@ , ; schema:identifier "TIS-24-03-P1-01" ; - schema:isPartOf ; + schema:isPartOf ; tern:hasSite , "P1"^^ ; tern:siteDescription "dry" . @@ -192,7 +192,7 @@ prov:wasAssociatedWith , ; schema:identifier "TIS-24-03-P1-02" ; - schema:isPartOf ; + schema:isPartOf ; tern:hasSite , "P1"^^ ; tern:siteDescription "moist leaf litter after recent rain" . diff --git a/abis_mapping/templates/survey_site_visit_data_v2/mapping.py b/abis_mapping/templates/survey_site_visit_data_v2/mapping.py index d7628893..8d5fd2ae 100644 --- a/abis_mapping/templates/survey_site_visit_data_v2/mapping.py +++ b/abis_mapping/templates/survey_site_visit_data_v2/mapping.py @@ -350,15 +350,9 @@ def apply_mapping_row( # survey site and occurrence template mapping, ensuring they will resolve properly. uri_site = utils.rdf.uri("site/", base_iri) + urllib.parse.quote(row_site_id, safe="") - # URI for the Survey + # Create TERN survey IRI from surveyID field row_survey_id: str | None = row["surveyID"] - if row_survey_id: - # Create TERN survey IRI - Note this needs to match the iri construction of the - # survey occurrence and metadata template mapping, ensuring they will resolve properly. - uri_survey = utils.rdf.uri("survey/", base_iri) + urllib.parse.quote(row_survey_id, safe="") - else: - # Default IRI - uri_survey = utils.rdf.uri("survey/1", base_iri) + uri_survey = utils.iri_patterns.survey_iri(base_iri, row_survey_id) # URI for the Site Visit Plan uri_site_visit_plan = utils.rdf.extend_uri_quoted(dataset, "visit", "plan", row_site_visit_id) diff --git a/abis_mapping/utils/__init__.py b/abis_mapping/utils/__init__.py index 1a750db8..ad0d0e52 100644 --- a/abis_mapping/utils/__init__.py +++ b/abis_mapping/utils/__init__.py @@ -2,6 +2,7 @@ # Local from . import coords +from . import iri_patterns from . import namespaces from . import rdf from . import strings diff --git a/abis_mapping/utils/iri_patterns.py b/abis_mapping/utils/iri_patterns.py new file mode 100644 index 00000000..0a41cdeb --- /dev/null +++ b/abis_mapping/utils/iri_patterns.py @@ -0,0 +1,139 @@ +"""This module contains functions for constructing IRIs according to common patterns. + +This is important when the exact same IRI needs to be constructed from multiple template +mappings so that the output RDF links together on these IRIs.""" + +# third party +import rdflib + +# local +from abis_mapping import utils + +# typing +from typing import Literal + + +def survey_iri( + base_iri: rdflib.Namespace | None, + survey_id: str | None, + /, +) -> rdflib.URIRef: + """Get the IRI for the tern:Survey node, constructed from the surveyID field. + + This IRI is used in mapping multiple Systematic Survey template, + and needs to be the same for all of them. + + Args: + base_iri: The namespace to construct the IRI from. + survey_id: The surveyID field from the template. + + Returns: + The IRI for the tern:Survey node. + """ + return utils.rdf.uri_quoted( + base_iri, + "Survey/{survey_id}", + # surveyID is an optional field. When missing fallback to the row number. + # (which is always 1 for a Survey, since metadata template must have 1 row) + survey_id=(survey_id or "1"), + ) + + +def attribute_iri( + base_iri: rdflib.Namespace | None, + attribute: str, + value: str, + /, +) -> rdflib.URIRef: + """Get the IRI to use for a tern:Attribute node. + + Args: + base_iri: The namespace to construct the IRI from. + attribute: Typically the name of the CSV field. + value: The value of the attribute. + + Returns: + IRI for the tern:Attribute node. + """ + return utils.rdf.uri(f"attribute/{attribute}/{value}", namespace=base_iri) + + +def attribute_value_iri( + base_iri: rdflib.Namespace | None, + attribute: str, + value: str, + /, +) -> rdflib.URIRef: + """Get the IRI to use for a tern:Value node associated with a tern:Attribute node. + + Args: + base_iri: The namespace to construct the IRI from. + attribute: Typically the name of the CSV field. + value: The value of the attribute. + + Returns: + IRI for the tern:Value node. + """ + return utils.rdf.uri(f"value/{attribute}/{value}", namespace=base_iri) + + +def attribute_collection_iri( + base_iri: rdflib.Namespace | None, + collection_type: Literal["Survey", "Occurrence", "Site", "SiteVisit"], + attribute: str, + value: str, + /, +) -> rdflib.URIRef: + """Get the IRI to use for a schema:Collection node associated with a tern:Attribute node. + + Args: + base_iri: The namespace to construct the IRI from. + collection_type: What 'type' of Collection this is. Corresponds to the template being mapped. + attribute: Typically the name of the CSV field. + value: The value of the attribute. + + Returns: + IRI for the schema:Collection node. + """ + return utils.rdf.uri(f"{collection_type}Collection/{attribute}/{value}", namespace=base_iri) + + +def datatype_iri( + identifier_type: str, + identifier_source: str, + /, +) -> rdflib.URIRef: + """Get the IRI to use for a rdfs:Datatype node. + + This node typically represents the source of an identifier. + + Args: + identifier_type: What identifier the datatype is for. e.g. "surveyID", "catalogNumber" + identifier_source: The source of the identifier. + e.g. the value for the "surveyOrgs" or "catalogNumberSource" template fields. + + Returns: + URIRef for the rdfs:Datatype node. + """ + return utils.rdf.uri( + f"{identifier_type}/{identifier_source}", + namespace=utils.namespaces.BDR_DATATYPES, + ) + + +def agent_iri( + org: str, + /, +) -> rdflib.URIRef: + """Get the IRI to use for a prov:Agent node. + + Args: + org: The org this agent node represents. + + Returns: + URIRef for the prov:Agent node. + """ + return utils.rdf.uri( + f"{org}", + namespace=utils.namespaces.BDR_ORGS, + ) diff --git a/abis_mapping/utils/namespaces.py b/abis_mapping/utils/namespaces.py index a4f297bf..40f0d66e 100644 --- a/abis_mapping/utils/namespaces.py +++ b/abis_mapping/utils/namespaces.py @@ -14,3 +14,5 @@ DWC = rdflib.Namespace("http://rs.tdwg.org/dwc/terms/") BDR = rdflib.Namespace("http://example.com/bdr-schema/") ABIS = rdflib.Namespace("https://linked.data.gov.au/def/abis/") +BDR_DATATYPES = rdflib.Namespace("https://linked.data.gov.au/dataset/bdr/datatypes/") +BDR_ORGS = rdflib.Namespace("https://linked.data.gov.au/dataset/bdr/orgs/") diff --git a/abis_mapping/utils/rdf.py b/abis_mapping/utils/rdf.py index 4795992b..bb3ed89d 100644 --- a/abis_mapping/utils/rdf.py +++ b/abis_mapping/utils/rdf.py @@ -87,6 +87,54 @@ def uri( return namespace[internal_id] +def uri_quoted( + namespace: Optional[rdflib.Namespace], + path: str, + /, + **kwargs: str, +) -> rdflib.URIRef: + """Generates a rdflib.URIRef using the supplied namespace and path. + + The path string can contain fields to be replaced in braces, e.g "survey/{survey_id}" + These fields are replaced by the contents of kwargs. + Each kwarg value is sanitised by being url-quoted before being inserted in the path. + Then the resulting path is then appended to the namespace to get the URI. + + Some examples: + >>> uri_quoted(namespaces.EXAMPLE, "someField/{the_value}", the_value="foo") + rdflib.URIRef("http://example.com/someField/foo") + >>> uri_quoted(namespaces.EXAMPLE, "Type/{field}/{value}", field="foo", value="Hello There!") + rdflib.URIRef("http://example.com/Type/foo/Hello%20There%21") + + url-quoting is used instead of slugify-ing; + 1. So that the exact original value can be determined from the URI. + This is not possible with slugify-ing which will remove or replace chars with "-". + 2. Different input values always result in different final URIs. + This is not always the case with slugify-ing, since chars can be dropped or replaced with "-". + + Args: + namespace: Namespace for the uri. + path: Path to append to the namespace. + Can contain fields to be replaced, e.g. "survey/{survey_id}". + kwargs: Fields and values to replace in the path string. + + Returns: + Generated URI. + Raises: + KeyError: When the path string contains a field not specified in kwargs. + """ + # Check for namespace + if namespace is None: + # Set Default Namespace + namespace = namespaces.CREATEME + + # fill in any {field} names in path with url-quoted kwargs. + path = path.format_map({field: urllib.parse.quote(value, safe="") for field, value in kwargs.items()}) + + # Create URIRef and Return + return namespace[path] + + def extend_uri( base: rdflib.URIRef, *parts: str, diff --git a/tests/utils/test_rdf.py b/tests/utils/test_rdf.py index c7e3b44c..ca8fea9f 100644 --- a/tests/utils/test_rdf.py +++ b/tests/utils/test_rdf.py @@ -41,6 +41,57 @@ def test_rdf_uri() -> None: assert isinstance(d, rdflib.URIRef) +@pytest.mark.parametrize( + ("namespace", "path", "fields", "expected"), + [ + pytest.param( + rdflib.Namespace("https://test.com/foo/"), + "", + {}, + rdflib.URIRef("https://test.com/foo/"), + id="empty path", + ), + pytest.param( + rdflib.Namespace("https://test.com/foo/"), + "bar", + {}, + rdflib.URIRef("https://test.com/foo/bar"), + id="static path", + ), + pytest.param( + rdflib.Namespace("https://test.com/foo/"), + "bar/{v}", + {"v": "123"}, + rdflib.URIRef("https://test.com/foo/bar/123"), + id="path with field to replace", + ), + pytest.param( + rdflib.Namespace("https://test.com/foo/"), + "bar/{v1}/cat/{v2}", + {"v1": "123", "v2": "A B C?!"}, + rdflib.URIRef("https://test.com/foo/bar/123/cat/A%20B%20C%3F%21"), + id="path with fields to replace with special chars", + ), + ], +) +def test_uri_quoted( + namespace: rdflib.Namespace, + path: str, + fields: dict[str, str], + expected: rdflib.URIRef, +) -> None: + """Test the uri_quoted function. + + Args: + namespace: The namespace for the uri_quoted function + path: The path for the uri_quoted function + fields: The fields to pass as kwargs to uri_quoted + expected: The expected return for the uri_quoted function + """ + result = utils.rdf.uri_quoted(namespace, path, **fields) + assert result == expected + + @pytest.mark.parametrize( ("base", "extension", "expected"), [