From 81ce2a09ff2dc9b3909da8405034befd0fb6c44e Mon Sep 17 00:00:00 2001 From: joecrowleygaia Date: Fri, 28 Jun 2024 14:00:09 +0800 Subject: [PATCH 1/3] modified vocabs to now have common get method signatures for both restricted and flexible types. Added custom validator to Field model for vocabularies field and a method to retrieve a field's associated vocabs --- abis_mapping/base/mapper.py | 37 +++-- abis_mapping/types/schema.py | 52 ++++++- abis_mapping/utils/vocabs.py | 144 ++++++++++-------- abis_mapping/vocabs/basis_of_record.py | 20 +-- abis_mapping/vocabs/check_protocol.py | 23 +-- .../vocabs/conservation_jurisdiction.py | 13 +- abis_mapping/vocabs/establishment_means.py | 21 +-- abis_mapping/vocabs/geodetic_datum.py | 11 +- abis_mapping/vocabs/identification_method.py | 23 +-- .../vocabs/identification_qualifier.py | 23 +-- abis_mapping/vocabs/kingdom.py | 67 ++++---- abis_mapping/vocabs/life_stage.py | 19 +-- abis_mapping/vocabs/occurrence_status.py | 21 +-- abis_mapping/vocabs/organism_quantity_type.py | 22 +-- abis_mapping/vocabs/preparations.py | 23 +-- .../vocabs/relationship_to_related_site.py | 11 +- abis_mapping/vocabs/reproductive_condition.py | 22 +-- abis_mapping/vocabs/sampling_effort_unit.py | 22 +-- abis_mapping/vocabs/sampling_protocol.py | 21 +-- abis_mapping/vocabs/sequencing_method.py | 23 +-- abis_mapping/vocabs/sex.py | 21 +-- abis_mapping/vocabs/site_type.py | 23 +-- abis_mapping/vocabs/survey_type.py | 21 +-- abis_mapping/vocabs/target_habitat_scope.py | 23 +-- abis_mapping/vocabs/target_taxonomic_scope.py | 22 +-- abis_mapping/vocabs/taxon_rank.py | 21 +-- abis_mapping/vocabs/threat_status.py | 24 +-- tests/base/test_mapper.py | 79 +++++++++- tests/types/test_schema.py | 110 +++++++++++++ 29 files changed, 625 insertions(+), 337 deletions(-) create mode 100644 tests/types/test_schema.py diff --git a/abis_mapping/base/mapper.py b/abis_mapping/base/mapper.py index 45789939..6c495b5c 100644 --- a/abis_mapping/base/mapper.py +++ 
b/abis_mapping/base/mapper.py @@ -13,12 +13,11 @@ import rdflib # Local -from . import types -from abis_mapping.types import spatial -from abis_mapping.types import temporal -from abis_mapping.types import schema +from . import types as base_types +from abis_mapping import types from abis_mapping import utils + # Typing from typing import Any, Iterator, Optional, final @@ -43,7 +42,7 @@ class ABISMapper(abc.ABC): @abc.abstractmethod def apply_validation( self, - data: types.ReadableType, + data: base_types.ReadableType, **kwargs: Any, ) -> frictionless.Report: """Applies Frictionless Validation to Raw Data to Generate Report. @@ -59,7 +58,7 @@ def apply_validation( @abc.abstractmethod def apply_mapping( self, - data: types.ReadableType, + data: base_types.ReadableType, dataset_iri: Optional[rdflib.URIRef] = None, base_iri: Optional[rdflib.Namespace] = None, **kwargs: Any, @@ -91,14 +90,14 @@ def add_default_dataset( graph.add((uri, a, utils.namespaces.TERN.RDFDataset)) graph.add((uri, rdflib.DCTERMS.title, rdflib.Literal(self.DATASET_DEFAULT_NAME))) graph.add((uri, rdflib.DCTERMS.description, rdflib.Literal(self.DATASET_DEFAULT_DESCRIPTION))) - graph.add((uri, rdflib.DCTERMS.issued, temporal.Date.today().to_rdf_literal())) + graph.add((uri, rdflib.DCTERMS.issued, types.temporal.Date.today().to_rdf_literal())) def add_geometry_supplied_as( self, subj: rdflib.graph.Node, pred: rdflib.graph.Node, obj: rdflib.graph.Node, - geom: spatial.Geometry, + geom: types.spatial.Geometry, graph: rdflib.Graph, ) -> None: """Add geometry supplied as originally to the graph. @@ -177,7 +176,7 @@ def extract_extra_fields( @classmethod def extra_fields_schema( cls, - data: frictionless.Row | types.ReadableType, + data: frictionless.Row | base_types.ReadableType, full_schema: bool = False ) -> frictionless.Schema: """Creates a schema with all extra fields found in data. 
@@ -306,11 +305,29 @@ def schema( # Read Schema and validate s_dict = json.loads(schema_file.read_text()) - s_class = schema.Schema.model_validate(s_dict, strict=True) + s_class = types.schema.Schema.model_validate(s_dict, strict=True) # Dump pydantic class to return dict return s_class.model_dump(exclude_none=discard_optional) + @final + @classmethod + @property + @functools.lru_cache + def fields(cls) -> dict[str, types.schema.Field]: + """Indexed dictionary of all fields' metadata. + + Returns: + dict[str, types.schema.Field]: Dictionary of all fields + with name as key. + """ + # Get schema + schema = types.schema.Schema.model_validate(cls.schema()) + + # Return dictionary of fields + return {f.name: f for f in schema.fields} + + @final @classmethod @functools.lru_cache diff --git a/abis_mapping/types/schema.py b/abis_mapping/types/schema.py index 72ed5868..c33332ea 100644 --- a/abis_mapping/types/schema.py +++ b/abis_mapping/types/schema.py @@ -1,10 +1,14 @@ """Describes the models to define a schema.""" + # Third-party import pydantic +# Local +from abis_mapping import utils + # Typing -from typing import Any +from typing import Any, Type class Constraints(pydantic.BaseModel): @@ -24,11 +28,55 @@ class Field(pydantic.BaseModel): type: str format: str | None constraints: Constraints - vocabularies: list[str] = [] + vocabularies: list[str] # Allow extra fields to be captured mainly to catch errors in json model_config = pydantic.ConfigDict(extra="allow") + @pydantic.field_validator("vocabularies", mode="after") + @classmethod + def check_vocabularies(cls, values: list[str]) -> list[str]: + """Custom validation of the vocabularies field. + + Args: + values (list[str]): The provided vocabularies initial value. + + Returns: + list[str]: The validated vocabulary ids. + + Raises: + ValueError: A provided vocabulary id does not exist. 
+ """ + # Check each provided name to see if it exists in the registry + for name in values: + if utils.vocabs.get_vocab(name) is None: + raise ValueError(f"Vocabulary id {name} does not exist.") + + # Return list + return values + + def get_vocab(self, name: str | None = None) -> Type[utils.vocabs.Vocabulary]: + """Retrieves the vocab for the field. + + Args: + name (str | None, optional): The name of the vocab to retrieve. Will + return first vocab if not provided. + + Returns: + Type[utils.vocabs.Vocabulary]: Returns vocabulary for the field. + + Raises: + ValueError: If name is not within the vocabularies field. + """ + # Check if name exists + if name is not None and name not in self.vocabularies: + raise ValueError(f"Vocabulary '{name}' is not defined for field {self.name}.") + + # Retrieve + if name is not None: + return utils.vocabs.get_vocab(name) + + return utils.vocabs.get_vocab(self.vocabularies[0]) class Schema(pydantic.BaseModel): """Model for overall schema object of a schema definition.""" diff --git a/abis_mapping/utils/vocabs.py b/abis_mapping/utils/vocabs.py index ce60920d..370c2f7f 100644 --- a/abis_mapping/utils/vocabs.py +++ b/abis_mapping/utils/vocabs.py @@ -11,7 +11,7 @@ from abis_mapping.utils import strings # Typing -from typing import Optional, Iterable, final, Final +from typing import Optional, Iterable, final, Final, Type # Constants @@ -79,40 +79,63 @@ def alternative_labels(self) -> Iterable[str]: class Vocabulary(abc.ABC): - """Base Vocabulary class.""" + """Base Vocabulary class. + + Attributes: + id_registry (dict[str, Type[Vocabulary]]): Dictionary to hold all vocabs for + mapping by their id. + vocab_id (str): ID to assign vocabulary. + terms (Iterable[Term]): Terms to add to the vocabulary. + publish (bool, optional): Whether to publish vocabulary + in documentation. Defaults to True. + """ # Dictionary to hold all vocabs for mapping by their id. 
- id_registry: dict[str, "Vocabulary"] = {} + id_registry: dict[str, Type["Vocabulary"]] = {} + + # Attributes assigned per vocabulary + vocab_id: str + terms: Iterable[Term] + publish: bool = True def __init__( self, - vocab_id: str, - terms: Iterable[Term], - publish: bool = True, + graph: rdflib.Graph, + source: Optional[rdflib.URIRef] = None, ): """Vocabulary constructor. Args: - vocab_id (str): ID to assign vocabulary. - publish (bool, optional): Whether to publish vocabulary - in documentation. Defaults to True. + graph (rdflib.Graph): Graph to reference + within vocabulary + source (Optional[rdflib.URIRef]): Optional source URI to attribute + a new vocabulary term to. """ - # Assign object internal variables - self.vocab_id: Final[str] = vocab_id - self.publish: Final[bool] = publish - - # Set Instance Variables - self.terms: Final[tuple[Term, ...]] = tuple(terms) + # Assign instance variables + self.graph = graph + self.source: Optional[rdflib.URIRef] = source # Generate Dictionary Mapping from Terms self._mapping: dict[str | None, rdflib.URIRef | None] = {} for term in self.terms: self._mapping.update(**term.to_mapping()) + @abc.abstractmethod + def get(self, value: str | None) -> rdflib.URIRef: + """Retrieve an IRI from the vocabulary. + + Args: + value (Optional[str]): Possible raw string value to search for in + vocabulary. + + Returns: + rdflib.URIRef: Matching vocabulary IRI. + """ + @final @classmethod def register( cls, - vocab: "Vocabulary", + vocab: Type["Vocabulary"], ) -> None: """Register a Vocabulary within the centralise vocabulary id registry. @@ -157,46 +180,45 @@ def get(self, value: str) -> rdflib.URIRef: class FlexibleVocabulary(Vocabulary): - """Flexible Vocabulary""" + """Flexible Vocabulary. + + Attributes: + definition (rdflib.Literal): Definition to use when creating a new + vocabulary term 'on the fly'. + base (rdflib.URIRef): Base IRI namespace to use when creating a new + vocabulary term 'on the fly'. 
+ scheme (rdflib.URIRef): Scheme IRI to use when creating a new + vocabulary term 'on the fly'. + broader (Optional[rdflib.URIRef]): Optional broader IRI to use when + creating a new vocabulary term 'on the fly'. + default (Optional[Term]): Optional default term to fall back on if + a value is not supplied. + """ + # Declare attributes applicable to a flexible vocab + definition: rdflib.Literal + base: rdflib.URIRef + scheme: rdflib.URIRef + broader: Optional[rdflib.URIRef] + default: Optional[Term] def __init__( self, - vocab_id: str, - definition: rdflib.Literal, - base: rdflib.URIRef, - scheme: rdflib.URIRef, - broader: Optional[rdflib.URIRef], - default: Optional[Term], - terms: Iterable[Term], - publish: bool = True, - ) -> None: - """Initialises a Flexible Vocabulary. + graph: rdflib.Graph, + source: Optional[rdflib.URIRef] = None, + ): + """Flexible Vocabulary constructor. Args: - vocab_id (str): ID to assign vocabulary. - definition (rdflib.Literal): Definition to use when creating a new - vocabulary term 'on the fly'. - base (rdflib.URIRef): Base IRI namespace to use when creating a new - vocabulary term 'on the fly'. - scheme (rdflib.URIRef): Scheme IRI to use when creating a new - vocabulary term 'on the fly'. - broader (Optional[rdflib.URIRef]): Optional broader IRI to use when - creating a new vocabulary term 'on the fly'. - default (Optional[Term]): Optional default term to fall back on if - a value is not supplied. - terms (Iterable[Term]): Terms for the vocabulary. - publish (bool, optional): Whether to publish vocabulary - within documentation. Defaults to True. + graph (rdflib.Graph): Graph to reference + within vocabulary + source (Optional[rdflib.URIRef]): Optional source URI to attribute + a new vocabulary term to. 
""" # Call parent constructor - super().__init__(vocab_id, terms, publish) + super().__init__(graph=graph, source=source) # Set Instance Variables - self.definition = definition - self.base = rdflib.Namespace(base) # Cast to a Namespace - self.scheme = scheme - self.broader = broader - self.default = default + self.base_ns = rdflib.Namespace(self.base) # Cast to a Namespace # Add Default mapping if Applicable if self.default: @@ -204,9 +226,7 @@ def __init__( def get( self, - graph: rdflib.Graph, value: Optional[str], - source: Optional[rdflib.URIRef] = None, ) -> rdflib.URIRef: """Retrieves an IRI from the Vocabulary. @@ -218,11 +238,8 @@ def get( 'on the fly'. Args: - graph (rdflib.Graph): Graph to create a new vocabulary term in. value (Optional[str]): Possible raw string value to search for in vocabulary. - source (Optional[rdflib.URIRef]): Optional source URI to attribute - a new vocabulary term to. Returns: rdflib.URIRef: Default, matching or created vocabulary IRI. @@ -245,26 +262,26 @@ def get( raise VocabularyError("Value not supplied for vocabulary with no default") # Create our Own Concept IRI - iri = rdf.uri(value, namespace=self.base) + iri = rdf.uri(value, namespace=self.base_ns) # Add to Graph - graph.add((iri, a, rdflib.SKOS.Concept)) - graph.add((iri, rdflib.SKOS.definition, self.definition)) - graph.add((iri, rdflib.SKOS.inScheme, self.scheme)) - graph.add((iri, rdflib.SKOS.prefLabel, rdflib.Literal(value))) + self.graph.add((iri, a, rdflib.SKOS.Concept)) + self.graph.add((iri, rdflib.SKOS.definition, self.definition)) + self.graph.add((iri, rdflib.SKOS.inScheme, self.scheme)) + self.graph.add((iri, rdflib.SKOS.prefLabel, rdflib.Literal(value))) # Check for Broader IRI if self.broader: # Add Broader - graph.add((iri, rdflib.SKOS.broader, self.broader)) + self.graph.add((iri, rdflib.SKOS.broader, self.broader)) # Check for Source URI - if source: + if self.source: # Construct Source URI Literal - uri = rdflib.Literal(source, 
datatype=rdflib.XSD.anyURI) + uri = rdflib.Literal(self.source, datatype=rdflib.XSD.anyURI) # Add Source - graph.add((iri, rdflib.DCTERMS.source, uri)) + self.graph.add((iri, rdflib.DCTERMS.source, uri)) # Return return iri @@ -274,13 +291,14 @@ class VocabularyError(Exception): """Error Raised in Vocabulary Handling""" -def get_vocab(key: str) -> Vocabulary | None: +def get_vocab(key: str) -> Type[Vocabulary] | None: """Retrieves vocab object for given key. Args: key (str): Key to retrieve vocab for. Returns: - Vocabulary | None: Corresponding vocabulary for given key or None. + Type[Vocabulary] | None: Corresponding vocabulary class + for given key or None. """ return Vocabulary.id_registry.get(key) diff --git a/abis_mapping/vocabs/basis_of_record.py b/abis_mapping/vocabs/basis_of_record.py index c4709c0b..d930e858 100644 --- a/abis_mapping/vocabs/basis_of_record.py +++ b/abis_mapping/vocabs/basis_of_record.py @@ -53,14 +53,14 @@ # Vocabulary -BASIS_OF_RECORD = utils.vocabs.FlexibleVocabulary( - vocab_id="BASIS_OF_RECORD", - definition=rdflib.Literal("A type of basisOfRecord."), - base=utils.rdf.uri("bdr-cv/attribute/basisOfRecord/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/dd085299-ae86-4371-ae15-61dfa432f924"), - broader=utils.rdf.uri("bdr-cv/attribute/basisOfRecord", utils.namespaces.EXAMPLE), # TODO -> Need real URI - default=None, # No default, omitted if not provided - terms=( +class BasisOfRecord(utils.vocabs.FlexibleVocabulary): + vocab_id = "BASIS_OF_RECORD" + definition = rdflib.Literal("A type of basisOfRecord.") + base = utils.rdf.uri("bdr-cv/attribute/basisOfRecord/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/dd085299-ae86-4371-ae15-61dfa432f924") + broader = utils.rdf.uri("bdr-cv/attribute/basisOfRecord", utils.namespaces.EXAMPLE) # TODO -> Need real URI + default = None # No default, omitted if not provided + terms = ( HUMAN_OBSERVATION, OCCURRENCE, PRESERVED_SPECIMEN, @@ -69,7 +69,7 @@ 
MACHINE_OBSERVATION, MATERIAL_SAMPLE, ) -) + # Register vocabulary -utils.vocabs.Vocabulary.register(BASIS_OF_RECORD) +utils.vocabs.Vocabulary.register(BasisOfRecord) diff --git a/abis_mapping/vocabs/check_protocol.py b/abis_mapping/vocabs/check_protocol.py index ad1d8811..79261c06 100644 --- a/abis_mapping/vocabs/check_protocol.py +++ b/abis_mapping/vocabs/check_protocol.py @@ -15,17 +15,18 @@ description="Unspecified", ) + # Vocabulary -CHECK_PROTOCOL = utils.vocabs.FlexibleVocabulary( - vocab_id="CHECK_PROTOCOL", - definition=rdflib.Literal("A type of threatStatusCheckProtocol."), - base=utils.rdf.uri("bdr-cv/methods/threatStatusCheckProtocol/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/2fd44aca-168f-4177-b393-0688ce38102c"), - broader=utils.rdf.uri("bdr-cv/methods/threatStatusCheckProtocol", utils.namespaces.EXAMPLE), # TODO -> Need real URI # noqa: E501 - default=UNSPECIFIED, - terms=(UNSPECIFIED, ), - publish=False, -) +class CheckProtocol(utils.vocabs.FlexibleVocabulary): + vocab_id = "CHECK_PROTOCOL" + definition = rdflib.Literal("A type of threatStatusCheckProtocol.") + base = utils.rdf.uri("bdr-cv/methods/threatStatusCheckProtocol/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/2fd44aca-168f-4177-b393-0688ce38102c") + broader = utils.rdf.uri("bdr-cv/methods/threatStatusCheckProtocol", utils.namespaces.EXAMPLE) # TODO -> Need real URI # noqa: E501 + default = UNSPECIFIED + terms = (UNSPECIFIED, ) + publish = False + # Register -utils.vocabs.Vocabulary.register(CHECK_PROTOCOL) +utils.vocabs.Vocabulary.register(CheckProtocol) diff --git a/abis_mapping/vocabs/conservation_jurisdiction.py b/abis_mapping/vocabs/conservation_jurisdiction.py index 598f178d..cb255303 100644 --- a/abis_mapping/vocabs/conservation_jurisdiction.py +++ b/abis_mapping/vocabs/conservation_jurisdiction.py @@ -55,12 +55,13 @@ description="Western Australia", ) + # Vocabulary -CONSERVATION_JURISDICTION = utils.vocabs.RestrictedVocabulary( - 
vocab_id="CONSERVATION_JURISDICTION", - terms=(ACT, EPBC, NSW, NT, QLD, SA, TAS, VIC, WA), - publish=False, -) +class ConservationJurisdiction(utils.vocabs.RestrictedVocabulary): + vocab_id = "CONSERVATION_JURISDICTION" + terms = (ACT, EPBC, NSW, NT, QLD, SA, TAS, VIC, WA) + publish = False + # Register -utils.vocabs.Vocabulary.register(CONSERVATION_JURISDICTION) +utils.vocabs.Vocabulary.register(ConservationJurisdiction) diff --git a/abis_mapping/vocabs/establishment_means.py b/abis_mapping/vocabs/establishment_means.py index 9026c79d..b70c09a9 100644 --- a/abis_mapping/vocabs/establishment_means.py +++ b/abis_mapping/vocabs/establishment_means.py @@ -46,16 +46,17 @@ description="The temporary occurrence of a taxon far outside its natural or migratory range.", ) + # Vocabulary -ESTABLISHMENT_MEANS = utils.vocabs.FlexibleVocabulary( - vocab_id="ESTABLISHMENT_MEANS", - definition=rdflib.Literal("A type of establishmentMeans."), - base=utils.rdf.uri("bdr-cv/parameter/establishmentMeans/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/5699eca7-9ef0-47a6-bcfb-9306e0e2b85e"), - broader=utils.rdf.uri("bdr-cv/parameter/establishmentMeans", utils.namespaces.EXAMPLE), # TODO -> Need real URI - default=None, # No default, omitted if not provided - terms=(INTRODUCED, INTRODUCED_ASSISTED_COLONISATION, NATIVE, NATIVE_REINTRODUCED, UNCERTAIN, VAGRANT), -) +class EstablishmentMeans(utils.vocabs.FlexibleVocabulary): + vocab_id = "ESTABLISHMENT_MEANS" + definition = rdflib.Literal("A type of establishmentMeans.") + base = utils.rdf.uri("bdr-cv/parameter/establishmentMeans/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/5699eca7-9ef0-47a6-bcfb-9306e0e2b85e") + broader = utils.rdf.uri("bdr-cv/parameter/establishmentMeans", utils.namespaces.EXAMPLE) # TODO -> Need real URI + default = None # No default, omitted if not provided + terms = (INTRODUCED, INTRODUCED_ASSISTED_COLONISATION, NATIVE, NATIVE_REINTRODUCED, UNCERTAIN, VAGRANT) + # Register 
-utils.vocabs.Vocabulary.register(ESTABLISHMENT_MEANS) +utils.vocabs.Vocabulary.register(EstablishmentMeans) diff --git a/abis_mapping/vocabs/geodetic_datum.py b/abis_mapping/vocabs/geodetic_datum.py index b76b94df..2570af6e 100644 --- a/abis_mapping/vocabs/geodetic_datum.py +++ b/abis_mapping/vocabs/geodetic_datum.py @@ -35,11 +35,12 @@ description="World Geodetic System 1984, used in GPS", ) + # Vocabulary -GEODETIC_DATUM = utils.vocabs.RestrictedVocabulary( - vocab_id="GEODETIC_DATUM", - terms=(AGD66, AGD84, GDA2020, GDA94, WGS84), -) +class GeodeticDatum(utils.vocabs.RestrictedVocabulary): + vocab_id = "GEODETIC_DATUM" + terms = (AGD66, AGD84, GDA2020, GDA94, WGS84) + # Register -utils.vocabs.Vocabulary.register(GEODETIC_DATUM) +utils.vocabs.Vocabulary.register(GeodeticDatum) diff --git a/abis_mapping/vocabs/identification_method.py b/abis_mapping/vocabs/identification_method.py index a6f6800d..1818d40d 100644 --- a/abis_mapping/vocabs/identification_method.py +++ b/abis_mapping/vocabs/identification_method.py @@ -15,17 +15,18 @@ description="Undefined", ) + # Vocabulary -IDENTIFICATION_METHOD = utils.vocabs.FlexibleVocabulary( - vocab_id="IDENTIFICATION_METHOD", - definition=rdflib.Literal("A type of identificationMethod."), - base=utils.rdf.uri("bdr-cv/methods/identificationMethod/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/2fd44aca-168f-4177-b393-0688ce38102c"), - broader=utils.rdf.uri("bdr-cv/methods/identificationMethod", utils.namespaces.EXAMPLE), # TODO -> Need real URI - default=UNDEFINED, - terms=(), # No baseline vocabulary values - publish=False, -) +class IdentificationMethod(utils.vocabs.FlexibleVocabulary): + vocab_id = "IDENTIFICATION_METHOD" + definition = rdflib.Literal("A type of identificationMethod.") + base = utils.rdf.uri("bdr-cv/methods/identificationMethod/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/2fd44aca-168f-4177-b393-0688ce38102c") + broader = 
utils.rdf.uri("bdr-cv/methods/identificationMethod", utils.namespaces.EXAMPLE) # TODO -> Need real URI + default = UNDEFINED + terms = () # No baseline vocabulary values + publish = False + # Register -utils.vocabs.Vocabulary.register(IDENTIFICATION_METHOD) +utils.vocabs.Vocabulary.register(IdentificationMethod) diff --git a/abis_mapping/vocabs/identification_qualifier.py b/abis_mapping/vocabs/identification_qualifier.py index 803eaffa..59d23814 100644 --- a/abis_mapping/vocabs/identification_qualifier.py +++ b/abis_mapping/vocabs/identification_qualifier.py @@ -158,15 +158,16 @@ ), ) + # Vocabulary -IDENTIFICATION_QUALIFIER = utils.vocabs.FlexibleVocabulary( - vocab_id="IDENTIFICATION_QUALIFIER", - definition=rdflib.Literal("A type of identificationQualifier."), - base=utils.rdf.uri("bdr-cv/attribute/identificationQualifier/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/dd085299-ae86-4371-ae15-61dfa432f924"), - broader=utils.rdf.uri("bdr-cv/attribute/identificationQualifier", utils.namespaces.EXAMPLE), # TODO -> Need real URI # noqa: E501 - default=None, # No default, ommitted if not provided - terms=( +class IdentificationQualifier(utils.vocabs.FlexibleVocabulary): + vocab_id = "IDENTIFICATION_QUALIFIER" + definition = rdflib.Literal("A type of identificationQualifier.") + base = utils.rdf.uri("bdr-cv/attribute/identificationQualifier/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/dd085299-ae86-4371-ae15-61dfa432f924") + broader = utils.rdf.uri("bdr-cv/attribute/identificationQualifier", utils.namespaces.EXAMPLE) # TODO -> Need real URI # noqa: E501 + default = None # No default, ommitted if not provided + terms = ( ANIMALIA_CETERA, CONFER, EX_GREGE, @@ -182,8 +183,8 @@ SPECIES_PROXIMA, STETIT, SUBSPECIES, - ), -) + ) + # Register -utils.vocabs.Vocabulary.register(IDENTIFICATION_QUALIFIER) +utils.vocabs.Vocabulary.register(IdentificationQualifier) diff --git a/abis_mapping/vocabs/kingdom.py b/abis_mapping/vocabs/kingdom.py 
index b6ab2468..b3dfccde 100644 --- a/abis_mapping/vocabs/kingdom.py +++ b/abis_mapping/vocabs/kingdom.py @@ -58,38 +58,41 @@ description="Kingdom (taxonRank: Regnum) Fungi specimen", ) + # Vocabularies -KINGDOM = utils.vocabs.FlexibleVocabulary( - vocab_id="KINGDOM", - definition=rdflib.Literal("A type of kingdom."), - base=utils.rdf.uri("bdr-cv/attribute/kingdom/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/dd085299-ae86-4371-ae15-61dfa432f924"), - broader=utils.rdf.uri("bdr-cv/attribute/kingdom", utils.namespaces.EXAMPLE), # TODO -> Need real URI - default=None, # No default, kingdom is required in the CSV - terms=(ANIMALIA, PLANTAE, FUNGI), -) -KINGDOM_OCCURRENCE = utils.vocabs.FlexibleVocabulary( - vocab_id="KINGDOM_OCCURRENCE", - definition=rdflib.Literal("The existence of the organism sampled at a particular place at a particular time."), - base=utils.rdf.uri("bdr-cv/featureType/occurrence/kingdom/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/68af3d25-c801-4089-afff-cf701e2bd61d"), - broader=None, # No broader, top level concept - default=None, # No default, kingdom is required in the CSV - terms=(ANIMALIA_OCCURRENCE, PLANTAE_OCCURRENCE, FUNGI_OCCURRENCE), - publish=False, -) -KINGDOM_SPECIMEN = utils.vocabs.FlexibleVocabulary( - vocab_id="KINGDOM_SPECIMEN", - definition=rdflib.Literal("An organism specimen."), - base=utils.rdf.uri("bdr-cv/featureType/specimen/kingdom/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/68af3d25-c801-4089-afff-cf701e2bd61d"), - broader=None, # No broader, top level concept - default=None, # No default, kingdom is required in the CSV - terms=(ANIMALIA_SPECIMEN, PLANTAE_SPECIMEN, FUNGI_SPECIMEN), - publish=False, -) +class Kingdom(utils.vocabs.FlexibleVocabulary): + vocab_id = "KINGDOM" + definition = rdflib.Literal("A type of kingdom.") + base = utils.rdf.uri("bdr-cv/attribute/kingdom/") + scheme = 
rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/dd085299-ae86-4371-ae15-61dfa432f924") + broader = utils.rdf.uri("bdr-cv/attribute/kingdom", utils.namespaces.EXAMPLE) # TODO -> Need real URI + default = None # No default, kingdom is required in the CSV + terms = (ANIMALIA, PLANTAE, FUNGI) + + +class KingdomOccurrence(utils.vocabs.FlexibleVocabulary): + vocab_id = "KINGDOM_OCCURRENCE" + definition = rdflib.Literal("The existence of the organism sampled at a particular place at a particular time.") + base = utils.rdf.uri("bdr-cv/featureType/occurrence/kingdom/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/68af3d25-c801-4089-afff-cf701e2bd61d") + broader = None # No broader, top level concept + default = None # No default, kingdom is required in the CSV + terms = (ANIMALIA_OCCURRENCE, PLANTAE_OCCURRENCE, FUNGI_OCCURRENCE) + publish = False + + +class KingdomSpecimen(utils.vocabs.FlexibleVocabulary): + vocab_id = "KINGDOM_SPECIMEN" + definition = rdflib.Literal("An organism specimen.") + base = utils.rdf.uri("bdr-cv/featureType/specimen/kingdom/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/68af3d25-c801-4089-afff-cf701e2bd61d") + broader = None # No broader, top level concept + default = None # No default, kingdom is required in the CSV + terms = (ANIMALIA_SPECIMEN, PLANTAE_SPECIMEN, FUNGI_SPECIMEN) + publish = False + # Register -utils.vocabs.Vocabulary.register(KINGDOM) -utils.vocabs.Vocabulary.register(KINGDOM_OCCURRENCE) -utils.vocabs.Vocabulary.register(KINGDOM_SPECIMEN) +utils.vocabs.Vocabulary.register(Kingdom) +utils.vocabs.Vocabulary.register(KingdomOccurrence) +utils.vocabs.Vocabulary.register(KingdomSpecimen) diff --git a/abis_mapping/vocabs/life_stage.py b/abis_mapping/vocabs/life_stage.py index 5010e7eb..e4346b3a 100644 --- a/abis_mapping/vocabs/life_stage.py +++ b/abis_mapping/vocabs/life_stage.py @@ -182,14 +182,15 @@ ), ) + # Vocabulary -LIFE_STAGE = utils.vocabs.FlexibleVocabulary( - vocab_id="LIFE_STAGE", 
- definition=rdflib.Literal("A type of lifeStage."), - base=utils.rdf.uri("bdr-cv/parameter/lifeStage/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/5699eca7-9ef0-47a6-bcfb-9306e0e2b85e"), +class LifeStage(utils.vocabs.FlexibleVocabulary): + vocab_id="LIFE_STAGE" + definition=rdflib.Literal("A type of lifeStage.") + base=utils.rdf.uri("bdr-cv/parameter/lifeStage/") + scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/5699eca7-9ef0-47a6-bcfb-9306e0e2b85e") - broader=utils.rdf.uri("bdr-cv/parameter/lifeStage", utils.namespaces.EXAMPLE), # TODO -> Need real URI + broader=utils.rdf.uri("bdr-cv/parameter/lifeStage", utils.namespaces.EXAMPLE) # TODO -> Need real URI - default=None, # No default, ommitted if not provided + default=None # No default, ommitted if not provided terms=( ADULT, EMBRYO, @@ -201,8 +202,8 @@ SPORE, SPOROPHYTE, ZYGOTE, - ), -) + ) + # Register -utils.vocabs.Vocabulary.register(LIFE_STAGE) +utils.vocabs.Vocabulary.register(LifeStage) diff --git a/abis_mapping/vocabs/occurrence_status.py b/abis_mapping/vocabs/occurrence_status.py index bb302529..a06f09f6 100644 --- a/abis_mapping/vocabs/occurrence_status.py +++ b/abis_mapping/vocabs/occurrence_status.py @@ -20,16 +20,17 @@ description="The occurrence was not present at the location and time of the observation." 
) + # Vocabulary -OCCURRENCE_STATUS = utils.vocabs.FlexibleVocabulary( - vocab_id="OCCURRENCE_STATUS", - definition=rdflib.Literal("A type of occurrenceStatus."), - base=utils.rdf.uri("bdr-cv/parameter/occurrenceStatus/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/5699eca7-9ef0-47a6-bcfb-9306e0e2b85e"), - broader=utils.rdf.uri("bdr-cv/parameter/occurrenceStatus", utils.namespaces.EXAMPLE), # TODO -> Need real URI - default=None, # No default, ommitted if not provided - terms=(PRESENT, ABSENT) -) +class OccurrenceStatus(utils.vocabs.FlexibleVocabulary): + vocab_id = "OCCURRENCE_STATUS" + definition = rdflib.Literal("A type of occurrenceStatus.") + base = utils.rdf.uri("bdr-cv/parameter/occurrenceStatus/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/5699eca7-9ef0-47a6-bcfb-9306e0e2b85e") + broader = utils.rdf.uri("bdr-cv/parameter/occurrenceStatus", utils.namespaces.EXAMPLE) # TODO -> Need real URI + default = None # No default, ommitted if not provided + terms = (PRESENT, ABSENT) + # Register -utils.vocabs.Vocabulary.register(OCCURRENCE_STATUS) +utils.vocabs.Vocabulary.register(OccurrenceStatus) diff --git a/abis_mapping/vocabs/organism_quantity_type.py b/abis_mapping/vocabs/organism_quantity_type.py index 84589487..9dceb494 100644 --- a/abis_mapping/vocabs/organism_quantity_type.py +++ b/abis_mapping/vocabs/organism_quantity_type.py @@ -114,14 +114,14 @@ # Vocab -ORGANISM_QUANTITY_TYPE = utils.vocabs.FlexibleVocabulary( - vocab_id="ORGANISM_QUANTITY_TYPE", - definition=rdflib.Literal("A type of organism quantity."), - base=utils.rdf.uri("bdr-cv/concept/organismQuantityType"), - scheme=rdflib.URIRef("http://rs.gbif.org/vocabulary/gbif/quantityType"), - broader=utils.rdf.uri("bdr-cv/concept/organismQuantityType", utils.namespaces.EXAMPLE), - default=None, # No default, ommitted if not provided. 
- terms=( +class OrganismQuantityType(utils.vocabs.FlexibleVocabulary): + vocab_id = "ORGANISM_QUANTITY_TYPE" + definition = rdflib.Literal("A type of organism quantity.") + base = utils.rdf.uri("bdr-cv/concept/organismQuantityType") + scheme = rdflib.URIRef("http://rs.gbif.org/vocabulary/gbif/quantityType") + broader = utils.rdf.uri("bdr-cv/concept/organismQuantityType", utils.namespaces.EXAMPLE) + default = None # No default, ommitted if not provided. + terms = ( PERCENTAGE_OF_SPECIES, PERCENTAGE_OF_BIOVOLUME, PERCENTAGE_OF_BIOMASS, @@ -134,8 +134,8 @@ BIOMASS_KG, BIOVOLUME_CUBIC_MICRONS, BIOVOLUME_ML, - ), -) + ) + # Register -utils.vocabs.Vocabulary.register(ORGANISM_QUANTITY_TYPE) +utils.vocabs.Vocabulary.register(OrganismQuantityType) diff --git a/abis_mapping/vocabs/preparations.py b/abis_mapping/vocabs/preparations.py index d5942e2a..c0de1762 100644 --- a/abis_mapping/vocabs/preparations.py +++ b/abis_mapping/vocabs/preparations.py @@ -80,15 +80,16 @@ description="Refrigerated", ) + # Vocabulary -PREPARATIONS = utils.vocabs.FlexibleVocabulary( - vocab_id="PREPARATIONS", - definition=rdflib.Literal("A type of preparations."), - base=utils.rdf.uri("bdr-cv/attribute/preparations/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/dd085299-ae86-4371-ae15-61dfa432f924"), - broader=utils.rdf.uri("bdr-cv/attribute/preparations", utils.namespaces.EXAMPLE), # TODO -> Need real URI - default=None, # No default, ommitted if not provided - terms=( +class Preparations(utils.vocabs.FlexibleVocabulary): + vocab_id = "PREPARATIONS" + definition = rdflib.Literal("A type of preparations.") + base = utils.rdf.uri("bdr-cv/attribute/preparations/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/dd085299-ae86-4371-ae15-61dfa432f924") + broader = utils.rdf.uri("bdr-cv/attribute/preparations", utils.namespaces.EXAMPLE) # TODO -> Need real URI + default = None # No default, ommitted if not provided + terms = ( ALCOHOL, DEEPFROZEN, DRIED, @@ -103,8 
+104,8 @@ OTHER, PINNED, REFRIGERATED, - ), -) + ) + # Register -utils.vocabs.Vocabulary.register(PREPARATIONS) +utils.vocabs.Vocabulary.register(Preparations) diff --git a/abis_mapping/vocabs/relationship_to_related_site.py b/abis_mapping/vocabs/relationship_to_related_site.py index 01ac2fc3..8ff1c7b6 100644 --- a/abis_mapping/vocabs/relationship_to_related_site.py +++ b/abis_mapping/vocabs/relationship_to_related_site.py @@ -19,11 +19,12 @@ description="When a site is a subset of another site." ) + # Vocabulary -RELATIONSHIP_TO_RELATED_SITE = utils.vocabs.RestrictedVocabulary( - vocab_id="RELATIONSHIP_TO_RELATED_SITE", - terms=(SAME_AS, PART_OF), -) +class RelationshipToRelatedSite(utils.vocabs.RestrictedVocabulary): + vocab_id = "RELATIONSHIP_TO_RELATED_SITE" + terms = (SAME_AS, PART_OF) + # Register -utils.vocabs.Vocabulary.register(RELATIONSHIP_TO_RELATED_SITE) +utils.vocabs.Vocabulary.register(RelationshipToRelatedSite) diff --git a/abis_mapping/vocabs/reproductive_condition.py b/abis_mapping/vocabs/reproductive_condition.py index 01e66823..42b84376 100644 --- a/abis_mapping/vocabs/reproductive_condition.py +++ b/abis_mapping/vocabs/reproductive_condition.py @@ -9,16 +9,16 @@ # Vocabulary -REPRODUCTIVE_CONDITION = utils.vocabs.FlexibleVocabulary( - vocab_id="REPRODUCTIVE_CONDITION", - definition=rdflib.Literal("A type of reproductiveCondition."), - base=utils.rdf.uri("bdr-cv/parameter/reproductiveCondition/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/5699eca7-9ef0-47a6-bcfb-9306e0e2b85e"), - broader=utils.rdf.uri("bdr-cv/parameter/reproductiveCondition", utils.namespaces.EXAMPLE), # TODO -> Need real URI - default=None, # No default, ommitted if not provided - terms=(), # No baseline vocabulary values - publish=False, -) +class ReproductiveCondition(utils.vocabs.FlexibleVocabulary): + vocab_id = "REPRODUCTIVE_CONDITION" + definition = rdflib.Literal("A type of reproductiveCondition.") + base = 
utils.rdf.uri("bdr-cv/parameter/reproductiveCondition/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/5699eca7-9ef0-47a6-bcfb-9306e0e2b85e") + broader = utils.rdf.uri("bdr-cv/parameter/reproductiveCondition", utils.namespaces.EXAMPLE) # TODO -> Need real URI + default = None # No default, ommitted if not provided + terms = () # No baseline vocabulary values + publish = False + # Register -utils.vocabs.Vocabulary.register(REPRODUCTIVE_CONDITION) +utils.vocabs.Vocabulary.register(ReproductiveCondition) diff --git a/abis_mapping/vocabs/sampling_effort_unit.py b/abis_mapping/vocabs/sampling_effort_unit.py index 7f81cd2e..23663111 100644 --- a/abis_mapping/vocabs/sampling_effort_unit.py +++ b/abis_mapping/vocabs/sampling_effort_unit.py @@ -7,16 +7,16 @@ from abis_mapping import utils -SAMPLING_EFFORT_UNIT = utils.vocabs.FlexibleVocabulary( - vocab_id="SAMPLING_EFFORT_UNIT", - definition=rdflib.Literal("A type of samplingEffortUnit"), - base=utils.rdf.uri("bdr-cv/attribute/samplingEffortUnit/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/dd085299-ae86-4371-ae15-61dfa432f924"), - broader=utils.rdf.uri("bdr-cv/concept/samplingEffortUnit", utils.namespaces.EXAMPLE), # TODO -> Need real URI - default=None, - terms=(), - publish=False, -) +class SamplingEffortUnit(utils.vocabs.FlexibleVocabulary): + vocab_id = "SAMPLING_EFFORT_UNIT" + definition = rdflib.Literal("A type of samplingEffortUnit") + base = utils.rdf.uri("bdr-cv/attribute/samplingEffortUnit/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/dd085299-ae86-4371-ae15-61dfa432f924") + broader = utils.rdf.uri("bdr-cv/concept/samplingEffortUnit", utils.namespaces.EXAMPLE), # TODO -> Need real URI + default = None + terms = () + publish = False + # Register -utils.vocabs.Vocabulary.register(SAMPLING_EFFORT_UNIT) +utils.vocabs.Vocabulary.register(SamplingEffortUnit) diff --git a/abis_mapping/vocabs/sampling_protocol.py b/abis_mapping/vocabs/sampling_protocol.py index 
0f7dfb3b..e5ece05e 100644 --- a/abis_mapping/vocabs/sampling_protocol.py +++ b/abis_mapping/vocabs/sampling_protocol.py @@ -20,16 +20,17 @@ description="No protocol provided.", ) + # Vocabulary -SAMPLING_PROTOCOL = utils.vocabs.FlexibleVocabulary( - vocab_id="SAMPLING_PROTOCOL", - definition=rdflib.Literal("A type of samplingProtocol."), - base=utils.rdf.uri("bdr-cv/methods/samplingProtocol/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/2fd44aca-168f-4177-b393-0688ce38102c"), - broader=utils.rdf.uri("bdr-cv/methods/samplingProtocol", utils.namespaces.EXAMPLE), # TODO -> Need real URI - default=UNSPECIFIED, - terms=(HUMAN_OBSERVATION, UNSPECIFIED), -) +class SamplingProtocol(utils.vocabs.FlexibleVocabulary): + vocab_id = "SAMPLING_PROTOCOL" + definition = rdflib.Literal("A type of samplingProtocol.") + base = utils.rdf.uri("bdr-cv/methods/samplingProtocol/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/2fd44aca-168f-4177-b393-0688ce38102c") + broader = utils.rdf.uri("bdr-cv/methods/samplingProtocol", utils.namespaces.EXAMPLE) # TODO -> Need real URI + default = UNSPECIFIED + terms = (HUMAN_OBSERVATION, UNSPECIFIED) + # Register -utils.vocabs.Vocabulary.register(SAMPLING_PROTOCOL) +utils.vocabs.Vocabulary.register(SamplingProtocol) diff --git a/abis_mapping/vocabs/sequencing_method.py b/abis_mapping/vocabs/sequencing_method.py index fd095b80..1d637ccc 100644 --- a/abis_mapping/vocabs/sequencing_method.py +++ b/abis_mapping/vocabs/sequencing_method.py @@ -15,17 +15,18 @@ description="Undefined", ) + # Vocabulary -SEQUENCING_METHOD = utils.vocabs.FlexibleVocabulary( - vocab_id="SEQUENCING_METHOD", - definition=rdflib.Literal("A type of sequencingMethod."), - base=utils.rdf.uri("bdr-cv/methods/sequencingMethod/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/2fd44aca-168f-4177-b393-0688ce38102c"), - broader=utils.rdf.uri("bdr-cv/methods/sequencingMethod", utils.namespaces.EXAMPLE), # TODO -> Need real URI - 
default=UNDEFINED, - terms=(), # No baseline vocabulary values - publish=False, -) +class SequencingMethod(utils.vocabs.FlexibleVocabulary): + vocab_id = "SEQUENCING_METHOD" + definition = rdflib.Literal("A type of sequencingMethod.") + base = utils.rdf.uri("bdr-cv/methods/sequencingMethod/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/2fd44aca-168f-4177-b393-0688ce38102c") + broader = utils.rdf.uri("bdr-cv/methods/sequencingMethod", utils.namespaces.EXAMPLE) # TODO -> Need real URI + default = UNDEFINED + terms = () # No baseline vocabulary values + publish = False + # Register -utils.vocabs.Vocabulary.register(SEQUENCING_METHOD) +utils.vocabs.Vocabulary.register(SequencingMethod) diff --git a/abis_mapping/vocabs/sex.py b/abis_mapping/vocabs/sex.py index 5ee8c86b..127bae4f 100644 --- a/abis_mapping/vocabs/sex.py +++ b/abis_mapping/vocabs/sex.py @@ -40,16 +40,17 @@ description="If the sex of an organism can't be determined for some reason.", ) + # Vocabulary -SEX = utils.vocabs.FlexibleVocabulary( - vocab_id="SEX", - definition=rdflib.Literal("A type of sex."), - base=utils.rdf.uri("bdr-cv/parameter/sex/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/5699eca7-9ef0-47a6-bcfb-9306e0e2b85e"), - broader=utils.rdf.uri("bdr-cv/parameter/sex", utils.namespaces.EXAMPLE), # TODO -> Need real URI - default=None, # No default, ommitted if not provided - terms=(FEMALE, HERMAPHRODITE, MALE, UNDETERMINED), -) +class Sex(utils.vocabs.FlexibleVocabulary): + vocab_id = "SEX" + definition = rdflib.Literal("A type of sex.") + base = utils.rdf.uri("bdr-cv/parameter/sex/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/5699eca7-9ef0-47a6-bcfb-9306e0e2b85e") + broader = utils.rdf.uri("bdr-cv/parameter/sex", utils.namespaces.EXAMPLE) # TODO -> Need real URI + default = None # No default, ommitted if not provided + terms = (FEMALE, HERMAPHRODITE, MALE, UNDETERMINED) + # Register -utils.vocabs.Vocabulary.register(SEX) 
+utils.vocabs.Vocabulary.register(Sex) diff --git a/abis_mapping/vocabs/site_type.py b/abis_mapping/vocabs/site_type.py index 7bf89dff..237c4b50 100644 --- a/abis_mapping/vocabs/site_type.py +++ b/abis_mapping/vocabs/site_type.py @@ -39,22 +39,23 @@ description="A line along which biotic and abiotic characteristics are sampled", ) + # Vocabulary -SITE_TYPE = utils.vocabs.FlexibleVocabulary( - vocab_id="SITE_TYPE", - definition=rdflib.Literal("A type of site."), - base=utils.rdf.uri("bdr-cv/concept/siteType/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/74aa68d3-28fd-468d-8ff5-7e791d9f7159"), - broader=utils.rdf.uri("bdr-cv/concept/siteType", utils.namespaces.EXAMPLE), # TODO -> Need real URI - default=SITE, - terms=( +class SiteType(utils.vocabs.FlexibleVocabulary): + vocab_id = "SITE_TYPE" + definition = rdflib.Literal("A type of site.") + base = utils.rdf.uri("bdr-cv/concept/siteType/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/74aa68d3-28fd-468d-8ff5-7e791d9f7159") + broader = utils.rdf.uri("bdr-cv/concept/siteType", utils.namespaces.EXAMPLE) # TODO -> Need real URI + default = SITE + terms = ( SITE, PARENT_SITE, PLOT, QUADRAT, TRANSECT, - ), -) + ) + # Register -utils.vocabs.Vocabulary.register(SITE_TYPE) +utils.vocabs.Vocabulary.register(SiteType) diff --git a/abis_mapping/vocabs/survey_type.py b/abis_mapping/vocabs/survey_type.py index 3c022c5b..32120ea9 100644 --- a/abis_mapping/vocabs/survey_type.py +++ b/abis_mapping/vocabs/survey_type.py @@ -20,15 +20,16 @@ ), ) -SURVEY_TYPE = utils.vocabs.FlexibleVocabulary( - vocab_id="SURVEY_TYPE", - definition=rdflib.Literal("A type of surveyType"), - base=utils.rdf.uri("bdr-cv/attribute/surveyType/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/dd085299-ae86-4371-ae15-61dfa432f924"), - broader=utils.rdf.uri("bdr-cv/concept/surveyType", utils.namespaces.EXAMPLE), # TODO -> Need real URI - default=None, - terms=(WET_PITFALL_TRAPPING,), -) + +class 
SurveyType(utils.vocabs.FlexibleVocabulary): + vocab_id = "SURVEY_TYPE" + definition = rdflib.Literal("A type of surveyType") + base = utils.rdf.uri("bdr-cv/attribute/surveyType/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/dd085299-ae86-4371-ae15-61dfa432f924") + broader = utils.rdf.uri("bdr-cv/concept/surveyType", utils.namespaces.EXAMPLE) # TODO -> Need real URI + default = None + terms = (WET_PITFALL_TRAPPING,) + # Register -utils.vocabs.Vocabulary.register(SURVEY_TYPE) +utils.vocabs.Vocabulary.register(SurveyType) diff --git a/abis_mapping/vocabs/target_habitat_scope.py b/abis_mapping/vocabs/target_habitat_scope.py index bdfe8bb0..bbacecb4 100644 --- a/abis_mapping/vocabs/target_habitat_scope.py +++ b/abis_mapping/vocabs/target_habitat_scope.py @@ -1044,14 +1044,15 @@ ), ) -TARGET_HABITAT_SCOPE = utils.vocabs.FlexibleVocabulary( - vocab_id="TARGET_HABITAT_SCOPE", - definition=rdflib.Literal("A type of targetHabitatScope"), - base=utils.rdf.uri("bdr-cv/attribute/targetHabitatScope/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/dd085299-ae86-4371-ae15-61dfa432f924"), - broader=rdflib.URIRef("https://linked.data.gov.au/def/nrm/c19a0098-1f3f-4bc2-b84d-fdb6d4e24d6f"), - default=None, - terms=( + +class TargetHabitatScope(utils.vocabs.FlexibleVocabulary): + vocab_id = "TARGET_HABITAT_SCOPE" + definition = rdflib.Literal("A type of targetHabitatScope") + base = utils.rdf.uri("bdr-cv/attribute/targetHabitatScope/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/dd085299-ae86-4371-ae15-61dfa432f924") + broader = rdflib.URIRef("https://linked.data.gov.au/def/nrm/c19a0098-1f3f-4bc2-b84d-fdb6d4e24d6f") + default = None + terms = ( BEACH, BILLABONG_OR_SWAMP, CAVE, @@ -1168,8 +1169,8 @@ URBAN, VINELAND, WOODLAND, - ), -) + ) + # Register -utils.vocabs.Vocabulary.register(TARGET_HABITAT_SCOPE) +utils.vocabs.Vocabulary.register(TargetHabitatScope) diff --git a/abis_mapping/vocabs/target_taxonomic_scope.py 
b/abis_mapping/vocabs/target_taxonomic_scope.py index d4e37cfe..61fefca5 100644 --- a/abis_mapping/vocabs/target_taxonomic_scope.py +++ b/abis_mapping/vocabs/target_taxonomic_scope.py @@ -79,14 +79,14 @@ ), ) -TARGET_TAXONOMIC_SCOPE = utils.vocabs.FlexibleVocabulary( - vocab_id="TARGET_TAXONOMIC_SCOPE", - definition=rdflib.Literal("A type of targetTaxonomicScope"), - base=utils.rdf.uri("bdr-cv/attribute/targetTaxonomicScope/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/dd085299-ae86-4371-ae15-61dfa432f924"), - broader=rdflib.URIRef("https://linked.data.gov.au/def/nrm/7ea12fed-6b87-4c20-9ab4-600b32ce15ec"), - default=None, - terms=( +class TargetTaxonomicScope(utils.vocabs.FlexibleVocabulary): + vocab_id = "TARGET_TAXONOMIC_SCOPE" + definition = rdflib.Literal("A type of targetTaxonomicScope") + base = utils.rdf.uri("bdr-cv/attribute/targetTaxonomicScope/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/dd085299-ae86-4371-ae15-61dfa432f924") + broader = rdflib.URIRef("https://linked.data.gov.au/def/nrm/7ea12fed-6b87-4c20-9ab4-600b32ce15ec") + default = None + terms = ( AMPHIBIAN, BIRD, INVERTEBRATE, @@ -94,8 +94,8 @@ NON_VASCULAR_PLANT, REPTILE, VASCULAR_PLANT, - ), -) + ) + # Register -utils.vocabs.Vocabulary.register(TARGET_TAXONOMIC_SCOPE) +utils.vocabs.Vocabulary.register(TargetTaxonomicScope) diff --git a/abis_mapping/vocabs/taxon_rank.py b/abis_mapping/vocabs/taxon_rank.py index a2ae80cd..22324b44 100644 --- a/abis_mapping/vocabs/taxon_rank.py +++ b/abis_mapping/vocabs/taxon_rank.py @@ -202,15 +202,16 @@ description="Variety", ) + # Vocabulary -TAXON_RANK = utils.vocabs.FlexibleVocabulary( - vocab_id="TAXON_RANK", - definition=rdflib.Literal("A type of taxonRank."), - base=utils.rdf.uri("bdr-cv/attribute/taxonRank/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/dd085299-ae86-4371-ae15-61dfa432f924"), - broader=utils.rdf.uri("bdr-cv/attribute/taxonRank", utils.namespaces.EXAMPLE), # TODO -> Need real URI - 
default=None, # No default, ommitted if not provided - terms=( +class TaxonRank(utils.vocabs.FlexibleVocabulary): + vocab_id = "TAXON_RANK" + definition = rdflib.Literal("A type of taxonRank.") + base = utils.rdf.uri("bdr-cv/attribute/taxonRank/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/dd085299-ae86-4371-ae15-61dfa432f924") + broader = utils.rdf.uri("bdr-cv/attribute/taxonRank", utils.namespaces.EXAMPLE) # TODO -> Need real URI + default = None # No default, ommitted if not provided + terms = ( CLASS, CULTIVAR, CULTIVARGROUP, @@ -246,7 +247,7 @@ UNRANKED, VARIETY, ) -) + # Register -utils.vocabs.Vocabulary.register(TAXON_RANK) +utils.vocabs.Vocabulary.register(TaxonRank) diff --git a/abis_mapping/vocabs/threat_status.py b/abis_mapping/vocabs/threat_status.py index 43984e24..d2e24d21 100644 --- a/abis_mapping/vocabs/threat_status.py +++ b/abis_mapping/vocabs/threat_status.py @@ -743,14 +743,14 @@ ) # Vocabulary -THREAT_STATUS = utils.vocabs.FlexibleVocabulary( - vocab_id="THREAT_STATUS", - definition=rdflib.Literal("A type of threatStatus."), - base=utils.rdf.uri("bdr-cv/parameter/threatStatus/"), - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/5699eca7-9ef0-47a6-bcfb-9306e0e2b85e"), - broader=utils.rdf.uri("bdr-cv/parameter/threatStatus", utils.namespaces.EXAMPLE), # TODO -> Need real URI - default=None, # No default, ommitted if not provided - terms=( +class ThreatStatus(utils.vocabs.FlexibleVocabulary): + vocab_id = "THREAT_STATUS" + definition = rdflib.Literal("A type of threatStatus.") + base = utils.rdf.uri("bdr-cv/parameter/threatStatus/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/5699eca7-9ef0-47a6-bcfb-9306e0e2b85e") + broader = utils.rdf.uri("bdr-cv/parameter/threatStatus", utils.namespaces.EXAMPLE) # TODO -> Need real URI + default = None # No default, ommitted if not provided + terms = ( ACT_CRITICALLY_ENDANGERED, ACT_ENDANGERED, ACT_EXTINCT_IN_THE_WILD, @@ -824,9 +824,9 @@ 
WA_SPECIALLY_PROTECTED_SPECIES, WA_T, WA_VU, - ), - publish=False, -) + ) + publish=False + # Register -utils.vocabs.Vocabulary.register(THREAT_STATUS) +utils.vocabs.Vocabulary.register(ThreatStatus) diff --git a/tests/base/test_mapper.py b/tests/base/test_mapper.py index 93d534eb..96be6e72 100644 --- a/tests/base/test_mapper.py +++ b/tests/base/test_mapper.py @@ -2,9 +2,10 @@ # Standard -import json import csv import io +import json +import unittest.mock # Third-party import frictionless @@ -16,7 +17,9 @@ from abis_mapping import utils # Typing -from typing import Any +from typing import Any, Optional, Iterator + +from abis_mapping.base import types as base_types def data_to_csv(data: list[dict[str, Any]]) -> bytes: @@ -321,3 +324,75 @@ def test_extra_fields_middle(mocker: pytest_mock.MockerFixture) -> None: assert not report.valid error_codes = [code for codes in report.flatten(['type']) for code in codes] assert error_codes == ["incorrect-label"] + + +def test_fields(mocker: pytest_mock.MockerFixture) -> None: + """Tests the fields method. 
+ + Args: + mocker (pytest_mock.MockerFixture): + """ + # Patch schema method + descriptor = { + "fields": [ + { + "name": "fieldA", + "title": "Title A", + "description": "Description A", + "example": "Example A", + "type": "typeA", + "format": "formatA", + "constraints": { + "required": False, + }, + "vocabularies": [ + "vocabularyA" + ] + }, + { + "name": "fieldB", + "title": "Title B", + "description": "Description B", + "example": "Example B", + "type": "typeB", + "format": "formatB", + "constraints": { + "required": False, + }, + "vocabularies": [ + "vocabularyB" + ] + } + ] + } + mocked_schema = mocker.patch.object(base.mapper.ABISMapper, "schema", return_value=descriptor) + + # Create mapper + class TestMapper(base.mapper.ABISMapper): + + def apply_mapping( + self, + data: base_types.ReadableType, + dataset_iri: Optional[rdflib.URIRef] = None, + base_iri: Optional[rdflib.Namespace] = None, + **kwargs: Any + ) -> Iterator[rdflib.Graph]: + pass + + def apply_validation( + self, + data: base_types.ReadableType, + **kwargs: Any + ) -> frictionless.Report: + pass + + mapper = TestMapper() + + # Assert + assert list(mapper.fields.keys()) == ["fieldA", "fieldB"] + mocked_schema.assert_called_once() + + + + + diff --git a/tests/types/test_schema.py b/tests/types/test_schema.py new file mode 100644 index 00000000..cfbe529a --- /dev/null +++ b/tests/types/test_schema.py @@ -0,0 +1,110 @@ +"""Provides unit tests for the schema module.""" + +# Third-party +import pydantic +import pytest + +# Local +from abis_mapping import types +from abis_mapping import utils + +# Typing +from typing import Any + + +class TestField: + @pytest.fixture + def field_d(self) -> dict[str, Any]: + """Returns a field definition. + + Returns: + dict[str, Any]: The field definition. 
+ """ + + return { + "name": "fieldA", + "title": "titleA", + "description": "descriptionA", + "example": "exampleA", + "type": "typeA", + "format": None, + "constraints": { + "required": False, + }, + "vocabularies": [ + "SEX" + ], + } + + def test_check_vocabularies(self, field_d: dict[str, Any]) -> None: + """Tests the check vocabularies method with valid vocab. + + Args: + field_d (dict[str, Any]): The field definition. + """ + # Perform validation + field = types.schema.Field.model_validate(field_d) + + # Assert + assert field.vocabularies == ["SEX"] + + def test_check_vocabularies_invalid_vocab(self, field_d: dict[str, Any]) -> None: + """Tests the custom vocabularies validator method with invalid input. + + Args: + field_d (dict[str, Any]): Field definition fixture + """ + # Modify the field fixture to have invalid vocab + field_d["vocabularies"] = ["AFAKEVOCAB123"] + + with pytest.raises(pydantic.ValidationError): + # Create field + types.schema.Field.model_validate(field_d) + + def test_get_vocab(self, field_d: dict[str, Any]) -> None: + """Tests the get vocab method. + + Args: + field_d dict[str, Any]: The field definition fixture. + """ + # Create field + field = types.schema.Field.model_validate(field_d) + + # Invoke + vocab = field.get_vocab() + + # Assert + assert vocab.vocab_id == "SEX" + + def test_get_vocab_invalid_id(self, field_d: dict[str, Any]) -> None: + """Test the get_vocab method with invalid id. + + Args: + field_d (dict[str, Any]): The field definition fixture. + """ + # Create field + field = types.schema.Field.model_validate(field_d) + + with pytest.raises(ValueError): + # Invoke + field.get_vocab("AFAKEVOCAB123") + + def test_get_vocab_no_vocabs(self, field_d: dict[str, Any]) -> None: + """Test the get_vocab method with no vocabs on field. + + Args: + field_d (dict[str, Any]): The field definition fixture. 
+ """ + # Modify field d + field_d["vocabularies"] = [] + + # Create field + field = types.schema.Field.model_validate(field_d) + + # Should raise index error + with pytest.raises(IndexError): + # Invoke + field.get_vocab() + + + From 8ffe871cbc95d725408621b08c84567e684c7f41 Mon Sep 17 00:00:00 2001 From: joecrowleygaia Date: Wed, 3 Jul 2024 11:41:03 +0800 Subject: [PATCH 2/3] Modified mapping methods to use new registry system for controlled vocabs, drawing upon defined vocabularies attached to fields. Fixed issues in doc generation for linting and typechecking. --- abis_mapping/base/mapper.py | 2 - .../incidental_occurrence_data/mapping.py | 235 ++++++++-------- .../incidental_occurrence_data/schema.json | 84 ++++-- .../incidental_occurrence_data_v2/mapping.py | 240 ++++++++-------- .../templates/survey_metadata/mapping.py | 45 ++- .../survey_occurrence_data/mapping.py | 258 +++++++++--------- .../survey_occurrence_data/schema.json | 5 +- .../templates/survey_site_data/mapping.py | 41 +-- .../survey_site_data/validators/shaping.py | 2 +- abis_mapping/types/schema.py | 5 +- abis_mapping/types/spatial.py | 17 +- abis_mapping/utils/vocabs.py | 18 +- abis_mapping/vocabs/life_stage.py | 14 +- abis_mapping/vocabs/sampling_effort_unit.py | 2 +- abis_mapping/vocabs/target_taxonomic_scope.py | 1 + abis_mapping/vocabs/threat_status.py | 3 +- docs/instructions.py | 6 +- docs/tables/base.py | 28 +- docs/tables/fields.py | 12 +- docs/tables/threat_status.py | 22 +- docs/tables/vocabs.py | 16 +- scripts/lint.sh | 2 +- scripts/typecheck.sh | 2 +- tests/base/test_mapper.py | 16 +- tests/conftest.py | 32 +++ tests/docs/tables/conftest.py | 38 --- tests/docs/tables/test_fields.py | 18 +- tests/types/test_schema.py | 4 - tests/types/test_spatial.py | 7 +- tests/utils/test_vocabs.py | 62 +++-- tests/vocabs/test_organism_quantity_type.py | 6 +- 31 files changed, 650 insertions(+), 593 deletions(-) delete mode 100644 tests/docs/tables/conftest.py diff --git 
a/abis_mapping/base/mapper.py b/abis_mapping/base/mapper.py index 6c495b5c..6bbde7b9 100644 --- a/abis_mapping/base/mapper.py +++ b/abis_mapping/base/mapper.py @@ -313,7 +313,6 @@ def schema( @final @classmethod @property - @functools.lru_cache def fields(cls) -> dict[str, types.schema.Field]: """Indexed dictionary of all fields' metadata. @@ -327,7 +326,6 @@ def fields(cls) -> dict[str, types.schema.Field]: # Return dictionary of fields return {f.name: f for f in schema.fields} - @final @classmethod @functools.lru_cache diff --git a/abis_mapping/templates/incidental_occurrence_data/mapping.py b/abis_mapping/templates/incidental_occurrence_data/mapping.py index 6e51fa09..b9450d26 100644 --- a/abis_mapping/templates/incidental_occurrence_data/mapping.py +++ b/abis_mapping/templates/incidental_occurrence_data/mapping.py @@ -860,12 +860,11 @@ def add_observation_scientific_name( # that this row has a specimen, otherwise it is Field Sample foi = sample_specimen if has_specimen(row) else sample_field - # Retrieve Vocab or Create on the Fly - vocab = vocabs.identification_method.IDENTIFICATION_METHOD.get( - graph=graph, - value=row["identificationMethod"], - source=dataset, - ) + # Retrieve registered vocab + vocab = self.fields["identificationMethod"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["identificationMethod"]) # Add to Graph graph.add((uri, a, utils.namespaces.TERN.Observation)) @@ -879,7 +878,7 @@ def add_observation_scientific_name( graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) graph.add((temporal_entity, a, rdflib.TIME.Instant)) graph.add((temporal_entity, date_identified.rdf_in_xsd, date_identified.to_rdf_literal())) - graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + graph.add((uri, rdflib.SOSA.usedProcedure, term)) # Check for identifiedBy if row["identifiedBy"]: @@ -949,12 +948,11 @@ def add_observation_verbatim_id( # that this row has a specimen, otherwise it is Field Sample foi = 
sample_specimen if has_specimen(row) else sample_field - # Retrieve Vocab or Create on the Fly - vocab = vocabs.identification_method.IDENTIFICATION_METHOD.get( - graph=graph, - value=row["identificationMethod"], - source=dataset, - ) + # Retrieve vocab + vocab = self.fields["identificationMethod"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["identificationMethod"]) # Add to Graph graph.add((uri, a, utils.namespaces.TERN.Observation)) @@ -968,7 +966,7 @@ def add_observation_verbatim_id( graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) graph.add((temporal_entity, a, rdflib.TIME.Instant)) graph.add((temporal_entity, date_identified.rdf_in_xsd, date_identified.to_rdf_literal())) - graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + graph.add((uri, rdflib.SOSA.usedProcedure, term)) # Check for identifiedBy if row["identifiedBy"]: @@ -1025,12 +1023,11 @@ def add_sampling_field( datum=row["geodeticDatum"] ) - # Retrieve Vocab or Create on the Fly - vocab = vocabs.sampling_protocol.SAMPLING_PROTOCOL.get( - graph=graph, - value=row["samplingProtocol"], - source=dataset, - ) + # Retrieve vocab + vocab = self.fields["samplingProtocol"].get_vocab() + + # Retrieve term or create on the fly + term = vocab(graph=graph, source=dataset).get(row["samplingProtocol"]) # Add to Graph graph.add((uri, a, utils.namespaces.TERN.Sampling)) @@ -1059,7 +1056,7 @@ def add_sampling_field( graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) graph.add((temporal_entity, a, rdflib.TIME.Instant)) graph.add((temporal_entity, event_date.rdf_in_xsd, event_date.to_rdf_literal())) - graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + graph.add((uri, rdflib.SOSA.usedProcedure, term)) # Check for recordID if row["recordID"]: @@ -1154,18 +1151,17 @@ def add_id_qualifier_value( if not row["identificationQualifier"]: return + # Retrieve vocab for field + vocab = self.fields["identificationQualifier"].get_vocab() + # Retrieve Vocab or 
Create on the Fly - vocab = vocabs.identification_qualifier.IDENTIFICATION_QUALIFIER.get( - graph=graph, - value=row["identificationQualifier"], - source=dataset, - ) + term = vocab(graph=graph, source=dataset).get(row["identificationQualifier"]) # Identification Qualifier Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("identificationQualifier"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_id_remarks_attribute( self, @@ -1394,12 +1390,12 @@ def add_sample_field( with this node graph (rdflib.Graph): Graph to add to """ - # Retrieve Vocab or Create on the Fly - vocab = vocabs.kingdom.KINGDOM_OCCURRENCE.get( - graph=graph, - value=row["kingdom"], - source=dataset, - ) + + # Retrieve vocab for field + vocab = self.fields["kingdom"].get_vocab("KINGDOM_OCCURRENCE") + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["kingdom"]) # Add to Graph graph.add((uri, a, utils.namespaces.TERN.FeatureOfInterest)) @@ -1408,7 +1404,7 @@ def add_sample_field( graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("field-sample"))) graph.add((uri, rdflib.SOSA.isResultOf, sampling_field)) graph.add((uri, rdflib.SOSA.isSampleOf, feature_of_interest)) - graph.add((uri, utils.namespaces.TERN.featureType, vocab)) + graph.add((uri, utils.namespaces.TERN.featureType, term)) # Check for institutionCode if row["institutionCode"]: @@ -1552,12 +1548,11 @@ def add_sample_specimen( if not has_specimen(row): return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.kingdom.KINGDOM_SPECIMEN.get( - graph=graph, - value=row["kingdom"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["kingdom"].get_vocab("KINGDOM_SPECIMEN") + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["kingdom"]) # Add to Graph graph.add((uri, a, 
utils.namespaces.TERN.FeatureOfInterest)) @@ -1566,7 +1561,7 @@ def add_sample_specimen( graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("specimen-sample"))) graph.add((uri, rdflib.SOSA.isResultOf, sampling_specimen)) graph.add((uri, rdflib.SOSA.isSampleOf, sample_field)) - graph.add((uri, utils.namespaces.TERN.featureType, vocab)) + graph.add((uri, utils.namespaces.TERN.featureType, term)) # Check for catalogNumber if row["catalogNumber"]: @@ -1718,18 +1713,17 @@ def add_kingdom_value( dataset (rdflib.URIRef): Dataset this belongs to graph (rdflib.Graph): Graph to add to """ - # Retrieve Vocab or Create on the Fly - vocab = vocabs.kingdom.KINGDOM.get( - graph=graph, - value=row["kingdom"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["kingdom"].get_vocab("KINGDOM") + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["kingdom"]) # Kingdom Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal(f"kingdom = {row['kingdom']}"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_taxon_rank_attribute( self, @@ -1779,18 +1773,17 @@ def add_taxon_rank_value( if not row["taxonRank"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.taxon_rank.TAXON_RANK.get( - graph=graph, - value=row["taxonRank"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["taxonRank"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["taxonRank"]) # Taxon Rank Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal(f"taxon rank = {row['taxonRank']}"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_individual_count_observation( self, @@ -2060,18 +2053,17 
@@ def add_basis_value( if not row["basisOfRecord"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.basis_of_record.BASIS_OF_RECORD.get( - graph=graph, - value=row["basisOfRecord"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["basisOfRecord"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["basisOfRecord"]) # Basis of Record Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("basisOfRecord"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_owner_institution_provider( self, @@ -2189,18 +2181,17 @@ def add_occurrence_status_value( if not row["occurrenceStatus"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.occurrence_status.OCCURRENCE_STATUS.get( - graph=graph, - value=row["occurrenceStatus"], - source=dataset, - ) + # Retrieve vocab from field + vocab = self.fields["occurrenceStatus"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["occurrenceStatus"]) # Occurrence Status Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal(f"occurrenceStatus = {row['occurrenceStatus']}"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_preparations_attribute( self, @@ -2250,18 +2241,17 @@ def add_preparations_value( if not row["preparations"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.preparations.PREPARATIONS.get( - graph=graph, - value=row["preparations"], - source=dataset, - ) + # Retrieve vocab from field + vocab = self.fields["preparations"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["preparations"]) # Preparations Value 
graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("preparations"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_establishment_means_observation( self, @@ -2343,18 +2333,17 @@ def add_establishment_means_value( if not row["establishmentMeans"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.establishment_means.ESTABLISHMENT_MEANS.get( - graph=graph, - value=row["establishmentMeans"], - source=dataset, - ) + # Retrieve vocab from field + vocab = self.fields["establishmentMeans"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["establishmentMeans"]) # Establishment Means Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("establishmentMeans-value"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_life_stage_observation( self, @@ -2444,18 +2433,17 @@ def add_life_stage_value( if not row["lifeStage"]: return + # Retrieve vocab from field + vocab = self.fields["lifeStage"].get_vocab() + # Retrieve Vocab or Create on the Fly - vocab = vocabs.life_stage.LIFE_STAGE.get( - graph=graph, - value=row["lifeStage"], - source=dataset, - ) + term = vocab(graph=graph, source=dataset).get(row["lifeStage"]) # Life Stage Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("lifeStage-value"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_sex_observation( self, @@ -2544,18 +2532,17 @@ def add_sex_value( if not row["sex"]: return + # Retrieve vocab for field + vocab = self.fields["sex"].get_vocab() + # Retrieve Vocab or Create on the Fly - vocab = vocabs.sex.SEX.get( - 
graph=graph, - value=row["sex"], - source=dataset, - ) + term = vocab(graph=graph, source=dataset).get(row["sex"]) # Sex Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("sex-value"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_reproductive_condition_observation( self, @@ -2645,18 +2632,17 @@ def add_reproductive_condition_value( if not row["reproductiveCondition"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.reproductive_condition.REPRODUCTIVE_CONDITION.get( - graph=graph, - value=row["reproductiveCondition"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["reproductiveCondition"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["reproductiveCondition"]) # Reproductive Condition Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("reproductiveCondition-value"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_accepted_name_usage_observation( self, @@ -2771,12 +2757,11 @@ def add_sampling_sequencing( datum=row["geodeticDatum"], ) + # Retrieve vocab for field + vocab = self.fields["sequencingMethod"].get_vocab() + # Retrieve Vocab or Create on the Fly - vocab = vocabs.sequencing_method.SEQUENCING_METHOD.get( - graph=graph, - value=row["sequencingMethod"], - source=dataset, - ) + term = vocab(graph=graph, source=dataset).get(row["sequencingMethod"]) # Add to Graph graph.add((uri, a, utils.namespaces.TERN.Sampling)) @@ -2788,7 +2773,7 @@ def add_sampling_sequencing( graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) graph.add((temporal_entity, a, rdflib.TIME.Instant)) graph.add((temporal_entity, event_date.rdf_in_xsd, event_date.to_rdf_literal())) - graph.add((uri, 
rdflib.SOSA.usedProcedure, vocab)) + graph.add((uri, rdflib.SOSA.usedProcedure, term)) # Add geometry geometry_node = rdflib.BNode() @@ -2934,13 +2919,11 @@ def add_threat_status_observation( or row["preparedDate"] or row["eventDate"] ) + # Retrieve vocab for field + vocab = self.fields["threatStatusCheckProtocol"].get_vocab() - # Retrieve Vocab or Create on the Fly - vocab = vocabs.check_protocol.CHECK_PROTOCOL.get( - graph=graph, - value=row["threatStatusCheckProtocol"], - source=dataset, - ) + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["threatStatusCheckProtocol"]) # Threat Status Observation graph.add((uri, a, utils.namespaces.TERN.Observation)) @@ -2951,7 +2934,7 @@ def add_threat_status_observation( graph.add((uri, rdflib.SOSA.hasSimpleResult, rdflib.Literal(row["threatStatus"]))) graph.add((uri, rdflib.SOSA.observedProperty, CONCEPT_CONSERVATION_STATUS)) graph.add((uri, rdflib.PROV.wasInfluencedBy, jurisdiction_attribute)) - graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + graph.add((uri, rdflib.SOSA.usedProcedure, term)) temporal_entity = rdflib.BNode() graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) graph.add((temporal_entity, a, rdflib.TIME.Instant)) @@ -3003,18 +2986,17 @@ def add_threat_status_value( # Combine conservationJurisdiction and threatStatus value = f"{row['conservationJurisdiction']}/{row['threatStatus']}" - # Retrieve Vocab or Create on the Fly - vocab = vocabs.threat_status.THREAT_STATUS.get( - graph=graph, - value=value, - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["threatStatus"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(value) # Threat Status Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal(f"Conservation status = {row['threatStatus']}"))) - graph.add((uri, rdflib.RDF.value, vocab)) + 
graph.add((uri, rdflib.RDF.value, term)) def add_conservation_jurisdiction_attribute( self, @@ -3062,8 +3044,11 @@ def add_conservation_jurisdiction_value( if not row["conservationJurisdiction"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.conservation_jurisdiction.CONSERVATION_JURISDICTION.get(row["conservationJurisdiction"]) + # Retrieve vocab for field + vocab = self.fields["conservationJurisdiction"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph).get(row["conservationJurisdiction"]) # Construct Label label = f"Conservation Jurisdiction = {row['conservationJurisdiction']}" @@ -3072,7 +3057,7 @@ def add_conservation_jurisdiction_value( graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal(label))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) # Helper Functions diff --git a/abis_mapping/templates/incidental_occurrence_data/schema.json b/abis_mapping/templates/incidental_occurrence_data/schema.json index a93fafe2..54c13913 100644 --- a/abis_mapping/templates/incidental_occurrence_data/schema.json +++ b/abis_mapping/templates/incidental_occurrence_data/schema.json @@ -114,7 +114,10 @@ "format": "default", "constraints": { "required": false - } + }, + "vocabularies": [ + "SAMPLING_PROTOCOL" + ] }, { "name": "basisOfRecord", @@ -125,7 +128,10 @@ "format": "default", "constraints": { "required": false - } + }, + "vocabularies": [ + "BASIS_OF_RECORD" + ] }, { "name": "recordedBy", @@ -158,7 +164,10 @@ "format": "default", "constraints": { "required": false - } + }, + "vocabularies": [ + "OCCURRENCE_STATUS" + ] }, { "name": "habitat", @@ -180,7 +189,10 @@ "format": "default", "constraints": { "required": false - } + }, + "vocabularies": [ + "ESTABLISHMENT_MEANS" + ] }, { "name": "organismRemarks", @@ -213,7 +225,10 @@ "format": "default", "constraints": { "required": false - } + }, + 
"vocabularies": [ + "LIFE_STAGE" + ] }, { "name": "sex", @@ -224,7 +239,10 @@ "format": "default", "constraints": { "required": false - } + }, + "vocabularies": [ + "SEX" + ] }, { "name": "reproductiveCondition", @@ -235,7 +253,10 @@ "format": "default", "constraints": { "required": false - } + }, + "vocabularies": [ + "REPRODUCTIVE_CONDITION" + ] }, { "name": "occurrenceID", @@ -312,7 +333,10 @@ "format": "default", "constraints": { "required": false - } + }, + "vocabularies": [ + "PREPARATIONS" + ] }, { "name": "preparedDate", @@ -345,7 +369,10 @@ "format": "default", "constraints": { "required": false - } + }, + "vocabularies": [ + "SEQUENCING_METHOD" + ] }, { "name": "verbatimIdentification", @@ -389,7 +416,10 @@ "format": "default", "constraints": { "required": false - } + }, + "vocabularies": [ + "IDENTIFICATION_METHOD" + ] }, { "name": "scientificName", @@ -411,7 +441,10 @@ "format": "default", "constraints": { "required": false - } + }, + "vocabularies": [ + "IDENTIFICATION_QUALIFIER" + ] }, { "name": "identificationRemarks", @@ -444,7 +477,12 @@ "format": "default", "constraints": { "required": true - } + }, + "vocabularies": [ + "KINGDOM", + "KINGDOM_OCCURRENCE", + "KINGDOM_SPECIMEN" + ] }, { "name": "taxonRank", @@ -455,7 +493,10 @@ "format": "default", "constraints": { "required": false - } + }, + "vocabularies": [ + "TAXON_RANK" + ] }, { "name": "threatStatus", @@ -466,7 +507,10 @@ "format": "default", "constraints": { "required": false - } + }, + "vocabularies": [ + "THREAT_STATUS" + ] }, { "name": "conservationJurisdiction", @@ -506,7 +550,10 @@ "WESTERN AUSTRALIA", "Western Australia" ] - } + }, + "vocabularies": [ + "CONSERVATION_JURISDICTION" + ] }, { "name": "threatStatusCheckProtocol", @@ -517,7 +564,10 @@ "format": "default", "constraints": { "required": false - } + }, + "vocabularies": [ + "CHECK_PROTOCOL" + ] }, { "name": "threatStatusDateDetermined", @@ -542,4 +592,4 @@ } } ] -} \ No newline at end of file +} diff --git 
a/abis_mapping/templates/incidental_occurrence_data_v2/mapping.py b/abis_mapping/templates/incidental_occurrence_data_v2/mapping.py index b3d05242..ed12dff8 100644 --- a/abis_mapping/templates/incidental_occurrence_data_v2/mapping.py +++ b/abis_mapping/templates/incidental_occurrence_data_v2/mapping.py @@ -949,12 +949,11 @@ def add_observation_scientific_name( # that this row has a specimen, otherwise it is Field Sample foi = sample_specimen if has_specimen(row) else sample_field - # Retrieve Vocab or Create on the Fly - vocab = vocabs.identification_method.IDENTIFICATION_METHOD.get( - graph=graph, - value=row["identificationMethod"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["identificationMethod"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["identificationMethod"]) # Add to Graph graph.add((uri, a, utils.namespaces.TERN.Observation)) @@ -968,7 +967,7 @@ def add_observation_scientific_name( graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) graph.add((temporal_entity, a, rdflib.TIME.Instant)) graph.add((temporal_entity, date_identified.rdf_in_xsd, date_identified.to_rdf_literal())) - graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + graph.add((uri, rdflib.SOSA.usedProcedure, term)) # Check for identifiedBy if row["identifiedBy"]: @@ -1032,12 +1031,11 @@ def add_observation_verbatim_id( # that this row has a specimen, otherwise it is Field Sample foi = sample_specimen if has_specimen(row) else sample_field - # Retrieve Vocab or Create on the Fly - vocab = vocabs.identification_method.IDENTIFICATION_METHOD.get( - graph=graph, - value=row["identificationMethod"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["identificationMethod"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["identificationMethod"]) # Add to Graph graph.add((uri, a, utils.namespaces.TERN.Observation)) @@ -1051,7 
+1049,7 @@ def add_observation_verbatim_id( graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) graph.add((temporal_entity, a, rdflib.TIME.Instant)) graph.add((temporal_entity, date_identified.rdf_in_xsd, date_identified.to_rdf_literal())) - graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + graph.add((uri, rdflib.SOSA.usedProcedure, term)) # Check for identifiedBy if row["identifiedBy"]: @@ -1178,12 +1176,11 @@ def add_sampling_field( datum=row["geodeticDatum"] ) - # Retrieve Vocab or Create on the Fly - vocab = vocabs.sampling_protocol.SAMPLING_PROTOCOL.get( - graph=graph, - value=row["samplingProtocol"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["samplingProtocol"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["samplingProtocol"]) # Add to Graph graph.add((uri, a, utils.namespaces.TERN.Sampling)) @@ -1212,7 +1209,7 @@ def add_sampling_field( graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) graph.add((temporal_entity, a, rdflib.TIME.Instant)) graph.add((temporal_entity, event_date.rdf_in_xsd, event_date.to_rdf_literal())) - graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + graph.add((uri, rdflib.SOSA.usedProcedure, term)) # Add Identifier graph.add( @@ -1343,18 +1340,17 @@ def add_id_qualifier_value( if not row["identificationQualifier"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.identification_qualifier.IDENTIFICATION_QUALIFIER.get( - graph=graph, - value=row["identificationQualifier"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["identificationQualifier"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["identificationQualifier"]) # Identification Qualifier Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("identificationQualifier"))) - graph.add((uri, 
rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_id_remarks_attribute( self, @@ -1673,12 +1669,11 @@ def add_sample_field( with record number literal. graph (rdflib.Graph): Graph to add to """ - # Retrieve Vocab or Create on the Fly - vocab = vocabs.kingdom.KINGDOM_OCCURRENCE.get( - graph=graph, - value=row["kingdom"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["kingdom"].get_vocab("KINGDOM_OCCURRENCE") + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["kingdom"]) # Add to Graph graph.add((uri, a, utils.namespaces.TERN.FeatureOfInterest)) @@ -1687,7 +1682,7 @@ def add_sample_field( graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("field-sample"))) graph.add((uri, rdflib.SOSA.isResultOf, sampling_field)) graph.add((uri, rdflib.SOSA.isSampleOf, feature_of_interest)) - graph.add((uri, utils.namespaces.TERN.featureType, vocab)) + graph.add((uri, utils.namespaces.TERN.featureType, term)) # Check for recordNumber if row["recordNumber"]: @@ -1780,12 +1775,11 @@ def add_sample_specimen( if not has_specimen(row): return + # Retrieve vocab for field + vocab = self.fields["kingdom"].get_vocab("KINGDOM_SPECIMEN") + # Retrieve Vocab or Create on the Fly - vocab = vocabs.kingdom.KINGDOM_SPECIMEN.get( - graph=graph, - value=row["kingdom"], - source=dataset, - ) + term = vocab(graph=graph, source=dataset).get(row["kingdom"]) # Add to Graph graph.add((uri, a, utils.namespaces.TERN.FeatureOfInterest)) @@ -1794,7 +1788,7 @@ def add_sample_specimen( graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("specimen-sample"))) graph.add((uri, rdflib.SOSA.isResultOf, sampling_specimen)) graph.add((uri, rdflib.SOSA.isSampleOf, sample_field)) - graph.add((uri, utils.namespaces.TERN.featureType, vocab)) + graph.add((uri, utils.namespaces.TERN.featureType, term)) # Check for catalogNumber if row["catalogNumber"]: @@ -1908,18 +1902,17 @@ def add_kingdom_value( dataset (rdflib.URIRef): Dataset 
this belongs to graph (rdflib.Graph): Graph to add to """ - # Retrieve Vocab or Create on the Fly - vocab = vocabs.kingdom.KINGDOM.get( - graph=graph, - value=row["kingdom"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["kingdom"].get_vocab("KINGDOM") + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["kingdom"]) # Kingdom Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal(f"kingdom = {row['kingdom']}"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_taxon_rank_attribute( self, @@ -1968,19 +1961,17 @@ def add_taxon_rank_value( # Check Existence if not row["taxonRank"]: return + # Retrieve vocab for field + vocab = self.fields["taxonRank"].get_vocab() - # Retrieve Vocab or Create on the Fly - vocab = vocabs.taxon_rank.TAXON_RANK.get( - graph=graph, - value=row["taxonRank"], - source=dataset, - ) + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["taxonRank"]) # Taxon Rank Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal(f"taxon rank = {row['taxonRank']}"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_individual_count_observation( self, @@ -2234,18 +2225,17 @@ def add_basis_value( if not row["basisOfRecord"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.basis_of_record.BASIS_OF_RECORD.get( - graph=graph, - value=row["basisOfRecord"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["basisOfRecord"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["basisOfRecord"]) # Basis of Record Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, 
utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("basisOfRecord"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_owner_institution_provider( self, @@ -2355,18 +2345,17 @@ def add_occurrence_status_value( if not row["occurrenceStatus"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.occurrence_status.OCCURRENCE_STATUS.get( - graph=graph, - value=row["occurrenceStatus"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["occurrenceStatus"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["occurrenceStatus"]) # Occurrence Status Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal(f"occurrenceStatus = {row['occurrenceStatus']}"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_preparations_attribute( self, @@ -2416,18 +2405,17 @@ def add_preparations_value( if not row["preparations"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.preparations.PREPARATIONS.get( - graph=graph, - value=row["preparations"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["preparations"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["preparations"]) # Preparations Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("preparations"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_establishment_means_observation( self, @@ -2501,18 +2489,17 @@ def add_establishment_means_value( if not row["establishmentMeans"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.establishment_means.ESTABLISHMENT_MEANS.get( - 
graph=graph, - value=row["establishmentMeans"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["establishmentMeans"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["establishmentMeans"]) # Establishment Means Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("establishmentMeans-value"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_life_stage_observation( self, @@ -2594,18 +2581,17 @@ def add_life_stage_value( if not row["lifeStage"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.life_stage.LIFE_STAGE.get( - graph=graph, - value=row["lifeStage"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["lifeStage"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["lifeStage"]) # Life Stage Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("lifeStage-value"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_sex_observation( self, @@ -2686,18 +2672,17 @@ def add_sex_value( if not row["sex"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.sex.SEX.get( - graph=graph, - value=row["sex"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["sex"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["sex"]) # Sex Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("sex-value"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_reproductive_condition_observation( self, @@ 
-2779,18 +2764,17 @@ def add_reproductive_condition_value( if not row["reproductiveCondition"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.reproductive_condition.REPRODUCTIVE_CONDITION.get( - graph=graph, - value=row["reproductiveCondition"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["reproductiveCondition"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["reproductiveCondition"]) # Reproductive Condition Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("reproductiveCondition-value"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_accepted_name_usage_observation( self, @@ -2901,12 +2885,11 @@ def add_sampling_sequencing( datum=row["geodeticDatum"], ) - # Retrieve Vocab or Create on the Fly - vocab = vocabs.sequencing_method.SEQUENCING_METHOD.get( - graph=graph, - value=row["sequencingMethod"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["sequencingMethod"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["sequencingMethod"]) # Add to Graph graph.add((uri, a, utils.namespaces.TERN.Sampling)) @@ -2918,7 +2901,7 @@ def add_sampling_sequencing( graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) graph.add((temporal_entity, a, rdflib.TIME.Instant)) graph.add((temporal_entity, event_date.rdf_in_xsd, event_date.to_rdf_literal())) - graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + graph.add((uri, rdflib.SOSA.usedProcedure, term)) # Add geometry geometry_node = rdflib.BNode() @@ -3057,12 +3040,11 @@ def add_threat_status_observation( or row["eventDate"] ) - # Retrieve Vocab or Create on the Fly - vocab = vocabs.check_protocol.CHECK_PROTOCOL.get( - graph=graph, - value=row["threatStatusCheckProtocol"], - source=dataset, - ) + # 
Retrieve vocab for field + vocab = self.fields["threatStatusCheckProtocol"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["threatStatusCheckProtocol"]) # Threat Status Observation graph.add((uri, a, utils.namespaces.TERN.Observation)) @@ -3073,7 +3055,7 @@ def add_threat_status_observation( graph.add((uri, rdflib.SOSA.hasSimpleResult, rdflib.Literal(row["threatStatus"]))) graph.add((uri, rdflib.SOSA.observedProperty, CONCEPT_CONSERVATION_STATUS)) graph.add((uri, rdflib.PROV.wasInfluencedBy, jurisdiction_attribute)) - graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + graph.add((uri, rdflib.SOSA.usedProcedure, term)) temporal_entity = rdflib.BNode() graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) graph.add((temporal_entity, a, rdflib.TIME.Instant)) @@ -3119,18 +3101,17 @@ def add_threat_status_value( # Combine conservationJurisdiction and threatStatus value = f"{row['conservationJurisdiction']}/{row['threatStatus']}" - # Retrieve Vocab or Create on the Fly - vocab = vocabs.threat_status.THREAT_STATUS.get( - graph=graph, - value=value, - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["threatStatus"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(value) # Threat Status Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal(f"Conservation status = {row['threatStatus']}"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_conservation_jurisdiction_attribute( self, @@ -3178,8 +3159,11 @@ def add_conservation_jurisdiction_value( if not row["conservationJurisdiction"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.conservation_jurisdiction.CONSERVATION_JURISDICTION.get(row["conservationJurisdiction"]) + # Retrieve vocab for field + vocab = 
self.fields["conservationJurisdiction"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph).get(row["conservationJurisdiction"]) # Construct Label label = f"Conservation Jurisdiction = {row['conservationJurisdiction']}" @@ -3188,7 +3172,7 @@ def add_conservation_jurisdiction_value( graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal(label))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) # Helper Functions diff --git a/abis_mapping/templates/survey_metadata/mapping.py b/abis_mapping/templates/survey_metadata/mapping.py index fc273c1e..7dd35cb0 100644 --- a/abis_mapping/templates/survey_metadata/mapping.py +++ b/abis_mapping/templates/survey_metadata/mapping.py @@ -13,7 +13,6 @@ from abis_mapping import plugins from abis_mapping import types from abis_mapping import utils -from abis_mapping import vocabs # Typing from typing import Optional, Iterator, Any @@ -805,13 +804,12 @@ def add_survey_type_value( # Add label graph.add((uri, rdflib.RDFS.label, rdflib.Literal("surveyType"))) + # Retrieve vocab for field + vocab = self.fields["surveyType"].get_vocab() + # Add value - vocab = vocabs.survey_type.SURVEY_TYPE.get( - graph=graph, - value=survey_type, - source=dataset, - ) - graph.add((uri, rdflib.RDF.value, vocab)) + term = vocab(graph=graph, source=dataset).get(survey_type) + graph.add((uri, rdflib.RDF.value, term)) def add_sampling_effort_attribute( self, @@ -881,14 +879,13 @@ def add_sampling_effort_value( # Add label graph.add((uri, rdflib.RDFS.label, rdflib.Literal("samplingEffort"))) + # Retrieve vocab for field + vocab = self.fields["samplingEffortUnit"].get_vocab() + # Add value - unit_vocab = vocabs.sampling_effort_unit.SAMPLING_EFFORT_UNIT.get( - graph=graph, - value=effort_unit, - source=dataset, - ) + term = vocab(graph=graph, source=dataset).get(effort_unit) graph.add((uri, rdflib.RDF.value, 
rdflib.Literal(effort_value))) - graph.add((uri, utils.namespaces.TERN.unit, unit_vocab)) + graph.add((uri, utils.namespaces.TERN.unit, term)) def add_target_habitat_attribute( self, @@ -942,13 +939,12 @@ def add_target_habitat_value( # Add label graph.add((uri, rdflib.RDFS.label, rdflib.Literal("targetHabitatScope"))) + # Retrieve vocab for field + vocab = self.fields["targetHabitatScope"].get_vocab() + # Add value - vocab = vocabs.target_habitat_scope.TARGET_HABITAT_SCOPE.get( - graph=graph, - value=raw_value, - source=dataset, - ) - graph.add((uri, rdflib.RDF.value, vocab)) + term = vocab(graph=graph, source=dataset).get(raw_value) + graph.add((uri, rdflib.RDF.value, term)) def add_target_taxonomic_attribute( self, @@ -1003,13 +999,12 @@ def add_target_taxonomic_value( # Add label graph.add((uri, rdflib.RDFS.label, rdflib.Literal("targetTaxonomicScope"))) + # Retrieve vocab for field + vocab = self.fields["targetTaxonomicScope"].get_vocab() + # Add value - vocab = vocabs.target_taxonomic_scope.TARGET_TAXONOMIC_SCOPE.get( - graph=graph, - value=raw_value, - source=dataset, - ) - graph.add((uri, rdflib.RDF.value, vocab)) + term = vocab(graph=graph, source=dataset).get(raw_value) + graph.add((uri, rdflib.RDF.value, term)) base.mapper.ABISMapper.register_mapper(SurveyMetadataMapper) diff --git a/abis_mapping/templates/survey_occurrence_data/mapping.py b/abis_mapping/templates/survey_occurrence_data/mapping.py index 567bace2..4edc1609 100644 --- a/abis_mapping/templates/survey_occurrence_data/mapping.py +++ b/abis_mapping/templates/survey_occurrence_data/mapping.py @@ -1090,12 +1090,11 @@ def add_observation_scientific_name( # that this row has a specimen, otherwise it is Field Sample foi = sample_specimen if has_specimen(row) else sample_field + # Retrieve vocab for field + vocab = self.fields["identificationMethod"].get_vocab() + # Retrieve Vocab or Create on the Fly - vocab = vocabs.identification_method.IDENTIFICATION_METHOD.get( - graph=graph, - 
value=row["identificationMethod"], - source=dataset, - ) + term = vocab(graph=graph, source=dataset).get(row["identificationMethod"]) # Add to Graph graph.add((uri, a, utils.namespaces.TERN.Observation)) @@ -1109,7 +1108,7 @@ def add_observation_scientific_name( graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) graph.add((temporal_entity, a, rdflib.TIME.Instant)) graph.add((temporal_entity, date_identified.rdf_in_xsd, date_identified.to_rdf_literal())) - graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + graph.add((uri, rdflib.SOSA.usedProcedure, term)) # Check for identifiedBy if row["identifiedBy"]: @@ -1173,12 +1172,11 @@ def add_observation_verbatim_id( # that this row has a specimen, otherwise it is Field Sample foi = sample_specimen if has_specimen(row) else sample_field - # Retrieve Vocab or Create on the Fly - vocab = vocabs.identification_method.IDENTIFICATION_METHOD.get( - graph=graph, - value=row["identificationMethod"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["identificationMethod"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["identificationMethod"]) # Add to Graph graph.add((uri, a, utils.namespaces.TERN.Observation)) @@ -1192,7 +1190,7 @@ def add_observation_verbatim_id( graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) graph.add((temporal_entity, a, rdflib.TIME.Instant)) graph.add((temporal_entity, date_identified.rdf_in_xsd, date_identified.to_rdf_literal())) - graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + graph.add((uri, rdflib.SOSA.usedProcedure, term)) # Check for identifiedBy if row["identifiedBy"]: @@ -1339,12 +1337,11 @@ def add_sampling_field( # Add survey graph.add((uri, rdflib.SDO.isPartOf, DEFAULT_SURVEY)) # TODO -> Cross reference - # Retrieve Vocab or Create on the Fly - vocab = vocabs.sampling_protocol.SAMPLING_PROTOCOL.get( - graph=graph, - value=row["samplingProtocol"], - source=dataset, - ) + # Retrieve vocab for field + 
vocab = self.fields["samplingProtocol"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["samplingProtocol"]) # Add to Graph graph.add((uri, a, utils.namespaces.TERN.Sampling)) @@ -1354,7 +1351,7 @@ def add_sampling_field( graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) graph.add((temporal_entity, a, rdflib.TIME.Instant)) graph.add((temporal_entity, event_date.rdf_in_xsd, event_date.to_rdf_literal())) - graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + graph.add((uri, rdflib.SOSA.usedProcedure, term)) # Add geometry geometry_node = rdflib.BNode() @@ -1504,18 +1501,17 @@ def add_id_qualifier_value( if not row["identificationQualifier"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.identification_qualifier.IDENTIFICATION_QUALIFIER.get( - graph=graph, - value=row["identificationQualifier"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["identificationQualifier"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["identificationQualifier"]) # Identification Qualifier Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("identificationQualifier"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_id_remarks_attribute( self, @@ -1856,12 +1852,11 @@ def add_sample_field( provided else None. 
graph (rdflib.Graph): Graph to add to """ - # Retrieve Vocab or Create on the Fly - vocab = vocabs.kingdom.KINGDOM_OCCURRENCE.get( - graph=graph, - value=row["kingdom"], - source=dataset, - ) + # Retrieve vocab for field (multiple exists for kingdom) + vocab = self.fields["kingdom"].get_vocab("KINGDOM_OCCURRENCE") + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["kingdom"]) # Add to Graph graph.add((uri, a, utils.namespaces.TERN.FeatureOfInterest)) @@ -1869,7 +1864,7 @@ def add_sample_field( graph.add((uri, rdflib.VOID.inDataset, dataset)) graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("field-sample"))) graph.add((uri, rdflib.SOSA.isResultOf, sampling_field)) - graph.add((uri, utils.namespaces.TERN.featureType, vocab)) + graph.add((uri, utils.namespaces.TERN.featureType, term)) if site is not None: graph.add((uri, rdflib.SOSA.isSampleOf, site)) @@ -1971,12 +1966,11 @@ def add_sample_specimen( if not has_specimen(row): return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.kingdom.KINGDOM_SPECIMEN.get( - graph=graph, - value=row["kingdom"], - source=dataset, - ) + # Retrieve vocab for field (multiple exists for kingdom) + vocab = self.fields["kingdom"].get_vocab("KINGDOM_SPECIMEN") + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["kingdom"]) # Add to Graph graph.add((uri, a, utils.namespaces.TERN.FeatureOfInterest)) @@ -1985,7 +1979,7 @@ def add_sample_specimen( graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("specimen-sample"))) graph.add((uri, rdflib.SOSA.isResultOf, sampling_specimen)) graph.add((uri, rdflib.SOSA.isSampleOf, sample_field)) - graph.add((uri, utils.namespaces.TERN.featureType, vocab)) + graph.add((uri, utils.namespaces.TERN.featureType, term)) # Check for catalogNumber if row["catalogNumber"]: @@ -2099,18 +2093,17 @@ def add_kingdom_value( dataset (rdflib.URIRef): Dataset this belongs to graph (rdflib.Graph): Graph to add to """ - # Retrieve 
Vocab or Create on the Fly - vocab = vocabs.kingdom.KINGDOM.get( - graph=graph, - value=row["kingdom"], - source=dataset, - ) + # Retrieve vocab for field (multiple exist for kingdom) + vocab = self.fields["kingdom"].get_vocab("KINGDOM") + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["kingdom"]) # Kingdom Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal(f"kingdom = {row['kingdom']}"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_taxon_rank_attribute( self, @@ -2160,18 +2153,17 @@ def add_taxon_rank_value( if not row["taxonRank"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.taxon_rank.TAXON_RANK.get( - graph=graph, - value=row["taxonRank"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["taxonRank"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["taxonRank"]) # Taxon Rank Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal(f"taxon rank = {row['taxonRank']}"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_individual_count_observation( self, @@ -2425,18 +2417,17 @@ def add_basis_value( if not row["basisOfRecord"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.basis_of_record.BASIS_OF_RECORD.get( - graph=graph, - value=row["basisOfRecord"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["basisOfRecord"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["basisOfRecord"]) # Basis of Record Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, 
rdflib.Literal("basisOfRecord"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_owner_institution_provider( self, @@ -2546,18 +2537,17 @@ def add_occurrence_status_value( if not row["occurrenceStatus"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.occurrence_status.OCCURRENCE_STATUS.get( - graph=graph, - value=row["occurrenceStatus"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["occurrenceStatus"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["occurrenceStatus"]) # Occurrence Status Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal(f"occurrenceStatus = {row['occurrenceStatus']}"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_preparations_attribute( self, @@ -2607,18 +2597,17 @@ def add_preparations_value( if not row["preparations"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.preparations.PREPARATIONS.get( - graph=graph, - value=row["preparations"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["preparations"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["preparations"]) # Preparations Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("preparations"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_establishment_means_observation( self, @@ -2692,18 +2681,17 @@ def add_establishment_means_value( if not row["establishmentMeans"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.establishment_means.ESTABLISHMENT_MEANS.get( - graph=graph, - value=row["establishmentMeans"], - source=dataset, - ) + # 
Retrieve vocab for field + vocab = self.fields["establishmentMeans"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["establishmentMeans"]) # Establishment Means Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("establishmentMeans-value"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_life_stage_observation( self, @@ -2785,18 +2773,17 @@ def add_life_stage_value( if not row["lifeStage"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.life_stage.LIFE_STAGE.get( - graph=graph, - value=row["lifeStage"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["lifeStage"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["lifeStage"]) # Life Stage Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("lifeStage-value"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_sex_observation( self, @@ -2877,18 +2864,17 @@ def add_sex_value( if not row["sex"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.sex.SEX.get( - graph=graph, - value=row["sex"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["sex"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["sex"]) # Sex Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("sex-value"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_reproductive_condition_observation( self, @@ -2970,18 +2956,17 @@ def add_reproductive_condition_value( if not 
row["reproductiveCondition"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.reproductive_condition.REPRODUCTIVE_CONDITION.get( - graph=graph, - value=row["reproductiveCondition"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["reproductiveCondition"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["reproductiveCondition"]) # Reproductive Condition Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("reproductiveCondition-value"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_accepted_name_usage_observation( self, @@ -3112,12 +3097,11 @@ def add_sampling_sequencing( # but if it does then node will be ommitted from graph. return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.sequencing_method.SEQUENCING_METHOD.get( - graph=graph, - value=row["sequencingMethod"], - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["sequencingMethod"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["sequencingMethod"]) # Add to Graph graph.add((uri, a, utils.namespaces.TERN.Sampling)) @@ -3129,7 +3113,7 @@ def add_sampling_sequencing( graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) graph.add((temporal_entity, a, rdflib.TIME.Instant)) graph.add((temporal_entity, event_date.rdf_in_xsd, event_date.to_rdf_literal())) - graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + graph.add((uri, rdflib.SOSA.usedProcedure, term)) # Add geometry geometry_node = rdflib.BNode() @@ -3267,12 +3251,11 @@ def add_threat_status_observation( or row["eventDate"] ) - # Retrieve Vocab or Create on the Fly - vocab = vocabs.check_protocol.CHECK_PROTOCOL.get( - graph=graph, - value=row["threatStatusCheckProtocol"], - source=dataset, - ) + # Retrieve vocab for field + vocab 
= self.fields["threatStatusCheckProtocol"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(row["threatStatusCheckProtocol"]) # Threat Status Observation graph.add((uri, a, utils.namespaces.TERN.Observation)) @@ -3283,7 +3266,7 @@ def add_threat_status_observation( graph.add((uri, rdflib.SOSA.hasSimpleResult, rdflib.Literal(row["threatStatus"]))) graph.add((uri, rdflib.SOSA.observedProperty, CONCEPT_CONSERVATION_STATUS)) graph.add((uri, rdflib.PROV.wasInfluencedBy, jurisdiction_attribute)) - graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + graph.add((uri, rdflib.SOSA.usedProcedure, term)) temporal_entity = rdflib.BNode() graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) graph.add((temporal_entity, a, rdflib.TIME.Instant)) @@ -3329,18 +3312,17 @@ def add_threat_status_value( # Combine conservationJurisdiction and threatStatus value = f"{row['conservationJurisdiction']}/{row['threatStatus']}" - # Retrieve Vocab or Create on the Fly - vocab = vocabs.threat_status.THREAT_STATUS.get( - graph=graph, - value=value, - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["threatStatus"].get_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset).get(value) # Threat Status Value graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal(f"Conservation status = {row['threatStatus']}"))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_conservation_jurisdiction_attribute( self, @@ -3388,8 +3370,11 @@ def add_conservation_jurisdiction_value( if not row["conservationJurisdiction"]: return - # Retrieve Vocab or Create on the Fly - vocab = vocabs.conservation_jurisdiction.CONSERVATION_JURISDICTION.get(row["conservationJurisdiction"]) + # Retrieve vocab for field + vocab = self.fields["conservationJurisdiction"].get_vocab() + + # 
Retrieve term + term = vocab(graph=graph).get(row["conservationJurisdiction"]) # Construct Label label = f"Conservation Jurisdiction = {row['conservationJurisdiction']}" @@ -3398,7 +3383,7 @@ def add_conservation_jurisdiction_value( graph.add((uri, a, utils.namespaces.TERN.IRI)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal(label))) - graph.add((uri, rdflib.RDF.value, vocab)) + graph.add((uri, rdflib.RDF.value, term)) def add_organism_quantity_observation( self, @@ -3454,7 +3439,9 @@ def add_organism_quantity_observation( ))) # Add comment to temporal entity - graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal("Date unknown, template eventDate used as proxy"))) + graph.add( + (temporal_entity, rdflib.RDFS.comment, rdflib.Literal("Date unknown, template eventDate used as proxy")) + ) def add_organism_quantity_value( self, @@ -3481,19 +3468,18 @@ def add_organism_quantity_value( if not (organism_qty and organism_qty_type): return - # Get vocab or create on the fly - vocab = vocabs.organism_quantity_type.ORGANISM_QUANTITY_TYPE.get( - graph=graph, - value=organism_qty_type, - source=dataset, - ) + # Retrieve vocab for field + vocab = self.fields["organismQuantityType"].get_vocab() + + # Get term or create on the fly + term = vocab(graph=graph, source=dataset).get(organism_qty_type) # Add to graph graph.add((organism_qty_observation, rdflib.SOSA.hasResult, uri)) graph.add((uri, a, utils.namespaces.TERN.Value)) graph.add((uri, a, utils.namespaces.TERN.Float)) graph.add((uri, rdflib.RDFS.label, rdflib.Literal("organism-count"))) - graph.add((uri, utils.namespaces.TERN.unit, vocab)) + graph.add((uri, utils.namespaces.TERN.unit, term)) graph.add((uri, rdflib.RDF.value, rdflib.Literal(organism_qty, datatype=rdflib.XSD.float))) def add_site( diff --git a/abis_mapping/templates/survey_occurrence_data/schema.json b/abis_mapping/templates/survey_occurrence_data/schema.json index c6964adc..371ba4d0 100644 --- 
a/abis_mapping/templates/survey_occurrence_data/schema.json +++ b/abis_mapping/templates/survey_occurrence_data/schema.json @@ -600,7 +600,10 @@ "WESTERN AUSTRALIA", "Western Australia" ] - } + }, + "vocabularies": [ + "CONSERVATION_JURISDICTION" + ] }, { "name": "threatStatusCheckProtocol", diff --git a/abis_mapping/templates/survey_site_data/mapping.py b/abis_mapping/templates/survey_site_data/mapping.py index 0c20ec1e..7e6f2aa3 100644 --- a/abis_mapping/templates/survey_site_data/mapping.py +++ b/abis_mapping/templates/survey_site_data/mapping.py @@ -501,29 +501,31 @@ def add_site( (relationship_to_related_site := row["relationshipToRelatedSite"]) and (related_site := row["relatedSiteID"]) ): - # Retrieve vocab - relationship_to_related_site_vocab = vocabs.relationship_to_related_site.RELATIONSHIP_TO_RELATED_SITE.get( - value=relationship_to_related_site, - ) + # Retrieve vocab for field + relationship_to_related_site_vocab = self.fields["relationshipToRelatedSite"].get_vocab() + + # Retrieve term + relationship_to_related_site_term = relationship_to_related_site_vocab( + graph=graph + ).get(relationship_to_related_site) # Assign triple based on related site string if (related_site_literal := utils.rdf.uri_or_string_literal(related_site)).datatype == rdflib.XSD.string: - graph.add((uri, relationship_to_related_site_vocab, related_site_literal)) + graph.add((uri, relationship_to_related_site_term, related_site_literal)) else: - graph.add((uri, relationship_to_related_site_vocab, rdflib.URIRef(related_site))) + graph.add((uri, relationship_to_related_site_term, rdflib.URIRef(related_site))) # Add site tern featuretype graph.add((uri, utils.namespaces.TERN.featureType, vocabs.site_type.SITE.iri)) - # Retrieve vocab or create on the fly - site_type_vocab = vocabs.site_type.SITE_TYPE.get( - graph=graph, - value=site_type, - source=dataset, - ) + # Retrieve vocab for field + site_type_vocab = self.fields["siteType"].get_vocab() + + # Retrieve term or create on the fly 
+ site_type_term = site_type_vocab(graph=graph, source=dataset).get(site_type) # Add to site type graph - graph.add((uri, rdflib.DCTERMS.type, site_type_vocab)) + graph.add((uri, rdflib.DCTERMS.type, site_type_term)) # Add site name if available if site_name: @@ -643,13 +645,12 @@ def add_habitat_value( # Add label graph.add((uri, rdflib.RDFS.label, rdflib.Literal("Site habitat"))) - # Add flexible vocab - vocab = vocabs.target_habitat_scope.TARGET_HABITAT_SCOPE.get( - graph=graph, - value=raw, - source=dataset, - ) - graph.add((uri, rdflib.RDF.value, vocab)) + # Retrieve vocab for field + vocab = self.fields["habitat"].get_vocab() + + # Add flexible vocab term + term = vocab(graph=graph, source=dataset).get(raw) + graph.add((uri, rdflib.RDF.value, term)) def add_data_generalizations_attribute( self, diff --git a/abis_mapping/templates/survey_site_data/validators/shaping.py b/abis_mapping/templates/survey_site_data/validators/shaping.py index bd512a0f..1416534f 100644 --- a/abis_mapping/templates/survey_site_data/validators/shaping.py +++ b/abis_mapping/templates/survey_site_data/validators/shaping.py @@ -193,7 +193,7 @@ def add_site_shape(g: rdflib.Graph, dataset_shape: rdflib.term.Node) -> rdflib.t dcterms_type_opts = rdflib.BNode() g.add((dcterms_type_prop, SH.path, rdflib.DCTERMS.type)) site_type_vocab_values: list[rdflib.term.Node] = [ - v.iri for v in vocabs.site_type.SITE_TYPE.terms if v is not None + v.iri for v in vocabs.site_type.SiteType.terms if v is not None ] rdflib.collection.Collection(g, terms_list, site_type_vocab_values) g.add((terms_list_shape, SH["in"], terms_list)) diff --git a/abis_mapping/types/schema.py b/abis_mapping/types/schema.py index c33332ea..41232799 100644 --- a/abis_mapping/types/schema.py +++ b/abis_mapping/types/schema.py @@ -28,7 +28,7 @@ class Field(pydantic.BaseModel): type: str format: str | None constraints: Constraints - vocabularies: list[str] + vocabularies: list[str] = [] # Allow extra fields to be captured mainly to 
catch errors in json model_config = pydantic.ConfigDict(extra="allow") @@ -76,7 +76,8 @@ def get_vocab(self, name: str | None = None) -> Type[utils.vocabs.Vocabulary]: if name is not None: return utils.vocabs.get_vocab(name) - return utils.vocabs.get_vocab(self.vocabularies.pop(0)) + return utils.vocabs.get_vocab(self.vocabularies[0]) + class Schema(pydantic.BaseModel): """Model for overall schema object of a schema definition.""" diff --git a/abis_mapping/types/spatial.py b/abis_mapping/types/spatial.py index 9a06c569..56b5ad50 100644 --- a/abis_mapping/types/spatial.py +++ b/abis_mapping/types/spatial.py @@ -13,7 +13,7 @@ # Local from abis_mapping import settings from abis_mapping.utils import namespaces -from abis_mapping import vocabs +from abis_mapping import utils # Typing from typing import NamedTuple @@ -88,7 +88,12 @@ def original_datum_uri(self) -> rdflib.URIRef | None: Returns: rdflib.URIRef: Uri corresponding to original datum if known, else None. """ - return vocabs.geodetic_datum.GEODETIC_DATUM.get(self.original_datum_name) + # Retrieve vocab class + if (vocab := utils.vocabs.get_vocab("GEODETIC_DATUM")) is not None: + # Init with dummy graph and return corresponding URI + return vocab(graph=rdflib.Graph()).get(self.original_datum_name) + else: + return None @property def _transformed_geometry(self) -> shapely.Geometry: @@ -109,7 +114,13 @@ def transformer_datum_uri(self) -> rdflib.URIRef | None: Returns: rdflib.URIRef: Uri corresponding to transformer datum if known, else None. 
""" - return vocabs.geodetic_datum.GEODETIC_DATUM.get(settings.DEFAULT_TARGET_CRS) + # Retrieve vocab class + if (vocab := utils.vocabs.get_vocab("GEODETIC_DATUM")) is not None: + # Init with dummy graph and return corresponding uri + return vocab(graph=rdflib.Graph()).get(settings.DEFAULT_TARGET_CRS) + + # If vocab doesn't exist + return None @classmethod def from_geosparql_wkt_literal(cls, literal: rdflib.Literal | str) -> "Geometry": diff --git a/abis_mapping/utils/vocabs.py b/abis_mapping/utils/vocabs.py index 370c2f7f..4aae8dbc 100644 --- a/abis_mapping/utils/vocabs.py +++ b/abis_mapping/utils/vocabs.py @@ -154,7 +154,7 @@ def register( class RestrictedVocabulary(Vocabulary): """Restricted Vocabulary""" - def get(self, value: str) -> rdflib.URIRef: + def get(self, value: str | None) -> rdflib.URIRef: """Retrieves an IRI from the Vocabulary. Args: @@ -167,6 +167,11 @@ def get(self, value: str) -> rdflib.URIRef: VocabularyError: Raised if supplied value does not match an existing vocabulary term. """ + # Check for Value + if not value: + # Raise Error + raise VocabularyError("Value not supplied for vocabulary with no default") + # Sanitise Value sanitised_value = strings.sanitise(value) @@ -291,7 +296,7 @@ class VocabularyError(Exception): """Error Raised in Vocabulary Handling""" -def get_vocab(key: str) -> Type[Vocabulary] | None: +def get_vocab(key: str) -> Type[Vocabulary]: """Retrieves vocab object for given key. Args: @@ -300,5 +305,12 @@ def get_vocab(key: str) -> Type[Vocabulary] | None: Returns: Type[Vocabulary] | None: Corresponding vocabulary class for given key or None. + + Raises: + ValueError: If supplied key doesn't exist within the registry. """ - return Vocabulary.id_registry.get(key) + try: + return Vocabulary.id_registry[key] + except KeyError: + # Transform to ValueError, to assist with validation libraries. 
+ raise ValueError(f"Key {key} not found in registry.") diff --git a/abis_mapping/vocabs/life_stage.py b/abis_mapping/vocabs/life_stage.py index e4346b3a..8cae0d11 100644 --- a/abis_mapping/vocabs/life_stage.py +++ b/abis_mapping/vocabs/life_stage.py @@ -185,13 +185,13 @@ # Vocabulary class LifeStage(utils.vocabs.FlexibleVocabulary): - vocab_id="LIFE_STAGE" - definition=rdflib.Literal("A type of lifeStage.") - base=utils.rdf.uri("bdr-cv/parameter/lifeStage/") - scheme=rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/5699eca7-9ef0-47a6-bcfb-9306e0e2b85e") - broader=utils.rdf.uri("bdr-cv/parameter/lifeStage", utils.namespaces.EXAMPLE), # TODO -> Need real URI - default=None # No default, ommitted if not provided - terms=( + vocab_id = "LIFE_STAGE" + definition = rdflib.Literal("A type of lifeStage.") + base = utils.rdf.uri("bdr-cv/parameter/lifeStage/") + scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/5699eca7-9ef0-47a6-bcfb-9306e0e2b85e") + broader = utils.rdf.uri("bdr-cv/parameter/lifeStage", utils.namespaces.EXAMPLE) # TODO -> Need real URI + default = None # No default, omitted if not provided + terms = ( ADULT, EMBRYO, GAMETE, diff --git a/abis_mapping/vocabs/sampling_effort_unit.py b/abis_mapping/vocabs/sampling_effort_unit.py index 23663111..46e58f9e 100644 --- a/abis_mapping/vocabs/sampling_effort_unit.py +++ b/abis_mapping/vocabs/sampling_effort_unit.py @@ -12,7 +12,7 @@ class SamplingEffortUnit(utils.vocabs.FlexibleVocabulary): definition = rdflib.Literal("A type of samplingEffortUnit") base = utils.rdf.uri("bdr-cv/attribute/samplingEffortUnit/") scheme = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/dd085299-ae86-4371-ae15-61dfa432f924") - broader = utils.rdf.uri("bdr-cv/concept/samplingEffortUnit", utils.namespaces.EXAMPLE), # TODO -> Need real URI + broader = utils.rdf.uri("bdr-cv/concept/samplingEffortUnit", utils.namespaces.EXAMPLE) # TODO -> Need real URI default = None terms = () publish = False diff --git 
a/abis_mapping/vocabs/target_taxonomic_scope.py b/abis_mapping/vocabs/target_taxonomic_scope.py index 61fefca5..9d26fec4 100644 --- a/abis_mapping/vocabs/target_taxonomic_scope.py +++ b/abis_mapping/vocabs/target_taxonomic_scope.py @@ -79,6 +79,7 @@ ), ) + class TargetTaxonomicScope(utils.vocabs.FlexibleVocabulary): vocab_id = "TARGET_TAXONOMIC_SCOPE" definition = rdflib.Literal("A type of targetTaxonomicScope") diff --git a/abis_mapping/vocabs/threat_status.py b/abis_mapping/vocabs/threat_status.py index d2e24d21..c1857777 100644 --- a/abis_mapping/vocabs/threat_status.py +++ b/abis_mapping/vocabs/threat_status.py @@ -742,6 +742,7 @@ description="Western Australia, vulnerable species.", ) + # Vocabulary class ThreatStatus(utils.vocabs.FlexibleVocabulary): vocab_id = "THREAT_STATUS" @@ -825,7 +826,7 @@ class ThreatStatus(utils.vocabs.FlexibleVocabulary): WA_T, WA_VU, ) - publish=False + publish = False # Register diff --git a/docs/instructions.py b/docs/instructions.py index c13e07da..887311ba 100644 --- a/docs/instructions.py +++ b/docs/instructions.py @@ -51,6 +51,10 @@ def get_source( except KeyError: raise ValueError(f"Template '{self.mapper_id}' not found.") + # Check mapper returned + if mapper is None: + raise TypeError(f"Template '{self.mapper_id}' not defined; got NoneType") + # Create path path = mapper().root_dir() / "templates" / template @@ -95,7 +99,7 @@ def build_instructions(mapper_id: str) -> str: "threat_status": tables.threat_status.ThreatStatusTabler(mapper_id).generate_table(as_markdown=True), }, "values": { - "geodetic_datum_count": len(vocabs.geodetic_datum.GEODETIC_DATUM.terms), + "geodetic_datum_count": len(vocabs.geodetic_datum.GeodeticDatum.terms), }, "anchors": { "vocabulary_list": "vocabulary-list", diff --git a/docs/tables/base.py b/docs/tables/base.py index c6df848e..67c3017a 100644 --- a/docs/tables/base.py +++ b/docs/tables/base.py @@ -3,13 +3,12 @@ # Standard import abc import csv -import re # Local from abis_mapping import base # 
Typing -from typing import IO, Final, Any +from typing import IO, Final, Any, Mapping, Iterable class BaseTabler(abc.ABC): @@ -91,21 +90,18 @@ def __init__( fieldnames.insert(0, "__start__") fieldnames.append("__end__") - # Assign dialect attribute - self.dialect: Final[csv.Dialect] = csv.get_dialect("markdown") - # Call parent constructor - super().__init__(f, fieldnames, dialect="markdown", *args, **kwargs) + super().__init__(f, fieldnames, dialect="markdown", *args, **kwargs) # type: ignore[arg-type, misc] @property - def first_field(self) -> str: + def first_field(self) -> Any: """Returns name of first field.""" - return self.fieldnames[0] + return self.fieldnames[0] # type: ignore[index] @property - def last_field(self) -> str: + def last_field(self) -> Any: """Returns name of last field.""" - return self.fieldnames[-1] + return self.fieldnames[-1] # type: ignore[index] def writeheader(self) -> Any: """Writes the first row of the markdown table. @@ -121,23 +117,23 @@ def writeheader(self) -> Any: header_break = dict(zip(self.fieldnames, ["---"] * len(self.fieldnames))) return self.writerow(header_break) - def writerow(self, rowdict: dict[str, Any]) -> Any: + def writerow(self, rowdict: Mapping[Any, Any]) -> Any: """Writes a row of the markdown table. Args: - rowdict (dict[str, Any]): Dictionary to convert. + rowdict (Mapping[Any, Any]): Dictionary to convert. Returns: Any: Value returned from underlying file write method.
""" # Add the first and last blank values to allow beginning and trailing pipes - rowdict[self.first_field] = None - rowdict[self.last_field] = None + rowdict[self.first_field] = None # type: ignore[index] + rowdict[self.last_field] = None # type: ignore[index] # Clean cells for fieldname in self.fieldnames: if isinstance(rowdict[fieldname], str): - rowdict[fieldname] = self._clean_cell(rowdict[fieldname]) + rowdict[fieldname] = self._clean_cell(rowdict[fieldname]) # type: ignore[index] # Call parent writerow and return return super().writerow(rowdict) @@ -154,7 +150,7 @@ def _clean_cell(self, contents: str) -> str: # Perform replacement return contents.replace('\n', "<br>")
- def writerows(self, rowdicts: list[dict[str, Any]]) -> None: + def writerows(self, rowdicts: Iterable[Mapping[Any, Any]]) -> None: """Writes rows of the markdown table. Raises: diff --git a/docs/tables/fields.py b/docs/tables/fields.py index fc08307a..fad62b94 100644 --- a/docs/tables/fields.py +++ b/docs/tables/fields.py @@ -58,9 +58,12 @@ def generate_table( # Create a memory io and dictionary to csv writer output = io.StringIO() - header = [hdr.serialization_alias or hdr.title for hdr in FieldTableRow.model_fields.values()] + raw_hdr = (hdr.serialization_alias or hdr.title for hdr in FieldTableRow.model_fields.values()) + header = [hdr for hdr in raw_hdr if hdr is not None] + if as_markdown: - writer = tables.base.MarkdownDictWriter(output, fieldnames=header) + # MarkdownDictWriter is a subclass of DictWriter hence the type hint. + writer: csv.DictWriter = tables.base.MarkdownDictWriter(output, fieldnames=header) else: writer = csv.DictWriter(output, fieldnames=header) @@ -168,9 +171,10 @@ def determine_checklist( # Get validate method mock mocked_validate: mock.Mock = mocked_resource.return_value.validate - # Retrieve checklist + # Retrieve checklist and return if mocked_validate.called: - return mocked_validate.call_args.kwargs.get("checklist") + checklist: frictionless.Checklist = mocked_validate.call_args.kwargs.get("checklist") + return checklist # Else return None diff --git a/docs/tables/threat_status.py b/docs/tables/threat_status.py index d95cb7d4..46d05b73 100644 --- a/docs/tables/threat_status.py +++ b/docs/tables/threat_status.py @@ -41,7 +41,7 @@ def __init__( """ super().__init__(template_id) fields = self.mapper.schema()['fields'] - if len([fld for fld in fields if fld.get("vocabularies") and "THREAT_STATUS" in fld.get("vocabularies")])== 0: + if not [fld for fld in fields if fld.get("vocabularies") and "THREAT_STATUS" in fld.get("vocabularies")]: raise ValueError(f"No THREAT_STATUS vocabularies found for template {template_id}") def
generate_table( @@ -62,18 +62,20 @@ def generate_table( output = io.StringIO() # Get header list - header = [hdr.serialization_alias for hdr in ThreatStatusRow.model_fields.values()] + raw_hdr = (hdr.serialization_alias or hdr.title for hdr in ThreatStatusRow.model_fields.values()) + header = [hdr for hdr in raw_hdr if hdr is not None] # Create writer if as_markdown: - writer = tables.base.MarkdownDictWriter(output, fieldnames=header) + # MarkdownDictWriter is a subclass of DictWriter hence the type hint + writer: csv.DictWriter = tables.base.MarkdownDictWriter(output, fieldnames=header) else: writer = csv.DictWriter(output, fieldnames=header) writer.writeheader() # Iterate through vocab's terms - for term in sorted(utils.vocabs.get_vocab("THREAT_STATUS").terms, key=lambda x: x.preferred_label): + for term in sorted(utils.vocabs.get_vocab("THREAT_STATUS").terms, key=lambda x: x.preferred_label): # type: ignore[arg-type, return-value] # noqa: E501 # Generate row row = self.generate_row(term) # Write to output @@ -98,9 +100,17 @@ def generate_row( Return; ThreatStatConsJurTableRow: Table row. + + Raises: + ValueError: If there is no preferred label for the supplied Term.
""" - # Split out preferred label - splt_preferred = threat_stat_cons_jur_term.preferred_label.split('/') + # Check preferred label + if (preferred_label := threat_stat_cons_jur_term.preferred_label) is not None: + # Split out preferred label + splt_preferred = preferred_label.split('/') + else: + # Raise + raise ValueError(f"No preferred label for {threat_stat_cons_jur_term}") # Split threat status alt labels threat_stat_alt: list[str] = [lbl.split('/')[1] for lbl in threat_stat_cons_jur_term.alternative_labels] diff --git a/docs/tables/vocabs.py b/docs/tables/vocabs.py index dcceebc9..2eeb7fe7 100644 --- a/docs/tables/vocabs.py +++ b/docs/tables/vocabs.py @@ -15,7 +15,7 @@ from abis_mapping import utils # Typing -from typing import Iterable, IO +from typing import Iterable, IO, Type class VocabTableRow(pydantic.BaseModel): @@ -51,9 +51,14 @@ def generate_table( # Create a memory io and dictionary to csv writer output = io.StringIO() - header = [hdr.serialization_alias or hdr.title for hdr in VocabTableRow.model_fields.values()] + # Get serialization title or fall back to given title for each field. + raw_hdr = (hdr.serialization_alias or hdr.title for hdr in VocabTableRow.model_fields.values()) + # Assert all values as not None + header = [hdr for hdr in raw_hdr if hdr is not None] + if as_markdown: - writer = tables.base.MarkdownDictWriter(output, fieldnames=header) + # MarkdownDictWriter is a child of DictWriter hence typehint + writer: csv.DictWriter = tables.base.MarkdownDictWriter(output, fieldnames=header) else: writer = csv.DictWriter(output, fieldnames=header) @@ -83,7 +88,7 @@ def generate_table( def generate_vocab_rows( self, field: types.schema.Field, - vocab: utils.vocabs.Vocabulary, + vocab: Type[utils.vocabs.Vocabulary], ) -> Iterable[VocabTableRow]: """Generates a set of rows based on vocabulary. @@ -95,7 +100,7 @@ def generate_vocab_rows( VocabTableRow: Vocabulary table rows. """ # Itermate through terms and yield each row. 
- for term in sorted(vocab.terms, key=lambda x: x.preferred_label): + for term in sorted(vocab.terms, key=lambda x: x.preferred_label): # type: ignore[arg-type, return-value] yield self.generate_row( field=field, term=term, @@ -152,4 +157,3 @@ def generate_row( finally: # Close output file args.output_dest.close() - diff --git a/scripts/lint.sh b/scripts/lint.sh index 69b9aea5..438f2c1e 100755 --- a/scripts/lint.sh +++ b/scripts/lint.sh @@ -1,3 +1,3 @@ #!/usr/bin/env bash set -x -flake8 tests abis_mapping \ No newline at end of file +flake8 tests abis_mapping docs diff --git a/scripts/typecheck.sh b/scripts/typecheck.sh index 6d757619..a9894edb 100755 --- a/scripts/typecheck.sh +++ b/scripts/typecheck.sh @@ -1,3 +1,3 @@ #!/usr/bin/env bash set -x -mypy tests abis_mapping \ No newline at end of file +mypy tests abis_mapping docs diff --git a/tests/base/test_mapper.py b/tests/base/test_mapper.py index 96be6e72..22e99aea 100644 --- a/tests/base/test_mapper.py +++ b/tests/base/test_mapper.py @@ -326,11 +326,15 @@ def test_extra_fields_middle(mocker: pytest_mock.MockerFixture) -> None: assert error_codes == ["incorrect-label"] -def test_fields(mocker: pytest_mock.MockerFixture) -> None: +def test_fields( + mocker: pytest_mock.MockerFixture, + mocked_vocab: unittest.mock.MagicMock, +) -> None: """Tests the fields method. Args: - mocker (pytest_mock.MockerFixture): + mocker (pytest_mock.MockerFixture): Pytest mocker fixture + mocked_vocab (unittest.mock.MagicMock): Patched get_vocab and resulting mock. 
""" # Patch schema method descriptor = { @@ -365,7 +369,7 @@ def test_fields(mocker: pytest_mock.MockerFixture) -> None: } ] } - mocked_schema = mocker.patch.object(base.mapper.ABISMapper, "schema", return_value=descriptor) + mocker.patch.object(base.mapper.ABISMapper, "schema", return_value=descriptor) # Create mapper class TestMapper(base.mapper.ABISMapper): @@ -390,9 +394,3 @@ def apply_validation( # Assert assert list(mapper.fields.keys()) == ["fieldA", "fieldB"] - mocked_schema.assert_called_once() - - - - - diff --git a/tests/conftest.py b/tests/conftest.py index 4990cd43..65a00ca2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,15 +2,47 @@ # Standard import json +import unittest.mock # Third-Party +import pytest +import pytest_mock import rdflib import rdflib.compare +# Local +from abis_mapping import utils + # Typing from typing import Union +@pytest.fixture +def mocked_vocab(mocker: pytest_mock.MockerFixture) -> unittest.mock.MagicMock: + """Provides a mocked term fixture. + + Args: + mocker (pytest_mock.MockerFixture): Pytest mocker fixture. + + Returns: + unittest.mock.MagicMock: Mocked term fixture. 
+ """ + # Patch get_vocab + mocked_vocab = mocker.patch("abis_mapping.utils.vocabs.get_vocab") + + # Patch terms + mocked_vocab.return_value.terms = ( + utils.vocabs.Term( + labels=("SOME LABEL", "ANOTHER LABEL", ), + iri=rdflib.URIRef("https://example.org/some-term"), + description="Some description.", + ), + ) + + # Return + return mocked_vocab + + def compare_graphs( graph1: Union[rdflib.Graph, str], graph2: Union[rdflib.Graph, str], diff --git a/tests/docs/tables/conftest.py b/tests/docs/tables/conftest.py deleted file mode 100644 index d22e2964..00000000 --- a/tests/docs/tables/conftest.py +++ /dev/null @@ -1,38 +0,0 @@ -"""Provides fixtures and helpers for the unit tests.""" - -# Standard -import unittest.mock - -# Third-party -import pytest -import pytest_mock -import rdflib - -# Local -from abis_mapping import utils - - -@pytest.fixture -def mocked_vocab(mocker: pytest_mock.MockerFixture) -> unittest.mock.MagicMock: - """Provides a mocked term fixture. - - Args: - mocker (pytest_mock.MockerFixture): Pytest mocker fixture. - - Returns: - unittest.mock.MagicMock: Mocked term fixture. - """ - # Patch get_vocab - mocked_vocab = mocker.patch("abis_mapping.utils.vocabs.get_vocab") - - # Patch terms - mocked_vocab.return_value.terms = ( - utils.vocabs.Term( - labels=("SOME LABEL", "ANOTHER LABEL", ), - iri=rdflib.URIRef("https://example.org/some-term"), - description="Some description.", - ), - ) - - # Return - return mocked_vocab diff --git a/tests/docs/tables/test_fields.py b/tests/docs/tables/test_fields.py index 9765f3b3..46734ae1 100644 --- a/tests/docs/tables/test_fields.py +++ b/tests/docs/tables/test_fields.py @@ -102,11 +102,15 @@ def test_determine_checklist() -> None: assert len(checklist.checks) == 6 -def test_generate_table(mocked_mapper: unittest.mock.MagicMock) -> None: +def test_generate_table( + mocked_mapper: unittest.mock.MagicMock, + mocked_vocab: unittest.mock.MagicMock, +) -> None: """Tests generate_table method. 
Args: - mocked_mapper (pytest_mock.mocker.mock.MagicMock): Mocked mapper fixture. + mocked_mapper (unittest.mock.MagicMock): Mocked mapper fixture. + mocked_vocab (unittest.mock.MagicMock): Mocked vocab fixture. """ # Create an in memory io dest = io.StringIO() @@ -120,17 +124,23 @@ def test_generate_table(mocked_mapper: unittest.mock.MagicMock) -> None: ) # Assert + mocked_vocab.assert_called() + mocked_mapper.assert_called() assert dest.getvalue() == ( 'Field Name,Description,Mandatory / Optional,Datatype Format,Examples\r\n' 'someName,Some description,Mandatory,String,SOME EXAMPLE\r\n\n' ) -def test_generate_table_markdown(mocked_mapper: unittest.mock.MagicMock) -> None: +def test_generate_table_markdown( + mocked_mapper: unittest.mock.MagicMock, + mocked_vocab: unittest.mock.MagicMock, +) -> None: """Tests generate_table method with markdown format. Args: mocked_mapper (unittest.mock.MagicMock): Mocked mapper fixture. + mocked_vocab (unittest.mock.MagicMock): Mocked vocab fixture. """ # Create a tabler tabler = tables.fields.FieldTabler("some_id") @@ -139,6 +149,8 @@ def test_generate_table_markdown(mocked_mapper: unittest.mock.MagicMock) -> None actual = tabler.generate_table(as_markdown=True) # Assert + mocked_mapper.assert_called() + mocked_vocab.assert_called() assert actual == ( '|Field Name|Description|Mandatory / Optional|Datatype Format|Examples|\n' '|---|---|---|---|---|\n' diff --git a/tests/types/test_schema.py b/tests/types/test_schema.py index cfbe529a..aa90191f 100644 --- a/tests/types/test_schema.py +++ b/tests/types/test_schema.py @@ -6,7 +6,6 @@ # Local from abis_mapping import types -from abis_mapping import utils # Typing from typing import Any @@ -105,6 +104,3 @@ def test_get_vocab_no_vocabs(self, field_d: dict[str, Any]) -> None: with pytest.raises(IndexError): # Invoke field.get_vocab() - - - diff --git a/tests/types/test_spatial.py b/tests/types/test_spatial.py index 5e5f535d..cc7d8eec 100644 --- a/tests/types/test_spatial.py +++ 
b/tests/types/test_spatial.py @@ -130,9 +130,12 @@ def test_geometry_transformer_datum_uri() -> None: raw="POINT(0 0)", datum="AGD66", ) + # Retrieve geodetic datum vocab + vocab = utils.vocabs.get_vocab("GEODETIC_DATUM") # Assert default datum - assert geometry.transformer_datum_uri == vocabs.geodetic_datum.GEODETIC_DATUM.get(settings.DEFAULT_TARGET_CRS) + assert vocab is not None + assert geometry.transformer_datum_uri == vocab(graph=rdflib.Graph()).get(settings.DEFAULT_TARGET_CRS) @pytest.mark.parametrize( @@ -201,7 +204,7 @@ def test_geometry_to_rdf_literal() -> None: ( "POINT (571666.4475041276 5539109.815175673)", "EPSG:26917", - f"<{vocabs.geodetic_datum.GEODETIC_DATUM.get(settings.DEFAULT_TARGET_CRS)}> POINT (-80 50)", + f"<{vocabs.geodetic_datum.GeodeticDatum(rdflib.Graph()).get(settings.DEFAULT_TARGET_CRS)}> POINT (-80 50)", ), ] diff --git a/tests/utils/test_vocabs.py b/tests/utils/test_vocabs.py index 482c41cc..9a2cce8d 100644 --- a/tests/utils/test_vocabs.py +++ b/tests/utils/test_vocabs.py @@ -41,9 +41,9 @@ def test_vocabs_term() -> None: def test_vocabs_restricted_vocab() -> None: """Tests the RestrictedVocab Class""" # Create Vocab - vocab = abis_mapping.utils.vocabs.RestrictedVocabulary( - vocab_id="TEST_RESTRICT", - terms=( + class Vocab(abis_mapping.utils.vocabs.RestrictedVocabulary): + vocab_id = "TEST_RESTRICT" + terms = ( abis_mapping.utils.vocabs.Term( labels=("A",), iri=rdflib.URIRef("A"), @@ -54,8 +54,8 @@ def test_vocabs_restricted_vocab() -> None: iri=rdflib.URIRef("B"), description="B", ), - ), - ) + ) + vocab = Vocab(graph=rdflib.Graph()) # Assert Existing Values assert vocab.get("a") == rdflib.URIRef("A") @@ -71,14 +71,14 @@ def test_vocabs_restricted_vocab() -> None: def test_vocabs_flexible_vocab() -> None: """Tests the FlexibleVocab Class""" # Create Vocab - vocab = abis_mapping.utils.vocabs.FlexibleVocabulary( - vocab_id="TEST_FLEX", - definition=rdflib.Literal("definition"), - base=rdflib.URIRef("base/"), - 
scheme=rdflib.URIRef("scheme"), - broader=rdflib.URIRef("broader"), - default=None, - terms=( + class Vocab(abis_mapping.utils.vocabs.FlexibleVocabulary): + vocab_id = "TEST_FLEX" + definition = rdflib.Literal("definition") + base = rdflib.URIRef("base/") + scheme = rdflib.URIRef("scheme") + broader = rdflib.URIRef("broader") + default = None + terms = ( abis_mapping.utils.vocabs.Term( labels=("A",), iri=rdflib.URIRef("A"), @@ -89,20 +89,22 @@ def test_vocabs_flexible_vocab() -> None: iri=rdflib.URIRef("B"), description="B", ), - ), - ) - - # Create Graph + ) + # Create graph graph = rdflib.Graph() + # Initialize vocab + vocab = Vocab(graph=graph) + # Assert Existing Values - assert vocab.get(graph, "a") == rdflib.URIRef("A") - assert vocab.get(graph, "A") == rdflib.URIRef("A") - assert vocab.get(graph, "b") == rdflib.URIRef("B") - assert vocab.get(graph, "B") == rdflib.URIRef("B") + assert vocab.get("a") == rdflib.URIRef("A") + assert vocab.get("A") == rdflib.URIRef("A") + assert vocab.get("b") == rdflib.URIRef("B") + assert vocab.get("B") == rdflib.URIRef("B") # Assert New Values - assert vocab.get(graph, "C", rdflib.URIRef("D")) == rdflib.URIRef("base/C") + vocab.source = rdflib.URIRef("D") + assert vocab.get("C") == rdflib.URIRef("base/C") assert graph.serialize(format="ttl").strip() == textwrap.dedent( """ @prefix dcterms: . 
@@ -120,7 +122,7 @@ def test_vocabs_flexible_vocab() -> None: # Assert Invalid Values with pytest.raises(abis_mapping.utils.vocabs.VocabularyError): - vocab.get(graph, None) # No Default + vocab.get(None) # No Default def test_vocab_register_id() -> None: @@ -130,12 +132,16 @@ def test_vocab_register_id() -> None: def test_get_vocab() -> None: """Tests get_vocab function.""" - # Retrieve vocabs + # Retrieve vocab v1 = abis_mapping.utils.vocabs.get_vocab("SEX") - v2 = abis_mapping.utils.vocabs.get_vocab("NOT_A_VOCAB") # Assert exists - assert isinstance(v1, abis_mapping.utils.vocabs.FlexibleVocabulary) + assert v1 is not None + assert issubclass(v1, abis_mapping.utils.vocabs.FlexibleVocabulary) + - # Assert None - assert v2 is None +def test_get_vocab_invalid() -> None: + """Tests get_vocab function with an invalid vocab.""" + # Should raise key error + with pytest.raises(ValueError): + abis_mapping.utils.vocabs.get_vocab("NOT_A_VOCAB") diff --git a/tests/vocabs/test_organism_quantity_type.py b/tests/vocabs/test_organism_quantity_type.py index 1aac8cb1..227adbb4 100644 --- a/tests/vocabs/test_organism_quantity_type.py +++ b/tests/vocabs/test_organism_quantity_type.py @@ -1,7 +1,7 @@ """Tests the organism quantity type vocab functionality.""" # Local -from abis_mapping import vocabs +from abis_mapping import utils # Third-party import rdflib @@ -13,7 +13,9 @@ def test_get_percentage_biomass() -> None: graph = rdflib.Graph() # Get vocab - iri = vocabs.organism_quantity_type.ORGANISM_QUANTITY_TYPE.get(graph, "% of biomass") + vocab = utils.vocabs.get_vocab("ORGANISM_QUANTITY_TYPE") + assert vocab is not None + iri = vocab(graph=graph).get("% of biomass") # Assert assert iri == rdflib.URIRef("http://rs.gbif.org/vocabulary/gbif/quantityType/percentageOfBiomass") From dd0954b169900f1b2351edfef78334ac16ee7ae8 Mon Sep 17 00:00:00 2001 From: joecrowleygaia Date: Wed, 3 Jul 2024 12:19:41 +0800 Subject: [PATCH 3/3] fixed typecheck issue --- docs/tables/base.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tables/base.py b/docs/tables/base.py index a31896db..1b3dc6ce 100644 --- a/docs/tables/base.py +++ b/docs/tables/base.py @@ -65,7 +65,7 @@ class MarkdownDialect(csv.excel): escapechar = '\\' lineterminator = '\n' quoting = csv.QUOTE_NONE - quotechar = None + quotechar = None # type: ignore[assignment] # Register the dialect with the csv module