Skip to content

Commit

Permalink
Fix/explicit datatypes abox (#70)
Browse files Browse the repository at this point in the history
* multiple class types for individuals in the ABox
* explicit datatyping for the TBox relations
* ignoring of null/None-valued objects in the input data
* fixes in the object property serialization (objects are now serialized as IRI references `<...>` instead of string literals `"..."`)
  • Loading branch information
MBueschelberger authored Nov 13, 2024
1 parent f6ed034 commit e5f2a28
Show file tree
Hide file tree
Showing 28 changed files with 987 additions and 392 deletions.
26 changes: 19 additions & 7 deletions data2rdf/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

import json
from abc import abstractmethod
from typing import Any, Dict, Optional, Union
from enum import Enum
from typing import Any, Dict, List, Optional, Union

from pydantic import (
AnyUrl,
Expand All @@ -17,6 +18,14 @@
from data2rdf.config import Config


# Closed set of relation kinds a mapping can declare for a property.
# The `str` mixin makes every member a `str` instance as well, so a member
# compares equal to its plain string value (e.g. "data_property").
class RelationType(str, Enum):
"""Relation Type of TBox modellings"""

# One member per supported kind of semantic relation; the string values are
# what the models in this file accept for their relation-type fields.
ANNOTATION_PROPERTY = "annotation_property"
DATA_PROPERTY = "data_property"
OBJECT_PROPERTY = "object_property"


class BaseConfigModel(BaseModel):
"""Basic model for holding the data2rdf config"""

Expand Down Expand Up @@ -53,7 +62,7 @@ class BasicConceptMapping(BaseConfigModel):
"""Basic mapping for a concept in a file"""

key: Optional[str] = Field(
None, description="Key/column/ of the concept in the file"
None, description="Key/column of the concept in the file"
)


Expand All @@ -76,7 +85,7 @@ def graph(cls) -> Graph:
class BasicSuffixModel(BaseConfigModel):
"""Pydantic BaseModel for suffix and type of a class instance"""

iri: Union[str, AnyUrl] = Field(
iri: Union[str, AnyUrl, List[Union[str, AnyUrl]]] = Field(
..., description="Ontological class related to this concept"
)
suffix: Optional[str] = Field(
Expand All @@ -89,12 +98,12 @@ class BasicSuffixModel(BaseConfigModel):

@field_validator("iri")
@classmethod
def validate_iri(cls, value: Union[str, AnyUrl]) -> AnyUrl:
def validate_iri(cls, value: Union[AnyUrl, List[AnyUrl]]) -> AnyUrl:
"""Make sure that there are not blank spaces in the IRI"""
if isinstance(value, str):
value = AnyUrl(value.strip())
else:
if not isinstance(value, list):
value = AnyUrl(str(value).strip())
else:
value = [AnyUrl(str(iterable).strip()) for iterable in value]
return value

@field_validator("suffix")
Expand All @@ -106,4 +115,7 @@ def validate_suffix(

iri = info.data["iri"]
config = info.data["config"]
if isinstance(iri, list) and value is None:
raise TypeError("If the iri is a list, the suffix must be set ")

return value or str(iri).split(config.separator)[-1]
150 changes: 58 additions & 92 deletions data2rdf/models/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,17 @@
import warnings
from typing import Any, Dict, List, Optional, Union

from pydantic import (
from data2rdf.models.utils import apply_datatype, detect_datatype
from data2rdf.qudt.utils import _get_query_match
from data2rdf.utils import make_prefix

from data2rdf.models.base import ( # isort:skip
BasicGraphModel,
BasicSuffixModel,
RelationType,
)

from pydantic import ( # isort:skip
AnyUrl,
BaseModel,
Field,
Expand All @@ -12,10 +22,6 @@
model_validator,
)

from data2rdf.models.base import BasicGraphModel, BasicSuffixModel
from data2rdf.qudt.utils import _get_query_match
from data2rdf.utils import is_bool, is_float, is_integer, is_uri, make_prefix


class ValueRelationMapping(BaseModel):
"""Mapping between a object/data/annotation property and a value resolved from a location in the data file"""
Expand All @@ -29,6 +35,9 @@ class ValueRelationMapping(BaseModel):
description="""Object/Data/Annotation property for the value
resolving from `key` of this model""",
)
datatype: Optional[str] = Field(
None, description="XSD Datatype of the value"
)


class ClassTypeGraph(BasicGraphModel):
Expand All @@ -39,7 +48,7 @@ class ClassTypeGraph(BasicGraphModel):
description="""Value of the suffix of the
ontological class to be used""",
)
rdfs_type: AnyUrl = Field(
rdfs_type: str = Field(
"owl:Class", description="rdfs:type for this concept"
)
annotation_properties: Optional[List[ValueRelationMapping]] = Field(
Expand All @@ -52,43 +61,27 @@ class ClassTypeGraph(BasicGraphModel):
None, description="Mappings for Data Properties"
)

@classmethod
def value_json(cls, value) -> "Dict[str, Any]":
"""Return json with value definition"""
if is_integer(value):
dtype = "xsd:integer"
value = int(value)
elif is_float(value):
dtype = "xsd:float"
value = float(value)
elif is_bool(value):
dtype = "xsd:bool"
value = bool(value)
elif is_uri(value):
dtype = "xsd:anyURI"
value = str(value)
elif isinstance(value, str):
dtype = "xsd:string"
else:
raise TypeError(
f"Datatype of value `{value}` ({type(value)}) cannot be mapped to xsd."
)

return {"@type": dtype, "@value": value}

# OVERRIDE
@property
def json_ld(self) -> "Dict[str, Any]":
annotations = {
model.relation: self.value_json(model.value)
model.relation: (
apply_datatype(model)
if model.datatype
else detect_datatype(str(model.value))
)
for model in self.annotation_properties
}
datatypes = {
model.relation: self.value_json(model.value)
model.relation: (
apply_datatype(model.value, model.datatype)
if model.datatype
else detect_datatype(str(model.value))
)
for model in self.data_properties
}
objects = {
model.relation: str(model.value)
model.relation: {"@id": model.value}
for model in self.object_properties
}
return {
Expand Down Expand Up @@ -168,7 +161,11 @@ def json_ld(cls) -> Dict[str, Any]:
"qudt": "http://qudt.org/schema/qudt/",
},
"@id": f"{cls.config.prefix_name}:{cls.suffix}",
"@type": str(cls.iri),
"@type": (
[str(iri) for iri in cls.iri]
if isinstance(cls.iri, list)
else str(cls.iri)
),
**cls.unit_json,
**cls.value_json,
}
Expand All @@ -191,25 +188,7 @@ def unit_json(self) -> "Dict[str, Any]":
def value_json(self) -> "Dict[str, Any]":
"""Return json with value definition"""
if self.value:
if is_float(self.value):
dtype = "xsd:float"
value = float(self.value)
elif is_integer(self.value):
dtype = "xsd:integer"
value = int(self.value)
elif is_bool(self.value):
dtype = "xsd:bool"
value = bool(self.value)
elif is_uri(self.value):
dtype = "xsd:anyURI"
value = str(self.value)
else:
raise ValueError(
f"""Datatype not recognized for key
`{self.key}` with value:
`{self.value}`"""
)
value = {self.value_relation: {"@type": dtype, "@value": value}}
value = {self.value_relation: detect_datatype(str(self.value))}
else:
value = {}
return value
Expand All @@ -232,8 +211,12 @@ class PropertyGraph(BasicGraphModel, BasicSuffixModel):
description="""Data or annotation property
for mapping the data value to the individual.""",
)
datatype: Optional[str] = Field(
None, description="XSD Datatype of the value"
value_relation_type: Optional[RelationType] = Field(
None, description="Type of the semantic relation used in the mappings"
)
value_datatype: Optional[str] = Field(
None,
description="In case of an annotation or data property, this field indicates the XSD Datatype of the value",
)

@field_validator("annotation")
Expand Down Expand Up @@ -274,43 +257,14 @@ def json_ld(cls) -> Dict[str, Any]:
@property
def value_json(self) -> "Optional[Dict[str, str]]":
if not isinstance(self.value, type(None)):
if not self.datatype:
if is_integer(self.value):
dtype = "xsd:integer"
value = int(self.value)
elif is_float(self.value):
dtype = "xsd:float"
value = float(self.value)
elif is_bool(self.value):
dtype = "xsd:bool"
value = bool(self.value)
elif is_uri(self.value):
dtype = "xsd:anyURI"
value = str(self.value)
if self.value_relation_type != RelationType.OBJECT_PROPERTY:
if not self.value_datatype:
spec = detect_datatype(str(self.value))
else:
value = str(self.value)
dtype = "xsd:string"
spec = apply_datatype(self.value, self.value_datatype)
response = {self.value_relation: spec}
else:
dtype = f"xsd:{self.datatype}"
if dtype == "xsd:anyURI":
value = str(self.value)
elif dtype == "xsd:bool":
value = bool(self.value)
elif dtype == "xsd:integer":
value = int(self.value)
elif dtype == "xsd:float":
value = float(self.value)
elif dtype == "xsd:string":
value = str(self.value)
else:
raise ValueError(
f"""Datatype not recognized for concept with iri
`{self.iri}` and value:
`{self.value}`"""
)

response = {self.value_relation: {"@type": dtype, "@value": value}}

response = {self.value_relation: {"@id": str(self.value)}}
else:
response = {}
return response
Expand All @@ -321,10 +275,22 @@ def types_json(cls) -> "Dict[str, Any]":
if cls.annotation:
types = {
"@type": [
str(cls.iri),
(
[str(iri) for iri in cls.iri]
if isinstance(cls.iri, list)
else str(cls.iri)
),
cls.annotation,
]
}
else:
types = {"@type": str(cls.iri)}
types = {
"@type": [
(
[str(iri) for iri in cls.iri]
if isinstance(cls.iri, list)
else str(cls.iri)
)
]
}
return types
26 changes: 15 additions & 11 deletions data2rdf/models/mapping.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,11 @@
"""Mapping models for data2rdf"""

from enum import Enum

from typing import List, Optional, Union

from pydantic import AnyUrl, BaseModel, Field, field_validator, model_validator

from .base import BasicConceptMapping, BasicSuffixModel


class RelationType(str, Enum):
"""Relation Type of TBox modellings"""

ANNOTATION_PROPERTY = "annotation_property"
DATA_PROPERTY = "data_property"
OBJECT_PROPERTY = "object_property"
from .base import BasicConceptMapping, BasicSuffixModel, RelationType


class TBoxBaseMapping(BasicConceptMapping):
Expand All @@ -35,6 +27,10 @@ class TBoxBaseMapping(BasicConceptMapping):
..., description="Type of the semantic relation used in the mappings"
)

datatype: Optional[str] = Field(
None, description="XSD Datatype of the targed value"
)


class CustomRelation(BaseModel):
"""Custom relation model"""
Expand All @@ -51,6 +47,9 @@ class CustomRelation(BaseModel):
object_data_type: Optional[str] = Field(
None, description="XSD Data type of the object"
)
relation_type: Optional[RelationType] = Field(
None, description="Type of the semantic relation used in the mappings"
)


class ABoxBaseMapping(BasicConceptMapping, BasicSuffixModel):
Expand Down Expand Up @@ -89,7 +88,12 @@ class ABoxBaseMapping(BasicConceptMapping, BasicSuffixModel):
description="""Data or annotation property
for mapping the data value to the individual.""",
)

value_relation_type: Optional[RelationType] = Field(
None, description="Type of the semantic relation used in the mappings"
)
value_datatype: Optional[str] = Field(
None, description="XSD Datatype of the targed value"
)
unit_relation: Optional[Union[str, AnyUrl]] = Field(
None,
description="""Object property for mapping the IRI
Expand Down
Loading

0 comments on commit e5f2a28

Please sign in to comment.