Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added a module for converting to/from dicts #126

Merged
merged 7 commits into from
Aug 28, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api_reference/convert/.pages
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
title: "convert"
3 changes: 3 additions & 0 deletions docs/api_reference/convert/convert.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# convert

::: tripper.convert.convert
53 changes: 53 additions & 0 deletions tests/convert/test_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""Test conversions."""
# pylint: disable=invalid-name
from tripper import Triplestore
from tripper.convert import load_dict, save_dict

# In-memory triplestore used for the round-trip checks below
ts = Triplestore(backend="rdflib")
EX = ts.bind("ex", "http://example.com/ex#")


# A flat dict with only string values
config1 = {
    "downloadUrl": "http://example.com/somedata.txt",
    "mediaType": "application/text",
    "anotherField": "More info...",
}

# Same content as `config1`, extended with a nested dict holding values of
# different types.  Note: lists are not supported yet, hence no list value.
config2 = {
    **config1,
    "configurations": {
        "key1": "val1",
        "key2": 2,
        "key3": 3.14,
        "key4": None,
    },
}

# --- Round-trip using the generic key-value pair representation ---
save_dict(ts, config1, EX.config1)
save_dict(ts, config2, EX.config2)

# Uncomment to inspect the content of the triplestore:
# print(ts.serialize())

d1 = load_dict(ts, EX.config1)
d2 = load_dict(ts, EX.config2)

# What we load must equal what we stored
assert d1 == config1
assert d2 == config2


# --- Round-trip using the "basic" set of recognised keys ---
save_dict(ts, config1, EX.config1b, recognised_keys="basic")
save_dict(ts, config2, EX.config2b, recognised_keys="basic")

d1b = load_dict(ts, EX.config1b, recognised_keys="basic")
d2b = load_dict(ts, EX.config2b, recognised_keys="basic")

assert d1b == config1
assert d2b == config2
4 changes: 4 additions & 0 deletions tripper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .literal import Literal
from .namespace import (
DC,
DCAT,
DCTERMS,
DM,
DOAP,
Expand All @@ -29,6 +30,7 @@
__all__ = (
"Literal",
#
"DCAT",
"DC",
"DCTERMS",
"DM",
Expand All @@ -48,4 +50,6 @@
"Triplestore",
"backend_packages",
"Tripper",
#
"__version__",
)
8 changes: 8 additions & 0 deletions tripper/convert/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Tripper sub-package for converting between RDF and other representations."""
from .convert import from_dict, load_dict, save_dict

# Names re-exported from `.convert` as the public API of this sub-package.
__all__ = [
"from_dict",
"save_dict",
"load_dict",
]
197 changes: 197 additions & 0 deletions tripper/convert/convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
"""Tripper module for converting between RDF and other representations."""
# pylint: disable=invalid-name,redefined-builtin
from collections.abc import Mapping
from uuid import uuid4

from tripper import DCAT, DCTERMS, EMMO, OWL, RDF, RDFS, Literal, Namespace
from tripper.utils import parse_literal

# Namespace providing the Dictionary, DictionaryKey, DictionaryValue and
# KeyValuePair terms (and the properties relating them) that this module
# uses to represent a dict as RDF.
OTEIO = Namespace("http://emmo.info/oteio#")

# Mapping from dict keys to the IRIs of DCAT, DCTERMS and RDFS properties.
# It is used by the functions in this module when they are called with
# `recognised_keys="basic"`.
BASIC_RECOGNISED_KEYS = {
"downloadUrl": DCAT.downloadUrl,
"mediaType": DCAT.mediaType,
"accessUrl": DCAT.accessUrl,
"accessService": DCAT.accessService,
"license": DCTERMS.license,
"accessRights": DCTERMS.accessRights,
"publisher": DCTERMS.publisher,
"description": DCTERMS.description,
"creator": DCTERMS.creator,
"contributor": DCTERMS.contributor,
"title": DCTERMS.title,
"available": DCTERMS.available,
"bibliographicCitation": DCTERMS.bibliographicCitation,
"conformsTo": DCTERMS.conformsTo,
"created": DCTERMS.created,
"references": DCTERMS.references,
"isReplacedBy": DCTERMS.isReplacedBy,
"requires": DCTERMS.requires,
"label": RDFS.label,
"comment": RDFS.comment,
}


def from_dict(
    dct,
    iri,
    bases=(OTEIO.Dictionary,),
    lang="en",
    recognised_keys=None,
    keep=False,
):
    """Serialise a dict as RDF.

    Arguments:
        dct: The dict to be serialised.
        iri: IRI of the individual that stands for the dict.
        bases: Parent class(es) of the dict.
        lang: Language to use for keys.
        recognised_keys: An optional dict that maps dict keys to the
            IRIs of recognised RDF properties.
            If set to the special string "basic", the module-level
            `BASIC_RECOGNISED_KEYS` dict will be used.
        keep: Whether to keep the key-value pair representation for
            items serialised with recognised_keys.  Note that this
            will duplicate potentially large literal values.

    Returns:
        List of RDF triples.
    """
    if recognised_keys == "basic":
        recognised_keys = BASIC_RECOGNISED_KEYS

    # The individual is an instance of each of the base classes
    rdf = [(iri, RDF.type, base) for base in bases]

    for dkey, dvalue in dct.items():
        recognised = bool(recognised_keys) and dkey in recognised_keys

        if isinstance(dvalue, Mapping):
            # Ideally this should be a blank node, but that becomes
            # too nested for rdflib.  Instead we make the IRI unique
            # by embedding a UUID.
            value = f"dict_{uuid4()}"
            rdf.extend(
                from_dict(
                    dvalue,
                    value,
                    lang=lang,
                    recognised_keys=recognised_keys,
                    keep=keep,
                )
            )
        elif dvalue is None:
            # None is represented by owl:Nothing
            value = OWL.Nothing
        else:
            value = parse_literal(dvalue)

        if recognised:
            # Relate the value directly to the dict via the recognised property
            rdf.append((iri, recognised_keys[dkey], value))

        if not recognised or keep:
            # Generic representation: a key-value pair individual that
            # links a key individual to a value individual.  The three
            # blank nodes share the same UUID.
            uuid = uuid4()
            key = f"_:key_{uuid}"
            value_indv = f"_:value_{uuid}"
            pair = f"_:pair_{uuid}"
            rdf.extend(
                [
                    (key, RDF.type, OTEIO.DictionaryKey),
                    (key, EMMO.hasStringValue, Literal(dkey, lang=lang)),
                    (value_indv, RDF.type, OTEIO.DictionaryValue),
                    (value_indv, EMMO.hasValue, value),
                    (pair, RDF.type, OTEIO.KeyValuePair),
                    (pair, OTEIO.hasDictionaryKey, key),
                    (pair, OTEIO.hasDictionaryValue, value_indv),
                    (iri, OTEIO.hasKeyValuePair, pair),
                ]
            )

    return rdf


def save_dict(
    ts,
    dct,
    iri,
    bases=(OTEIO.Dictionary,),
    recognised_keys=None,
    keep=False,
    lang="en",
):
    """Save a dict to a triplestore.

    Arguments:
        ts: Triplestore to save the dict to.
        dct: The dict to be saved.
        iri: IRI of the individual that stands for the dict.
        bases: Parent class(es) of the dict.
        recognised_keys: An optional dict that maps dict keys to the
            IRIs of recognised RDF properties.
            If set to the special string "basic", the module-level
            `BASIC_RECOGNISED_KEYS` dict will be used.
        keep: Whether to keep the key-value pair representation for
            items serialised with recognised_keys.  Note that this
            will duplicate potentially large literal values.
        lang: Language to use for keys.
    """
    # Make sure the prefixes used by the generated triples are bound,
    # without overriding any existing binding.
    for prefix, namespace in (("dcat", DCAT), ("emmo", EMMO), ("oteio", OTEIO)):
        if prefix not in ts.namespaces:
            ts.bind(prefix, namespace)

    ts.add_triples(
        from_dict(
            dct,
            iri,
            bases=bases,
            lang=lang,
            recognised_keys=recognised_keys,
            keep=keep,
        )
    )


def load_dict(ts, iri, recognised_keys=None):
    """Load a dict from a triplestore.

    Arguments:
        ts: Triplestore from which to fetch the dict.
        iri: IRI of the individual that stands for the dict to fetch.
        recognised_keys: An optional dict that maps dict keys to the
            IRIs of recognised RDF properties.
            If set to the special string "basic", the module-level
            `BASIC_RECOGNISED_KEYS` dict will be used.

    Returns:
        A dict corresponding to `iri`.
    """
    if recognised_keys == "basic":
        recognised_keys = BASIC_RECOGNISED_KEYS

    dct = {}

    # Items stored with the generic key-value pair representation
    for _, _, pair in ts.triples(subject=iri, predicate=OTEIO.hasKeyValuePair):
        key_iri = ts.value(pair, OTEIO.hasDictionaryKey)
        key = ts.value(key_iri, EMMO.hasStringValue)
        value_iri = ts.value(pair, OTEIO.hasDictionaryValue)
        value = ts.value(value_iri, EMMO.hasValue)
        value_type = ts.value(value, RDF.type)

        if value_type == OTEIO.Dictionary:
            # Nested dict
            val = load_dict(ts, value, recognised_keys=recognised_keys)
        elif value == OWL.Nothing:
            # owl:Nothing stands for None
            val = None
        elif isinstance(value, Literal):
            val = value.value
        else:
            val = value

        dct[str(key)] = val

    # Items stored as recognised RDF properties
    if recognised_keys:
        iris = {v: k for k, v in recognised_keys.items()}
        for _, p, o in ts.triples(subject=iri):
            key = iris.get(p)
            if not key or key in dct:
                # Not a recognised property, or already loaded above
                continue

            if isinstance(o, Literal):
                dct[key] = o.value
            elif o == OWL.Nothing:
                # owl:Nothing stands for None
                dct[key] = None
            elif any(
                otype == OTEIO.Dictionary
                for _, _, otype in ts.triples(subject=o, predicate=RDF.type)
            ):
                # Nested dict.  The types are iterated over, instead of
                # asking for a single value, since an arbitrary object
                # may have several types.
                dct[key] = load_dict(ts, o, recognised_keys=recognised_keys)
            else:
                dct[key] = o

    return dct
1 change: 1 addition & 0 deletions tripper/triplestore.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ class Triplestore:
"xsd": XSD,
"owl": OWL,
# "skos": SKOS,
# "dcat": DCAT,
# "dc": DC,
# "dcterms": DCTERMS,
# "foaf": FOAF,
Expand Down
Loading