Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding translation for in-memory RML #85

Merged
merged 2 commits into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.1.5
1.2
6 changes: 5 additions & 1 deletion src/yatter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@ def translate(yarrrml_data, mapping_format=RML_URI):
rml_mapping.extend(generate_database_connections(yarrrml_data, list_initial_sources))
rml_mapping.extend(add_logical_targets(yarrrml_data))
rml_mapping.extend(add_functions(yarrrml_data))
external_refs = []
try:
mappings, mapping_format = get_non_asserted_mappings(yarrrml_data, mapping_format)
for mapping in yarrrml_data.get(YARRRML_MAPPINGS):
if mapping_format == R2RML_URI:
source_list = add_table(yarrrml_data, mapping, list_initial_sources)
else:
source_list = add_source(yarrrml_data, mapping, list_initial_sources)
source_list, external_refs = add_source(yarrrml_data, mapping, list_initial_sources)
subject_list = add_subject(yarrrml_data, mapping, mapping_format)
pred = add_predicate_object_maps(yarrrml_data, mapping, mapping_format)
it = 0
Expand All @@ -37,6 +38,9 @@ def translate(yarrrml_data, mapping_format=RML_URI):
rml_mapping[len(rml_mapping) - 1] = rml_mapping[len(rml_mapping) - 1][:-2]
rml_mapping.append(".\n\n\n")
it = it + 1
external_refs = list(dict.fromkeys(external_refs))
for ref in external_refs:
rml_mapping.append(ref)

logger.info("RML content is created!")
rml_mapping_string = "".join(rml_mapping)
Expand Down
18 changes: 17 additions & 1 deletion src/yatter/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@
VOID_URI = 'http://rdfs.org/ns/void#'
FNML_URI = 'http://semweb.mmlab.be/ns/fnml#'
GREL_URI = 'http://users.ugent.be/~bjdmeest/function/grel.ttl#'
SD_URI = 'https://w3id.org/okn/o/sd/'

RML_PREFIX = '@prefix'
RML_BASE = '@base'
RML_LOGICAL_SOURCE_CLASS = 'rml:LogicalSource'
RML_LOGICAL_SOURCE = 'rml:logicalSource'
RML_SOURCE = 'rml:source'
RML_REFERENCE_FORMULATION = 'rml:referenceFormulation'
RML_REFERENCE_FORMULATION_CLASS = 'rml:ReferenceFormulation'
RML_ITERATOR = 'rml:iterator'
RML_REFERENCE = 'rml:reference'
RML_LANGUAGE_MAP = 'rml:languageMap'
Expand Down Expand Up @@ -103,7 +105,16 @@
D2RQ_USER = 'd2rq:username'
D2RQ_PASS = 'd2rq:password'


##############################################################################
############################# SD CONSTANTS ###########################
##############################################################################
# Prefixed terms emitted when an in-memory data source is serialised as the
# object of rml:source. The `sd:` prefix corresponds to SD_URI
# ('https://w3id.org/okn/o/sd/').
SD_DATASET_SPEC = 'sd:DatasetSpecification'
SD_NAME = 'sd:name'
SD_HAS_DATA_TRANSFORMATION = 'sd:hasDataTransformation'
SD_HAS_SOFTWARE_REQUIREMENTS = 'sd:hasSoftwareRequirements'
SD_HAS_SOURCE_CODE= 'sd:hasSourceCode'
SD_PROGRAMMING_LANGUAGE = 'sd:programmingLanguage'
# NOTE: this one uses the `kg4di:` prefix, not `sd:`. It links a reference
# formulation to the structure definer that declares it.
KG4DI_DEFINED_BY = 'kg4di:definedBy'
##############################################################################
############################# YARRRML CONSTANTS ###########################
##############################################################################
Expand All @@ -122,6 +133,11 @@
YARRRML_USERNAME = 'username'
YARRRML_PASSWORD = 'password'

YARRRML_STRUCTURE_DEFINER = 'structureDefiner'
YARRRML_SOFTWARE_SPECIFICATION = 'softwareSpecification'
YARRRML_PROGRAMMING_LANGUAGE = 'programmingLanguage'
YARRRML_SOFTWARE_REQUIREMENTS = 'softwareRequirements'

YARRRML_MAPPINGS = 'mappings' # used for mappings in conditions and mappings main key
YARRRML_MAPPING = 'mapping'
YARRRRL_MAPPINGS_SHORTCUT = 'm'
Expand Down
2 changes: 1 addition & 1 deletion src/yatter/predicateobject.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ def ref_mapping(data, mapping, om, yarrrml_key, ref_type_property, mapping_forma
else:
if mapping_format == STAR_URI:
object = STAR_OBJECT
source_list = add_source(data, mapping_join, list_initial_sources)
source_list, external_refs = add_source(data, mapping_join, list_initial_sources)

number_joins_rml = len(subject_list) * len(source_list)
for i in range(number_joins_rml):
Expand Down
63 changes: 61 additions & 2 deletions src/yatter/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def add_source(data, mapping, list_initial_sources):
source_template = "\t" + RML_LOGICAL_SOURCE + " [\n\t\ta " + RML_LOGICAL_SOURCE_CLASS + \
";\n\t\t" + RML_SOURCE + " "
final_list = []
external_references_list = []
sources = get_sources(data, mapping)
for source in sources:
db_identifier = mapping
Expand All @@ -40,13 +41,24 @@ def add_source(data, mapping, list_initial_sources):
if YARRRML_ACCESS in source:
if YARRRML_QUERY in source:
final_list.append(source_template + database_source(mapping, source, db_identifier))
elif YARRRML_STRUCTURE_DEFINER in source:
source, external_references = add_in_memory_source(mapping,source)
final_list.append(source_template + source)
if external_references is not None:
external_references_list.append(external_references)
else:
final_list.append(source_template + add_source_full(mapping, source))
elif type(source) is list:
final_list.append(source_template + add_source_simplified(mapping, source))
if "$(" in source[0]:
source, external_references = add_in_memory_source(mapping, source)
final_list.append(source_template + source)
if external_references is not None:
external_references_list.append(external_references)
else:
final_list.append(source_template + add_source_simplified(mapping, source))
else:
raise Exception("ERROR: source " + source + " in mapping " + mapping + " not valid")
return final_list
return final_list, external_references_list


def add_table(data, mapping, list_initial_sources):
Expand Down Expand Up @@ -105,6 +117,53 @@ def add_source_simplified(mapping, source):
+ source[1] + "\";\n\t];\n"
return source_rdf

def add_in_memory_source(mapping, source):
    """Build the ``rml:source`` object for an in-memory data source.

    :param mapping: mapping identifier; not used by this function.
    :param source: the YARRRML source, either in dictionary form or as the
        list shortcut, which is first expanded with ``extend_in_memory``.
    :return: a ``(source_rdf, external_reference_formulation)`` pair.
        ``source_rdf`` is the Turtle text of the ``sd:DatasetSpecification``
        blank node, followed by the optional iterator and reference
        formulation and the bracket closing the enclosing logical source.
        ``external_reference_formulation`` is a stand-alone statement that
        types the reference formulation and names its structure definer, or
        ``None`` when the source declares no reference formulation.
    """
    # The shortcut form arrives as a list; normalise it to the dict form.
    if type(source) is list:
        source = extend_in_memory(source)

    # "$(name)" -> "name": only the bare variable name is written out.
    dataset_name = str(source.get(YARRRML_ACCESS)).replace("$(", "").replace(")", "")
    pieces = [
        "[\n\t\t\ta " + SD_DATASET_SPEC + ";\n\t\t\t",
        SD_NAME + " \"" + dataset_name + "\";\n",
    ]

    if YARRRML_SOFTWARE_SPECIFICATION in source:
        pieces.append("\t\t\t" + SD_HAS_DATA_TRANSFORMATION + "[\n\t\t\t\t")
        software = source.get(YARRRML_SOFTWARE_SPECIFICATION)

        if YARRRML_SOFTWARE_REQUIREMENTS in software:
            requirements = str(software[YARRRML_SOFTWARE_REQUIREMENTS])
            pieces.append(SD_HAS_SOFTWARE_REQUIREMENTS + " \"" + requirements + "\";\n\t\t\t\t")

        if YARRRML_PROGRAMMING_LANGUAGE in software:
            language = str(software[YARRRML_PROGRAMMING_LANGUAGE])
            pieces.append(SD_HAS_SOURCE_CODE + "[\n\t\t\t\t\t" + SD_PROGRAMMING_LANGUAGE + " \"" + language + "\";")
            pieces.append("\n\t\t\t\t];\n")

        # Closes the sd:hasDataTransformation blank node.
        pieces.append("\t\t\t];\n")

    # Closes the sd:DatasetSpecification blank node.
    pieces.append("\t\t];\n")

    if YARRRML_ITERATOR in source:
        pieces.append("\t\t" + RML_ITERATOR + " \"" + source.get(YARRRML_ITERATOR) + "\";\n")

    external_reference_formulation = None
    if YARRRML_REFERENCE_FORMULATION in source:
        formulation = "ql:" + str(source.get(YARRRML_REFERENCE_FORMULATION))
        pieces.append("\t\t" + RML_REFERENCE_FORMULATION + " " + formulation + ";\n")
        # The formulation itself is described by a separate top-level statement.
        external_reference_formulation = (
            formulation + " a " + RML_REFERENCE_FORMULATION_CLASS + ";\n"
            + "\t" + KG4DI_DEFINED_BY + " \"" + source.get(YARRRML_STRUCTURE_DEFINER) + "\"."
        )

    # Closes the logical source that the caller's template opened.
    pieces.append("\t];\n")

    return "".join(pieces), external_reference_formulation

def extend_in_memory(source):
    """Expand the in-memory source shortcut into its dictionary form.

    The first list item reads
    ``<access>~<structureDefiner>-<referenceFormulation>`` (for example
    ``$(output_dataframe)~Pandas-DataFrame``); an optional second item is the
    iterator.

    :param source: list holding the shortcut string and, optionally, the
        iterator.
    :return: dict with the keys ``access``, ``structureDefiner`` and
        ``referenceFormulation``, plus ``iterator`` when the list has exactly
        two items.
    :raises ValueError: if the shortcut lacks the ``~`` or ``-`` separator.
    """
    shortcut = source[0]
    features = shortcut.split("~")
    # Split the structure description once and reuse it for both fields.
    structure = features[1].split("-") if len(features) > 1 else []
    if len(structure) < 2:
        # Fail with a message naming the offending source instead of a bare
        # "list index out of range".
        raise ValueError(
            "ERROR: in-memory source '" + str(shortcut) + "' not valid, expected "
            "'$(variable)~structureDefiner-referenceFormulation'"
        )

    extended_source = {
        "access": features[0],
        "structureDefiner": structure[0],
        "referenceFormulation": structure[1],
    }
    if len(source) == 2:
        extended_source["iterator"] = source[1]
    return extended_source

def add_source_full(mapping, source):
source_rdf = ""
Expand Down
46 changes: 46 additions & 0 deletions test/rml-in-memory/IMTC001/mapping.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
@prefix insta: <http://instagram.com/data/>.
@prefix rr: <http://www.w3.org/ns/r2rml#>.
@prefix rml: <http://semweb.mmlab.be/ns/rml#>.
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix xsd: <http://www.w3.org/2001/XMLSchema#>.
@prefix sd: <https://w3id.org/okn/o/sd/>.
@prefix kg4di: <https://w3id.org/kg4di/>.
@prefix ql: <http://semweb.mmlab.be/ns/ql#>.
@base <http://example.com/ns#>.


<df_map_0> a rr:TriplesMap;

rml:logicalSource [
a rml:LogicalSource;
rml:source [
a sd:DatasetSpecification;
sd:name "output_dataframe";
sd:hasDataTransformation[
sd:hasSoftwareRequirements "pandas>=1.1.0";
sd:hasSourceCode[
sd:programmingLanguage "Python3.9";
];
];
];
rml:referenceFormulation ql:DataFrame;
];
rr:subjectMap [
a rr:SubjectMap;
rr:template "http://instagram.com/data/user{Id}";
];
rr:predicateObjectMap [
rr:predicateMap [
a rr:PredicateMap;
rr:constant rdf:type;
];
rr:objectMap [
a rr:ObjectMap;
rr:constant insta:User;
];
].


ql:DataFrame a rml:ReferenceFormulation;
kg4di:definedBy "Pandas".
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ mappings:
sources:
access: $(output_dataframe)
referenceFormulation: DataFrame
structureDefiner: Pandas
softwareSpecification:
programmingLanguage: Python3.9
softwareRequirements: pandas>=1.1.0
s: http://instagram.com/data/user{Id}
structureDefiner: Pandas
softwareSpecification:
programmingLanguage: Python3.9
softwareRequirements: pandas>=1.1.0
s: http://instagram.com/data/user$(Id)
po:
- [a, insta:User]
43 changes: 0 additions & 43 deletions test/rml-in-memory/IMTC001/mappings.rml.ttl

This file was deleted.

26 changes: 26 additions & 0 deletions test/rml-in-memory/IMTC001/test_imtc001.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
__author__ = "Ioannis Dasoulas"
__credits__ = ["Ioannis Dasoulas"]

__license__ = "Apache-2.0"
__maintainer__ = "David Chaves-Fraga"
__email__ = "[email protected]"


import os
from ruamel.yaml import YAML
import yatter
from rdflib.graph import Graph
from rdflib import compare
RML_URI = 'http://semweb.mmlab.be/ns/rml#'


def test_imtc001():
    """Translate ``mapping.yml`` and compare it with the expected ``mapping.ttl``.

    The test passes when the RML produced by ``yatter.translate`` parses to a
    graph isomorphic to the one stored next to this file.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    expected_mapping = Graph()
    expected_mapping.parse(os.path.join(test_dir, 'mapping.ttl'), format="ttl")

    yaml = YAML(typ='safe', pure=True)
    # Use a context manager so the YAML file handle is closed deterministically.
    with open(os.path.join(test_dir, 'mapping.yml')) as mapping_file:
        yarrrml_data = yaml.load(mapping_file)

    translated_mapping = Graph()
    translated_mapping.parse(data=yatter.translate(yarrrml_data, mapping_format=RML_URI), format="ttl")

    assert compare.isomorphic(expected_mapping, translated_mapping)
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix xsd: <http://www.w3.org/2001/XMLSchema#>.
@prefix sd: <https://w3id.org/okn/o/sd/>.
@prefix kg4di: <https://w3id.org/kg4di/definedBy>.
@prefix kg4di: <https://w3id.org/kg4di/>.
@prefix ql: <http://semweb.mmlab.be/ns/ql#>.
@base <http://example.com/ns#>.


<df_map> a rr:TriplesMap;
<df_map_0> a rr:TriplesMap;

rml:logicalSource [
a rml:LogicalSource;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ mappings:
sources:
access: $(output_dataframe)
referenceFormulation: DataFrame
structureDefiner: Pandas
s: http://instagram.com/data/user{Id}
structureDefiner: Pandas
s: http://instagram.com/data/user$(Id)
po:
- [a, insta:User]
- [a, insta:User]
26 changes: 26 additions & 0 deletions test/rml-in-memory/IMTC002/test_imtc002.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
__author__ = "Ioannis Dasoulas"
__credits__ = ["Ioannis Dasoulas"]

__license__ = "Apache-2.0"
__maintainer__ = "David Chaves-Fraga"
__email__ = "[email protected]"


import os
from ruamel.yaml import YAML
import yatter
from rdflib.graph import Graph
from rdflib import compare
RML_URI = 'http://semweb.mmlab.be/ns/rml#'


def test_imtc002():
    """Translate ``mapping.yml`` and compare it with the expected ``mapping.ttl``.

    The test passes when the RML produced by ``yatter.translate`` parses to a
    graph isomorphic to the one stored next to this file.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    expected_mapping = Graph()
    expected_mapping.parse(os.path.join(test_dir, 'mapping.ttl'), format="ttl")

    yaml = YAML(typ='safe', pure=True)
    # Use a context manager so the YAML file handle is closed deterministically.
    with open(os.path.join(test_dir, 'mapping.yml')) as mapping_file:
        yarrrml_data = yaml.load(mapping_file)

    translated_mapping = Graph()
    translated_mapping.parse(data=yatter.translate(yarrrml_data, mapping_format=RML_URI), format="ttl")

    assert compare.isomorphic(expected_mapping, translated_mapping)
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix xsd: <http://www.w3.org/2001/XMLSchema#>.
@prefix sd: <https://w3id.org/okn/o/sd/>.
@prefix kg4di: <https://w3id.org/kg4di/definedBy>.
@prefix kg4di: <https://w3id.org/kg4di/>.
@prefix ql: <http://semweb.mmlab.be/ns/ql#>.
@base <http://example.com/ns#>.


<df_map> a rr:TriplesMap;
<df_map_0> a rr:TriplesMap;

rml:logicalSource [
a rml:LogicalSource;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ mappings:
df_map:
sources:
- [$(output_dataframe)~Pandas-DataFrame]
s: http://instagram.com/data/user{Id}
s: http://instagram.com/data/user$(Id)
po:
- [a, insta:User]
Loading
Loading