Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/ted 207 #63

Merged
merged 12 commits into from
Apr 18, 2022
1 change: 1 addition & 0 deletions ted_sws/core/model/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class MappingSuite(MappingSuiteComponent):
title: str = "no_title"
version: str = "0.1"
ontology_version: str = "0.0.1"
xsd_version: str = "no_xsd_version"
metadata_constraints: MetadataConstraints
transformation_rule_set: TransformationRuleSet
shacl_test_suites: List[SHACLTestSuite]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import json
import pathlib
from datetime import datetime
import pandas as pd

# Row labels looked up in the 'Field' column of the metadata sheet (input side).
FORM_NUMBER_FIELD = 'Form number'
LEGAL_BASIS_FIELD = 'Legal Basis'
YEAR_FIELD = 'Year'
NOTICE_TYPE_FIELD = 'Notice type (eForms)'
FORM_TYPE_FIELD = 'Form type(eForms)'
VERSION_FIELD = 'Version'
EPO_VERSION_FIELD = 'EPO version'
XSD_VERSION_FIELD = 'XSD version number(s)'
TITLE_FIELD = 'Title'
IDENTIFIER_FIELD = 'Identifier'

# Keys used in the generated metadata JSON document (output side).
FORM_NUMBER_KEY = "form_number"
LEGAL_BASIS_KEY = "legal_basis"
YEAR_KEY = "year"
NOTICE_TYPE_KEY = "notice_type"
FORM_TYPE_KEY = "form_type"
TITLE_KEY = "title"
CREATED_KEY = "created_at"
IDENTIFIER_KEY = "identifier"
VERSION_KEY = "version"
ONTOLOGY_VERSION_KEY = "ontology_version"
XSD_VERSION_KEY = "xsd_version"
METADATA_CONSTRAINTS_KEY = "metadata_constraints"
CONSTRAINTS_KEY = "constraints"

# Name of the worksheet, inside the conceptual mappings workbook, that holds the metadata.
CONCEPTUAL_MAPPINGS_METADATA_SHEET_NAME = "Metadata"


def generate_metadata(raw_metadata: dict) -> str:
    """
    Restructure the raw spreadsheet metadata into the mapping suite metadata document.

    Empty spreadsheet cells are read by pandas as NaN. Left in place, they would make
    ``json.dumps`` emit the bare ``NaN`` token (which is not valid JSON) and would turn
    an unconstrained field into a list holding a bogus value. Missing values are
    therefore dropped from every constraint list, so an empty cell yields ``[]``.

    The single-valued fields (title, identifier, versions) are taken from the first
    value of their row and passed through unchanged.

    :param raw_metadata: mapping from field label to the list of cell values of that row
    :return: the metadata document serialised as a JSON string
    :raises KeyError: if an expected field label is absent from ``raw_metadata``
    """

    def constraint_values(field_name: str) -> list:
        # Keep only the cells that actually carry a value.
        return [value for value in raw_metadata[field_name] if pd.notna(value)]

    constraints = {
        FORM_NUMBER_KEY: constraint_values(FORM_NUMBER_FIELD),
        LEGAL_BASIS_KEY: constraint_values(LEGAL_BASIS_FIELD),
        YEAR_KEY: constraint_values(YEAR_FIELD),
        NOTICE_TYPE_KEY: constraint_values(NOTICE_TYPE_FIELD),
        FORM_TYPE_KEY: constraint_values(FORM_TYPE_FIELD),
    }

    metadata = {
        TITLE_KEY: raw_metadata[TITLE_FIELD][0],
        IDENTIFIER_KEY: raw_metadata[IDENTIFIER_FIELD][0],
        # Creation timestamp is the local time at which the metadata is generated.
        CREATED_KEY: datetime.now().isoformat(),
        VERSION_KEY: raw_metadata[VERSION_FIELD][0],
        ONTOLOGY_VERSION_KEY: raw_metadata[EPO_VERSION_FIELD][0],
        XSD_VERSION_KEY: raw_metadata[XSD_VERSION_FIELD][0],
        METADATA_CONSTRAINTS_KEY: {CONSTRAINTS_KEY: constraints},
    }
    return json.dumps(metadata)


def mapping_suite_processor_generate_metadata(conceptual_mappings_file_path: pathlib.Path,
                                              output_metadata_file_path: pathlib.Path):
    """
    Produce the metadata file of a mapping suite package from its conceptual mappings workbook.

    The metadata worksheet is loaded, re-shaped into a ``{field label: [values]}`` mapping,
    converted by ``generate_metadata`` and the resulting JSON text is written to the
    output file.

    :param conceptual_mappings_file_path: path of the conceptual mappings Excel workbook
    :param output_metadata_file_path: path of the metadata file to write
    :return: None
    """
    with open(conceptual_mappings_file_path, 'rb') as workbook:
        metadata_sheet = pd.read_excel(workbook, sheet_name=CONCEPTUAL_MAPPINGS_METADATA_SHEET_NAME)
        # One column per field label after transposing, so each label maps to its list of values.
        fields_by_label = metadata_sheet.set_index('Field').transpose()
        serialized_metadata = generate_metadata(raw_metadata=fields_by_label.to_dict('list'))

    with open(output_metadata_file_path, 'w') as target_file:
        target_file.write(serialized_metadata)
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import pathlib
from typing import Iterator
import pandas as pd

# Name of the worksheet, inside the conceptual mappings workbook, that holds the mapping rules.
CONCEPTUAL_MAPPINGS_RULES_SHEET_NAME = "Rules"
# Column headers of the rules worksheet that are read for each rule row.
# NOTE(review): the "(M)" / "(O)" suffixes presumably mean mandatory / optional - confirm.
RULES_SF_FIELD_ID = 'Standard Form Field ID (M)'
RULES_SF_FIELD_NAME = 'Standard Form Field Name (M)'
RULES_E_FORM_BT_ID = 'eForm BT-ID (O)'
RULES_E_FORM_BT_NAME = 'eForm BT Name (O)'
RULES_BASE_XPATH = 'Base XPath (for anchoring) (M)'
RULES_FIELD_XPATH = 'Field XPath (M)'
RULES_CLASS_PATH = 'Class path (M)'
RULES_PROPERTY_PATH = 'Property path (M)'


def sparql_validation_generator(data: pd.DataFrame) -> Iterator[str]:
    """
    Lazily build one SPARQL ASK query per row of the rules dataframe.

    Each query is made of a ``#title`` comment line, a ``#description`` comment line
    and the ``ASK WHERE`` clause wrapping the row's property path.

    :param data: dataframe whose columns are named after the ``RULES_*`` headers
    :return: iterator over the generated query texts, in row order
    """
    for _, rule in data.iterrows():
        field_label = f"{rule[RULES_SF_FIELD_ID]} - {rule[RULES_SF_FIELD_NAME]}"
        e_form_label = f"{rule[RULES_E_FORM_BT_ID]} {rule[RULES_E_FORM_BT_NAME]}"
        xml_element = f"{rule[RULES_BASE_XPATH]}{rule[RULES_FIELD_XPATH]}"
        ontology_class_path = rule[RULES_CLASS_PATH]
        ontology_property_path = rule[RULES_PROPERTY_PATH]

        title_line = f"#title: {field_label}"
        description_line = f"#description: “{field_label}” in SF corresponds to “{e_form_label}” in eForms. The corresponding XML element is {xml_element}. The expected ontology instances are epo: {ontology_class_path} ."
        ask_clause = f"ASK WHERE {{ {ontology_property_path} }}"
        yield "\n".join((title_line, description_line, ask_clause))


def mapping_suite_processor_generate_sparql_queries(conceptual_mappings_file_path: pathlib.Path,
                                                    output_sparql_queries_folder_path: pathlib.Path):
    """
    Read the rules sheet of the conceptual mappings workbook and write one SPARQL
    validation query file per rule into the output folder.

    Rows without a property path are skipped. Files are named ``sparql_query_<index>.rq``
    where ``<index>`` is the zero-based position of the rule among the kept rows.

    The generated queries contain non-ASCII characters (typographic quotes in the
    description line), so the files are written explicitly as UTF-8 instead of relying
    on the platform default encoding. The output folder is created when it does not exist.

    :param conceptual_mappings_file_path: path of the conceptual mappings Excel workbook
    :param output_sparql_queries_folder_path: folder that receives the generated ``.rq`` files
    :return: None
    """
    with open(conceptual_mappings_file_path, 'rb') as excel_file:
        conceptual_mappings_rules_df = pd.read_excel(excel_file, sheet_name=CONCEPTUAL_MAPPINGS_RULES_SHEET_NAME)

    # The first data row of the sheet carries the column headers used by the generator:
    # promote it to the header and drop it from the data.
    conceptual_mappings_rules_df.columns = conceptual_mappings_rules_df.iloc[0]
    conceptual_mappings_rules_df = conceptual_mappings_rules_df[1:]
    # Only rules that define a property path can be turned into an ASK query.
    conceptual_mappings_rules_df = conceptual_mappings_rules_df[
        conceptual_mappings_rules_df[RULES_PROPERTY_PATH].notnull()]

    output_sparql_queries_folder_path.mkdir(parents=True, exist_ok=True)
    sparql_queries = sparql_validation_generator(conceptual_mappings_rules_df)
    for index, sparql_query in enumerate(sparql_queries):
        output_file_path = output_sparql_queries_folder_path / f"sparql_query_{index}.rq"
        with open(output_file_path, "w", encoding="utf-8") as output_file:
            output_file.write(sparql_query)
Original file line number Diff line number Diff line change
@@ -1,60 +1,31 @@
import pathlib
from typing import Iterator

import pandas as pd

from ted_sws.data_manager.adapters.mapping_suite_repository import TRANSFORM_PACKAGE_NAME, VALIDATE_PACKAGE_NAME, \
SPARQL_PACKAGE_NAME
SPARQL_PACKAGE_NAME, METADATA_FILE_NAME
from ted_sws.mapping_suite_processor.services.conceptual_mapping_generate_metadata import \
mapping_suite_processor_generate_metadata
from ted_sws.mapping_suite_processor.services.conceptual_mapping_generate_sparql_queries import \
mapping_suite_processor_generate_sparql_queries

CONCEPTUAL_MAPPINGS_FILE_NAME = "conceptual_mappings.xlsx"
CONCEPTUAL_MAPPINGS_RULES_SHEET_NAME = "Rules"
CONCEPTUAL_MAPPINGS_ASSERTIONS = "cm_assertions"
RULES_SF_FIELD_ID = 'Standard Form Field ID (M)'
RULES_SF_FIELD_NAME = 'Standard Form Field Name (M)'
RULES_E_FORM_BT_ID = 'eForm BT-ID (O)'
RULES_E_FORM_BT_NAME = 'eForm BT Name (O)'
RULES_BASE_XPATH = 'Base XPath (for anchoring) (M)'
RULES_FIELD_XPATH = 'Field XPath (M)'
RULES_CLASS_PATH = 'Class path (M)'
RULES_PROPERTY_PATH = 'Property path (M)'


def sparql_validation_generator(data: pd.DataFrame) -> Iterator[str]:
    """
    Lazily build one SPARQL ASK query per row of the rules dataframe.

    Each query is made of a ``#title`` comment line, a ``#description`` comment line
    and the ``ASK WHERE`` clause wrapping the row's property path.

    :param data: dataframe whose columns are named after the ``RULES_*`` headers
    :return: iterator over the generated query texts, in row order
    """
    for _, rule in data.iterrows():
        field_label = f"{rule[RULES_SF_FIELD_ID]} - {rule[RULES_SF_FIELD_NAME]}"
        e_form_label = f"{rule[RULES_E_FORM_BT_ID]} {rule[RULES_E_FORM_BT_NAME]}"
        xml_element = f"{rule[RULES_BASE_XPATH]}{rule[RULES_FIELD_XPATH]}"
        ontology_class_path = rule[RULES_CLASS_PATH]
        ontology_property_path = rule[RULES_PROPERTY_PATH]

        title_line = f"#title: {field_label}"
        description_line = f"#description: “{field_label}” in SF corresponds to “{e_form_label}” in eForms. The corresponding XML element is {xml_element}. The expected ontology instances are epo: {ontology_class_path} ."
        ask_clause = f"ASK WHERE {{ {ontology_property_path} }}"
        yield "\n".join((title_line, description_line, ask_clause))


def mapping_suite_processor_generate_sparql_queries(mapping_suite_package_path: pathlib.Path):
def mapping_suite_processor_expand_package(mapping_suite_package_path: pathlib.Path):
"""
This function reads data from conceptual_mappings.xlsx and generates SPARQL validation queries in provided package.
This function reads data from conceptual_mappings.xlsx and expand provided package.
:param mapping_suite_package_path:
:return:
"""
conceptual_mappings_file_path = mapping_suite_package_path / TRANSFORM_PACKAGE_NAME / CONCEPTUAL_MAPPINGS_FILE_NAME
with open(conceptual_mappings_file_path, 'rb') as excel_file:
conceptual_mappings_rules_df = pd.read_excel(excel_file, sheet_name=CONCEPTUAL_MAPPINGS_RULES_SHEET_NAME)
conceptual_mappings_rules_df.columns = conceptual_mappings_rules_df.iloc[0]
conceptual_mappings_rules_df = conceptual_mappings_rules_df[1:]
conceptual_mappings_rules_df = conceptual_mappings_rules_df[conceptual_mappings_rules_df[RULES_PROPERTY_PATH].notnull()]
sparql_queries = sparql_validation_generator(conceptual_mappings_rules_df)
for index, sparql_query in enumerate(sparql_queries):
output_file_path = mapping_suite_package_path / VALIDATE_PACKAGE_NAME / SPARQL_PACKAGE_NAME / CONCEPTUAL_MAPPINGS_ASSERTIONS
output_file_path.mkdir(parents=True, exist_ok=True)
output_file_path = output_file_path / f"sparql_query_{index}.rq"
with open(output_file_path, "w") as output_file:
output_file.write(sparql_query)
cm_sparql_folder_path = mapping_suite_package_path / VALIDATE_PACKAGE_NAME / SPARQL_PACKAGE_NAME / CONCEPTUAL_MAPPINGS_ASSERTIONS
metadata_file_path = mapping_suite_package_path / METADATA_FILE_NAME
cm_sparql_folder_path.mkdir(parents=True, exist_ok=True)

mapping_suite_processor_generate_sparql_queries(conceptual_mappings_file_path=conceptual_mappings_file_path,
output_sparql_queries_folder_path=cm_sparql_folder_path
)

mapping_suite_processor_generate_metadata(conceptual_mappings_file_path=conceptual_mappings_file_path,
output_metadata_file_path=metadata_file_path
)
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"identifier": "test_package",
"version": "0.1",
"ontology_version": "3.0.0.alpha",
"xsd_version": "R2.0.9.S05.E01",
"metadata_constraints": {
"constraints": {
"form_number": [
Expand Down
97 changes: 97 additions & 0 deletions tests/unit/mapping_suite_processor/polygon.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import sys\n",
"sys.path.append(\"/mnt/c/Users/capitan/Desktop/WORKS/MEANING/ted-sws\")\n",
"sys.path = list(set(sys.path))\n",
"import os\n",
"os.getcwd()\n",
"os.chdir(\"/mnt/c/Users/capitan/Desktop/WORKS/MEANING/ted-sws\")\n",
"import pandas as pd\n",
"from ted_sws.data_manager.adapters.mapping_suite_repository import TRANSFORM_PACKAGE_NAME\n",
"from ted_sws.mapping_suite_processor.services.conceptual_mapping_processor import CONCEPTUAL_MAPPINGS_FILE_NAME\n",
"from tests import TEST_DATA_PATH"
]
},
{
"cell_type": "code",
"execution_count": 6,
"outputs": [],
"source": [
"conceptual_mappings_file_path = TEST_DATA_PATH / \"notice_transformer\" / \"mapping_suite_processor_repository\" / \"test_package\" / TRANSFORM_PACKAGE_NAME / CONCEPTUAL_MAPPINGS_FILE_NAME"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 9,
"outputs": [],
"source": [
"df = pd.read_excel(conceptual_mappings_file_path,sheet_name=\"Metadata\")"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 10,
"outputs": [
{
"data": {
"text/plain": " Field Value examples\n0 Form number F03\n1 Legal Basis *\n2 Year *\n3 Notice type (eForms) NaN\n4 Form type(eForms) NaN\n5 Version 0.0.1\n6 EPO version 3.0.0.alpha\n7 XSD version number(s) R2.0.9.S05.E01\n8 Title sample_title\n9 Identifier mapping_id",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Field</th>\n <th>Value examples</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Form number</td>\n <td>F03</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Legal Basis</td>\n <td>*</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Year</td>\n <td>*</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Notice type (eForms)</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Form type(eForms)</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Version</td>\n <td>0.0.1</td>\n </tr>\n <tr>\n <th>6</th>\n <td>EPO version</td>\n <td>3.0.0.alpha</td>\n </tr>\n <tr>\n <th>7</th>\n <td>XSD version number(s)</td>\n <td>R2.0.9.S05.E01</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Title</td>\n <td>sample_title</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Identifier</td>\n <td>mapping_id</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from ted_sws.data_manager.adapters.mapping_suite_repository import MappingSuiteRepositoryInFileSystem
from ted_sws.mapping_suite_processor.services.conceptual_mapping_processor import \
mapping_suite_processor_generate_sparql_queries, CONCEPTUAL_MAPPINGS_ASSERTIONS
from ted_sws.mapping_suite_processor.services.conceptual_mapping_processor import CONCEPTUAL_MAPPINGS_ASSERTIONS, \
mapping_suite_processor_expand_package
from tests import temporary_copy


def test_mapping_suite_processor_generate_sparql_queries(file_system_repository_path):
def test_mapping_suite_processor_expand_package(file_system_repository_path):
mapping_suite_package_path = file_system_repository_path / "test_package"
with temporary_copy(mapping_suite_package_path) as tmp_mapping_suite_package_path:
mapping_suite_processor_generate_sparql_queries(mapping_suite_package_path=tmp_mapping_suite_package_path)
mapping_suite_repository = MappingSuiteRepositoryInFileSystem(repository_path=tmp_mapping_suite_package_path.parent)
mapping_suite_processor_expand_package(mapping_suite_package_path=tmp_mapping_suite_package_path)
mapping_suite_repository = MappingSuiteRepositoryInFileSystem(
repository_path=tmp_mapping_suite_package_path.parent)
mapping_suite = mapping_suite_repository.get(reference="test_package")
assert mapping_suite
assert mapping_suite.sparql_test_suites
Expand All @@ -19,3 +20,12 @@ def test_mapping_suite_processor_generate_sparql_queries(file_system_repository_
for sparql_test_suite in mapping_suite.sparql_test_suites:
if sparql_test_suite.identifier == CONCEPTUAL_MAPPINGS_ASSERTIONS:
assert len(sparql_test_suite.sparql_tests) == 66

assert mapping_suite.metadata_constraints
assert mapping_suite.title == "sample_title"
assert mapping_suite.identifier == "mapping_id"
assert mapping_suite.version == "0.0.1"
assert mapping_suite.ontology_version == "3.0.0.alpha"
assert "F03" in set(mapping_suite.metadata_constraints.constraints["form_number"])
assert "F04" not in set(mapping_suite.metadata_constraints.constraints["form_number"])