Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/ted 1177 #425

Merged
merged 6 commits into from
Jan 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions ted_sws/data_manager/adapters/sparql_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,14 @@ class SPARQLTripleStoreEndpoint(TripleStoreEndpointABC):
def __init__(self, endpoint_url: str, user: str = None, password: str = None, use_post_method: bool = False):
user = user if user else config.AGRAPH_SUPER_USER
password = password if password else config.AGRAPH_SUPER_PASSWORD
self.endpoint = SPARQLClientPool.create_or_reuse_connection(endpoint_url, user, password,
use_post_method=use_post_method)
sparql_wrapper = SPARQLWrapper(endpoint_url)
sparql_wrapper.setCredentials(
user=user,
passwd=password
)
if use_post_method:
sparql_wrapper.setMethod(method=POST)
self.endpoint = sparql_wrapper

def _set_sparql_query(self, sparql_query: str):
"""
Expand Down
18 changes: 11 additions & 7 deletions ted_sws/data_manager/adapters/supra_notice_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
from ted_sws.data_manager.adapters import inject_date_string_fields, remove_date_string_fields
from ted_sws.data_manager.adapters.repository_abc import DailySupraNoticeRepositoryABC

DAILY_SUPRA_NOTICE_ID = "notice_fetched_date"
DAILY_SUPRA_NOTICE_FETCHED_DATE = "notice_fetched_date"
DAILY_SUPRA_NOTICE_CREATED_AT = "created_at"
DAILY_SUPRA_NOTICE_ID = "_id"


class DailySupraNoticeRepository(DailySupraNoticeRepositoryABC):
Expand All @@ -25,7 +26,7 @@ def __init__(self, mongodb_client: MongoClient, database_name: str = None):
daily_supra_notice_db = mongodb_client[self._database_name]
self.collection = daily_supra_notice_db[self._collection_name]
self.collection.create_index(
[(DAILY_SUPRA_NOTICE_ID, ASCENDING)]) # TODO: index creation may bring race condition error.
[(DAILY_SUPRA_NOTICE_FETCHED_DATE, ASCENDING)]) # TODO: index creation may bring race condition error.

def _create_dict_from_daily_supra_notice(self, daily_supra_notice: DailySupraNotice) -> dict:
"""
Expand All @@ -34,9 +35,10 @@ def _create_dict_from_daily_supra_notice(self, daily_supra_notice: DailySupraNot
:return:
"""
daily_supra_notice_dict = daily_supra_notice.dict()
daily_supra_notice_dict[DAILY_SUPRA_NOTICE_ID] = datetime.combine(
daily_supra_notice_dict[DAILY_SUPRA_NOTICE_ID], time())
inject_date_string_fields(data=daily_supra_notice_dict, date_field_name=DAILY_SUPRA_NOTICE_ID)
daily_supra_notice_dict[DAILY_SUPRA_NOTICE_FETCHED_DATE] = datetime.combine(
daily_supra_notice_dict[DAILY_SUPRA_NOTICE_FETCHED_DATE], time())
daily_supra_notice_dict[DAILY_SUPRA_NOTICE_ID] = daily_supra_notice_dict[DAILY_SUPRA_NOTICE_FETCHED_DATE]
inject_date_string_fields(data=daily_supra_notice_dict, date_field_name=DAILY_SUPRA_NOTICE_FETCHED_DATE)
inject_date_string_fields(data=daily_supra_notice_dict, date_field_name=DAILY_SUPRA_NOTICE_CREATED_AT)
return daily_supra_notice_dict

Expand All @@ -47,8 +49,10 @@ def _create_daily_supra_notice_from_dict(self, daily_supra_notice_dict: dict) ->
:return:
"""
if daily_supra_notice_dict is not None:
daily_supra_notice_dict[DAILY_SUPRA_NOTICE_ID] = daily_supra_notice_dict[DAILY_SUPRA_NOTICE_ID].date()
remove_date_string_fields(data=daily_supra_notice_dict, date_field_name=DAILY_SUPRA_NOTICE_ID)
daily_supra_notice_dict.pop(DAILY_SUPRA_NOTICE_ID, None)
daily_supra_notice_dict[DAILY_SUPRA_NOTICE_FETCHED_DATE] = daily_supra_notice_dict[
DAILY_SUPRA_NOTICE_FETCHED_DATE].date()
remove_date_string_fields(data=daily_supra_notice_dict, date_field_name=DAILY_SUPRA_NOTICE_FETCHED_DATE)
remove_date_string_fields(data=daily_supra_notice_dict, date_field_name=DAILY_SUPRA_NOTICE_CREATED_AT)
daily_supra_notice = DailySupraNotice.parse_obj(daily_supra_notice_dict)
return daily_supra_notice
Expand Down
3 changes: 2 additions & 1 deletion ted_sws/notice_validator/resources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
NOTICE_VALIDATOR_RESOURCES_PATH = pathlib.Path(__file__).parent.resolve()
SPARQL_QUERY_TEMPLATES_PATH = NOTICE_VALIDATOR_RESOURCES_PATH / "sparql_query_templates"
NOTICE_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH = SPARQL_QUERY_TEMPLATES_PATH / "check_notice_availability.rq"
NOTICES_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH = SPARQL_QUERY_TEMPLATES_PATH / "check_notices_availability.rq"
NOTICES_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH = SPARQL_QUERY_TEMPLATES_PATH / "check_notices_availability.rq"
GET_NOTICE_URI_SPARQL_QUERY_TEMPLATE_PATH = SPARQL_QUERY_TEMPLATES_PATH / "get_notice_uri.rq"
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
PREFIX epo: <http://data.europa.eu/a4g/ontology#>
select distinct ?s
{
?s a epo:Notice .
?s epo:hasDispatchDate ?o .
}
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import time
from typing import List, Set

from pymongo import MongoClient
from ted_sws.core.model.notice import Notice, NoticeStatus
from ted_sws.core.service.batch_processing import chunks
from ted_sws.data_manager.adapters.notice_repository import NoticeRepository
from ted_sws.data_manager.adapters.sparql_endpoint import SPARQLTripleStoreEndpoint
from ted_sws.data_manager.adapters.sparql_endpoint import SPARQLTripleStoreEndpoint, SPARQLStringEndpoint
from ted_sws.event_manager.services.log import log_notice_error
from ted_sws.notice_validator.resources import NOTICE_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH, \
NOTICES_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH
NOTICES_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH, GET_NOTICE_URI_SPARQL_QUERY_TEMPLATE_PATH

WEBAPI_SPARQL_URL = "https://publications.europa.eu/webapi/rdf/sparql"
WEBAPI_SPARQL_RUN_FORMAT = "application/sparql-results+json"
Expand Down Expand Up @@ -44,13 +44,22 @@ def check_availability_of_notices_in_cellar(notice_uries: List[str], endpoint_ur
return set(result['s'].to_list())


def generate_notice_uri_from_notice_id(notice_id: str) -> str:
def generate_notice_uri_from_notice(notice: Notice) -> str:
"""
This service generates Cellar URI for a notice, determined by notice_id
:param notice_id:
:param notice:
:return:
"""
# TODO: implement notice_uri logic
if notice.distilled_rdf_manifestation and notice.distilled_rdf_manifestation.object_data:
sparql_endpoint = SPARQLStringEndpoint(rdf_content=notice.distilled_rdf_manifestation.object_data)
sparql_query = GET_NOTICE_URI_SPARQL_QUERY_TEMPLATE_PATH.read_text(encoding="utf-8")
notice_uries = sparql_endpoint.with_query(sparql_query=sparql_query).fetch_tabular()["s"].to_list()
if len(notice_uries) == 1:
return notice_uries[0]
else:
log_notice_error(message="Invalid extraction of notice URI from distilled RDF manifestation!",
notice_id=notice.ted_id)

return INVALID_NOTICE_URI


Expand All @@ -63,7 +72,7 @@ def validate_notice_availability_in_cellar(notice: Notice, notice_uri: str = Non
"""
if notice.status in [NoticeStatus.PUBLISHED, NoticeStatus.PUBLICLY_UNAVAILABLE]:
if not notice_uri:
notice_uri = generate_notice_uri_from_notice_id(notice_id=notice.ted_id)
notice_uri = generate_notice_uri_from_notice(notice=notice)
if check_availability_of_notice_in_cellar(notice_uri=notice_uri):
notice.update_status_to(new_status=NoticeStatus.PUBLICLY_AVAILABLE)
else:
Expand All @@ -85,7 +94,7 @@ def validate_notices_availability_in_cellar(notice_statuses: List[NoticeStatus],
selected_notices = notice_repository.get_notices_by_status(notice_status=notice_status)
for selected_notices_chunk in chunks(selected_notices, chunk_size=DEFAULT_NOTICES_BATCH_SIZE):
selected_notices_map = {
generate_notice_uri_from_notice_id(notice_id=notice.ted_id): notice
generate_notice_uri_from_notice(notice=notice): notice
for notice in selected_notices_chunk
}
selected_notices_uries = list(selected_notices_map.keys())
Expand Down
12 changes: 9 additions & 3 deletions tests/e2e/notice_validator/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import pytest

from ted_sws.core.model.manifestation import XMLManifestation
from ted_sws.core.model.manifestation import XMLManifestation, RDFManifestation
from ted_sws.core.model.notice import Notice
from tests import TEST_DATA_PATH


@pytest.fixture
Expand All @@ -25,11 +26,16 @@ def fake_notice_F03_content(fake_repository_path, fake_mapping_suite_F03_id):
notice_content = f.read()
return notice_content

@pytest.fixture
def fake_notice_F03_rdf_content():
return (TEST_DATA_PATH / "rdf_manifestations" / "002705-2021.ttl").read_text(encoding="utf-8")

@pytest.fixture
def fake_notice_F03(fake_notice_F03_content, fake_notice_id):
def fake_notice_F03(fake_notice_F03_content, fake_notice_id, fake_notice_F03_rdf_content):
xml_manifestation = XMLManifestation(object_data=fake_notice_F03_content)
return Notice(ted_id=fake_notice_id, xml_manifestation=xml_manifestation)
notice = Notice(ted_id=fake_notice_id, xml_manifestation=xml_manifestation)
notice._distilled_rdf_manifestation = RDFManifestation(object_data=fake_notice_F03_rdf_content)
return notice


@pytest.fixture
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,21 @@
from ted_sws.core.model.notice import NoticeStatus
from ted_sws.notice_validator.services.check_availability_of_notice_in_cellar import \
check_availability_of_notice_in_cellar, validate_notice_availability_in_cellar, \
check_availability_of_notices_in_cellar, DEFAULT_NOTICES_BATCH_SIZE
check_availability_of_notices_in_cellar, DEFAULT_NOTICES_BATCH_SIZE, generate_notice_uri_from_notice, \
INVALID_NOTICE_URI


def test_generate_notice_uri_from_notice(fake_notice_F03):
notice_uri = generate_notice_uri_from_notice(notice=fake_notice_F03)
assert notice_uri != INVALID_NOTICE_URI


def test_validate_notices_availability_in_cellar(valid_cellar_uri, invalid_cellar_uri):
notice_uries = [valid_cellar_uri] * DEFAULT_NOTICES_BATCH_SIZE + [invalid_cellar_uri]
available_uries = check_availability_of_notices_in_cellar(notice_uries=notice_uries)
assert valid_cellar_uri in available_uries
assert invalid_cellar_uri not in available_uries

def test_check_availability_of_notice_in_cellar(valid_cellar_uri, invalid_cellar_uri):
assert check_availability_of_notice_in_cellar(notice_uri=valid_cellar_uri)
assert not check_availability_of_notice_in_cellar(notice_uri=invalid_cellar_uri)
Expand All @@ -20,10 +32,3 @@ def test_validate_notice_availability_in_cellar(fake_notice_F03, valid_cellar_ur
fake_notice_F03._status = NoticeStatus.PUBLISHED
validate_notice_availability_in_cellar(notice=fake_notice_F03, notice_uri=invalid_cellar_uri)
assert fake_notice_F03.status == NoticeStatus.PUBLICLY_UNAVAILABLE


def test_validate_notices_availability_in_cellar(valid_cellar_uri, invalid_cellar_uri):
notice_uries = [valid_cellar_uri] * DEFAULT_NOTICES_BATCH_SIZE + [invalid_cellar_uri]
available_uries = check_availability_of_notices_in_cellar(notice_uries=notice_uries)
assert valid_cellar_uri in available_uries
assert invalid_cellar_uri not in available_uries
2 changes: 1 addition & 1 deletion tests/test_data/rdf_manifestations/002705-2021.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -1630,7 +1630,7 @@
epo:concernsProcedure <http://data.europa.eu/a4g/resource/Procedure/2021-S-002-002705/58509ec4-99dc-3865-9935-58d7ebd66d29> .

<http://data.europa.eu/a4g/resource/ResultNotice/2021-S-002-002705/58509ec4-99dc-3865-9935-58d7ebd66d29>
a epo:ResultNotice;
a epo:Notice, epo:ResultNotice;;
epo:announcesContract <http://data.europa.eu/a4g/resource/Contract/2021-S-002-002705/0c434fb4-13b2-3f81-9fb1-7ec07b891bd2>;
epo:announcesNoticeAwardInformation <http://data.europa.eu/a4g/resource/NoticeAwardInformation/2021-S-002-002705/58509ec4-99dc-3865-9935-58d7ebd66d29>;
epo:announcesRole <http://data.europa.eu/a4g/resource/WinnerRole/2021-S-002-002705/b9a7c460-794b-36b5-a069-6e9a637315f4>;
Expand Down