diff --git a/ted_sws/data_manager/adapters/sparql_endpoint.py b/ted_sws/data_manager/adapters/sparql_endpoint.py index 296dd982a..51adb7341 100644 --- a/ted_sws/data_manager/adapters/sparql_endpoint.py +++ b/ted_sws/data_manager/adapters/sparql_endpoint.py @@ -128,8 +128,14 @@ class SPARQLTripleStoreEndpoint(TripleStoreEndpointABC): def __init__(self, endpoint_url: str, user: str = None, password: str = None, use_post_method: bool = False): user = user if user else config.AGRAPH_SUPER_USER password = password if password else config.AGRAPH_SUPER_PASSWORD - self.endpoint = SPARQLClientPool.create_or_reuse_connection(endpoint_url, user, password, - use_post_method=use_post_method) + sparql_wrapper = SPARQLWrapper(endpoint_url) + sparql_wrapper.setCredentials( + user=user, + passwd=password + ) + if use_post_method: + sparql_wrapper.setMethod(method=POST) + self.endpoint = sparql_wrapper def _set_sparql_query(self, sparql_query: str): """ diff --git a/ted_sws/data_manager/adapters/supra_notice_repository.py b/ted_sws/data_manager/adapters/supra_notice_repository.py index e030ef3c4..8af9cef11 100644 --- a/ted_sws/data_manager/adapters/supra_notice_repository.py +++ b/ted_sws/data_manager/adapters/supra_notice_repository.py @@ -8,8 +8,9 @@ from ted_sws.data_manager.adapters import inject_date_string_fields, remove_date_string_fields from ted_sws.data_manager.adapters.repository_abc import DailySupraNoticeRepositoryABC -DAILY_SUPRA_NOTICE_ID = "notice_fetched_date" +DAILY_SUPRA_NOTICE_FETCHED_DATE = "notice_fetched_date" DAILY_SUPRA_NOTICE_CREATED_AT = "created_at" +DAILY_SUPRA_NOTICE_ID = "_id" class DailySupraNoticeRepository(DailySupraNoticeRepositoryABC): @@ -25,7 +26,7 @@ def __init__(self, mongodb_client: MongoClient, database_name: str = None): daily_supra_notice_db = mongodb_client[self._database_name] self.collection = daily_supra_notice_db[self._collection_name] self.collection.create_index( - [(DAILY_SUPRA_NOTICE_ID, ASCENDING)]) # TODO: index creation may bring race condition error. + [(DAILY_SUPRA_NOTICE_FETCHED_DATE, ASCENDING)]) # TODO: index creation may bring race condition error. def _create_dict_from_daily_supra_notice(self, daily_supra_notice: DailySupraNotice) -> dict: """ @@ -34,9 +35,10 @@ def _create_dict_from_daily_supra_notice(self, daily_supra_notice: DailySupraNot :return: """ daily_supra_notice_dict = daily_supra_notice.dict() - daily_supra_notice_dict[DAILY_SUPRA_NOTICE_ID] = datetime.combine( - daily_supra_notice_dict[DAILY_SUPRA_NOTICE_ID], time()) - inject_date_string_fields(data=daily_supra_notice_dict, date_field_name=DAILY_SUPRA_NOTICE_ID) + daily_supra_notice_dict[DAILY_SUPRA_NOTICE_FETCHED_DATE] = datetime.combine( + daily_supra_notice_dict[DAILY_SUPRA_NOTICE_FETCHED_DATE], time()) + daily_supra_notice_dict[DAILY_SUPRA_NOTICE_ID] = daily_supra_notice_dict[DAILY_SUPRA_NOTICE_FETCHED_DATE] + inject_date_string_fields(data=daily_supra_notice_dict, date_field_name=DAILY_SUPRA_NOTICE_FETCHED_DATE) inject_date_string_fields(data=daily_supra_notice_dict, date_field_name=DAILY_SUPRA_NOTICE_CREATED_AT) return daily_supra_notice_dict @@ -47,8 +49,10 @@ def _create_daily_supra_notice_from_dict(self, daily_supra_notice_dict: dict) -> :return: """ if daily_supra_notice_dict is not None: - daily_supra_notice_dict[DAILY_SUPRA_NOTICE_ID] = daily_supra_notice_dict[DAILY_SUPRA_NOTICE_ID].date() - remove_date_string_fields(data=daily_supra_notice_dict, date_field_name=DAILY_SUPRA_NOTICE_ID) + daily_supra_notice_dict.pop(DAILY_SUPRA_NOTICE_ID, None) + daily_supra_notice_dict[DAILY_SUPRA_NOTICE_FETCHED_DATE] = daily_supra_notice_dict[ + DAILY_SUPRA_NOTICE_FETCHED_DATE].date() + remove_date_string_fields(data=daily_supra_notice_dict, date_field_name=DAILY_SUPRA_NOTICE_FETCHED_DATE) remove_date_string_fields(data=daily_supra_notice_dict, date_field_name=DAILY_SUPRA_NOTICE_CREATED_AT) daily_supra_notice = DailySupraNotice.parse_obj(daily_supra_notice_dict) return daily_supra_notice diff --git a/ted_sws/notice_validator/resources/__init__.py b/ted_sws/notice_validator/resources/__init__.py index c9aea559b..2bf4595d6 100644 --- a/ted_sws/notice_validator/resources/__init__.py +++ b/ted_sws/notice_validator/resources/__init__.py @@ -3,4 +3,5 @@ NOTICE_VALIDATOR_RESOURCES_PATH = pathlib.Path(__file__).parent.resolve() SPARQL_QUERY_TEMPLATES_PATH = NOTICE_VALIDATOR_RESOURCES_PATH / "sparql_query_templates" NOTICE_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH = SPARQL_QUERY_TEMPLATES_PATH / "check_notice_availability.rq" -NOTICES_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH = SPARQL_QUERY_TEMPLATES_PATH / "check_notices_availability.rq" \ No newline at end of file +NOTICES_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH = SPARQL_QUERY_TEMPLATES_PATH / "check_notices_availability.rq" +GET_NOTICE_URI_SPARQL_QUERY_TEMPLATE_PATH = SPARQL_QUERY_TEMPLATES_PATH / "get_notice_uri.rq" \ No newline at end of file diff --git a/ted_sws/notice_validator/resources/sparql_query_templates/get_notice_uri.rq b/ted_sws/notice_validator/resources/sparql_query_templates/get_notice_uri.rq new file mode 100644 index 000000000..8f2da0559 --- /dev/null +++ b/ted_sws/notice_validator/resources/sparql_query_templates/get_notice_uri.rq @@ -0,0 +1,6 @@ +PREFIX epo: +select distinct ?s +{ + ?s a epo:Notice . + ?s epo:hasDispatchDate ?o . +} \ No newline at end of file diff --git a/ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py b/ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py index 67a9c4ead..02a89a285 100644 --- a/ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py +++ b/ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py @@ -1,13 +1,13 @@ import time from typing import List, Set - from pymongo import MongoClient from ted_sws.core.model.notice import Notice, NoticeStatus from ted_sws.core.service.batch_processing import chunks from ted_sws.data_manager.adapters.notice_repository import NoticeRepository -from ted_sws.data_manager.adapters.sparql_endpoint import SPARQLTripleStoreEndpoint +from ted_sws.data_manager.adapters.sparql_endpoint import SPARQLTripleStoreEndpoint, SPARQLStringEndpoint +from ted_sws.event_manager.services.log import log_notice_error from ted_sws.notice_validator.resources import NOTICE_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH, \ - NOTICES_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH + NOTICES_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH, GET_NOTICE_URI_SPARQL_QUERY_TEMPLATE_PATH WEBAPI_SPARQL_URL = "https://publications.europa.eu/webapi/rdf/sparql" WEBAPI_SPARQL_RUN_FORMAT = "application/sparql-results+json" @@ -44,13 +44,22 @@ def check_availability_of_notices_in_cellar(notice_uries: List[str], endpoint_ur return set(result['s'].to_list()) -def generate_notice_uri_from_notice_id(notice_id: str) -> str: +def generate_notice_uri_from_notice(notice: Notice) -> str: """ This service generates Cellar URI for a notice, determined by notice_id - :param notice_id: + :param notice: :return: """ - # TODO: implement notice_uri logic + if notice.distilled_rdf_manifestation and notice.distilled_rdf_manifestation.object_data: + sparql_endpoint = SPARQLStringEndpoint(rdf_content=notice.distilled_rdf_manifestation.object_data) + sparql_query = GET_NOTICE_URI_SPARQL_QUERY_TEMPLATE_PATH.read_text(encoding="utf-8") + notice_uries = sparql_endpoint.with_query(sparql_query=sparql_query).fetch_tabular()["s"].to_list() + if len(notice_uries) == 1: + return notice_uries[0] + else: + log_notice_error(message="Invalid extraction of notice URI from distilled RDF manifestation!", + notice_id=notice.ted_id) + return INVALID_NOTICE_URI @@ -63,7 +72,7 @@ def validate_notice_availability_in_cellar(notice: Notice, notice_uri: str = Non """ if notice.status in [NoticeStatus.PUBLISHED, NoticeStatus.PUBLICLY_UNAVAILABLE]: if not notice_uri: - notice_uri = generate_notice_uri_from_notice_id(notice_id=notice.ted_id) + notice_uri = generate_notice_uri_from_notice(notice=notice) if check_availability_of_notice_in_cellar(notice_uri=notice_uri): notice.update_status_to(new_status=NoticeStatus.PUBLICLY_AVAILABLE) else: @@ -85,7 +94,7 @@ def validate_notices_availability_in_cellar(notice_statuses: List[NoticeStatus], selected_notices = notice_repository.get_notices_by_status(notice_status=notice_status) for selected_notices_chunk in chunks(selected_notices, chunk_size=DEFAULT_NOTICES_BATCH_SIZE): selected_notices_map = { - generate_notice_uri_from_notice_id(notice_id=notice.ted_id): notice + generate_notice_uri_from_notice(notice=notice): notice for notice in selected_notices_chunk } selected_notices_uries = list(selected_notices_map.keys()) diff --git a/tests/e2e/notice_validator/conftest.py b/tests/e2e/notice_validator/conftest.py index 827eb0270..1857c8984 100644 --- a/tests/e2e/notice_validator/conftest.py +++ b/tests/e2e/notice_validator/conftest.py @@ -1,7 +1,8 @@ import pytest -from ted_sws.core.model.manifestation import XMLManifestation +from ted_sws.core.model.manifestation import XMLManifestation, RDFManifestation from ted_sws.core.model.notice import Notice +from tests import TEST_DATA_PATH @pytest.fixture @@ -25,11 +26,16 @@ def fake_notice_F03_content(fake_repository_path, fake_mapping_suite_F03_id): notice_content = f.read() return notice_content +@pytest.fixture +def fake_notice_F03_rdf_content(): + return (TEST_DATA_PATH / "rdf_manifestations" / "002705-2021.ttl").read_text(encoding="utf-8") @pytest.fixture -def fake_notice_F03(fake_notice_F03_content, fake_notice_id): +def fake_notice_F03(fake_notice_F03_content, fake_notice_id, fake_notice_F03_rdf_content): xml_manifestation = XMLManifestation(object_data=fake_notice_F03_content) - return Notice(ted_id=fake_notice_id, xml_manifestation=xml_manifestation) + notice = Notice(ted_id=fake_notice_id, xml_manifestation=xml_manifestation) + notice._distilled_rdf_manifestation = RDFManifestation(object_data=fake_notice_F03_rdf_content) + return notice @pytest.fixture diff --git a/tests/e2e/notice_validator/test_check_availability_of_notice_in_cellar.py b/tests/e2e/notice_validator/test_check_availability_of_notice_in_cellar.py index 2f2fbce54..cc7c946b7 100644 --- a/tests/e2e/notice_validator/test_check_availability_of_notice_in_cellar.py +++ b/tests/e2e/notice_validator/test_check_availability_of_notice_in_cellar.py @@ -1,9 +1,21 @@ from ted_sws.core.model.notice import NoticeStatus from ted_sws.notice_validator.services.check_availability_of_notice_in_cellar import \ check_availability_of_notice_in_cellar, validate_notice_availability_in_cellar, \ - check_availability_of_notices_in_cellar, DEFAULT_NOTICES_BATCH_SIZE + check_availability_of_notices_in_cellar, DEFAULT_NOTICES_BATCH_SIZE, generate_notice_uri_from_notice, \ + INVALID_NOTICE_URI +def test_generate_notice_uri_from_notice(fake_notice_F03): + notice_uri = generate_notice_uri_from_notice(notice=fake_notice_F03) + assert notice_uri != INVALID_NOTICE_URI + + +def test_validate_notices_availability_in_cellar(valid_cellar_uri, invalid_cellar_uri): + notice_uries = [valid_cellar_uri] * DEFAULT_NOTICES_BATCH_SIZE + [invalid_cellar_uri] + available_uries = check_availability_of_notices_in_cellar(notice_uries=notice_uries) + assert valid_cellar_uri in available_uries + assert invalid_cellar_uri not in available_uries + def test_check_availability_of_notice_in_cellar(valid_cellar_uri, invalid_cellar_uri): assert check_availability_of_notice_in_cellar(notice_uri=valid_cellar_uri) assert not check_availability_of_notice_in_cellar(notice_uri=invalid_cellar_uri) @@ -20,10 +32,3 @@ def test_validate_notice_availability_in_cellar(fake_notice_F03, valid_cellar_ur fake_notice_F03._status = NoticeStatus.PUBLISHED validate_notice_availability_in_cellar(notice=fake_notice_F03, notice_uri=invalid_cellar_uri) assert fake_notice_F03.status == NoticeStatus.PUBLICLY_UNAVAILABLE - - -def test_validate_notices_availability_in_cellar(valid_cellar_uri, invalid_cellar_uri): - notice_uries = [valid_cellar_uri] * DEFAULT_NOTICES_BATCH_SIZE + [invalid_cellar_uri] - available_uries = check_availability_of_notices_in_cellar(notice_uries=notice_uries) - assert valid_cellar_uri in available_uries - assert invalid_cellar_uri not in available_uries diff --git a/tests/test_data/rdf_manifestations/002705-2021.ttl b/tests/test_data/rdf_manifestations/002705-2021.ttl index 79b6369f7..8d8f3fe35 100644 --- a/tests/test_data/rdf_manifestations/002705-2021.ttl +++ b/tests/test_data/rdf_manifestations/002705-2021.ttl @@ -1630,7 +1630,7 @@ epo:concernsProcedure . - a epo:ResultNotice; + a epo:Notice, epo:ResultNotice;; epo:announcesContract ; epo:announcesNoticeAwardInformation ; epo:announcesRole ;