From 3814a13541e5f381d8a65e569a1ece3e28ca6f88 Mon Sep 17 00:00:00 2001 From: Kolea Plesco Date: Sun, 25 Sep 2022 23:31:26 +0300 Subject: [PATCH 1/2] Publication Notice URI --- .../services/metadata_transformer.py | 16 ++++++++++------ tests/unit/notice_packager/conftest.py | 5 +++++ .../notice_packager/test_metadata_transformer.py | 13 ++++++++++++- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/ted_sws/notice_packager/services/metadata_transformer.py b/ted_sws/notice_packager/services/metadata_transformer.py index 216941135..53bd9abd3 100644 --- a/ted_sws/notice_packager/services/metadata_transformer.py +++ b/ted_sws/notice_packager/services/metadata_transformer.py @@ -38,10 +38,6 @@ def normalize_value(cls, value: str) -> str: def denormalize_value(cls, value: str) -> str: return value.replace(NORM_SEP, DENORM_SEP) - @classmethod - def __year(cls, metadata: PackagerMetadata) -> str: - return metadata.notice.id.split(NORM_SEP)[1] - @classmethod def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> PackagerMetadata: _date = datetime.datetime.now() @@ -53,11 +49,11 @@ def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> PackagerMet metadata.notice.id = cls.normalize_value(notice_metadata.notice_publication_number) # WORK - metadata.work.uri = f"{BASE_WORK}{cls.__year(metadata)}/{metadata.notice.id}" + metadata.work.uri = publication_notice_uri(metadata.notice.id) title_search = [t.title.text for t in notice_metadata.title if t.title.language == LANGUAGE.upper()] if len(title_search) > 0: metadata.work.title = {LANGUAGE: title_search[0]} - metadata.work.date_creation = datetime.datetime\ + metadata.work.date_creation = datetime.datetime \ .strptime(notice_metadata.publication_date, '%Y%m%d').strftime('%Y-%m-%d') metadata.work.dataset_version = _date.strftime('%Y%m%d') + '-' + _revision @@ -65,3 +61,11 @@ def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> PackagerMet metadata.expression.title = {LANGUAGE: BASE_TITLE + " " + metadata.notice.id} return metadata + + +def publication_notice_year(notice_id): + return notice_id.split(NORM_SEP)[1] + + +def publication_notice_uri(notice_id): + return f"{BASE_WORK}{publication_notice_year(notice_id)}/{notice_id}" diff --git a/tests/unit/notice_packager/conftest.py b/tests/unit/notice_packager/conftest.py index 805076de3..3e05bd02c 100644 --- a/tests/unit/notice_packager/conftest.py +++ b/tests/unit/notice_packager/conftest.py @@ -89,3 +89,8 @@ def non_existing_rdf_files_path(): @pytest.fixture def invalid_rdf_files_path(): return TEST_DATA_PATH / "notice_packager" / "mets_packages" / "invalid_rdfs" + + +@pytest.fixture +def notice_id(): + return "196390_2016" diff --git a/tests/unit/notice_packager/test_metadata_transformer.py b/tests/unit/notice_packager/test_metadata_transformer.py index b799e2a9b..61522931c 100644 --- a/tests/unit/notice_packager/test_metadata_transformer.py +++ b/tests/unit/notice_packager/test_metadata_transformer.py @@ -8,7 +8,8 @@ """ """ from ted_sws.notice_metadata_processor.model.metadata import ExtractedMetadata -from ted_sws.notice_packager.services.metadata_transformer import MetadataTransformer +from ted_sws.notice_packager.services.metadata_transformer import MetadataTransformer, publication_notice_uri, \ + publication_notice_year def test_notice_metadata(notice_sample_metadata: ExtractedMetadata): @@ -23,3 +24,13 @@ def test_metadata_transformer(notice_sample_metadata: ExtractedMetadata): assert hasattr(template_metadata, "work") assert hasattr(template_metadata, "expression") assert hasattr(template_metadata, "manifestation") + + +def test_publication_notice_year(notice_id): + year = publication_notice_year(notice_id) + assert year == "2016" + + +def test_publication_notice_uri(notice_id): + uri = publication_notice_uri(notice_id) + assert uri == "http://data.europa.eu/a4g/resource/2016/196390_2016" From 63b1786251e6b461aab61ea239a8e188866cceda Mon Sep 17 00:00:00 2001 From: Kolea Plesco Date: Thu, 29 Sep 2022 18:08:52 +0300 Subject: [PATCH 2/2] Changed separator vars names --- .../services/metadata_transformer.py | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/ted_sws/notice_packager/services/metadata_transformer.py b/ted_sws/notice_packager/services/metadata_transformer.py index 53bd9abd3..d1408df2a 100644 --- a/ted_sws/notice_packager/services/metadata_transformer.py +++ b/ted_sws/notice_packager/services/metadata_transformer.py @@ -17,8 +17,11 @@ from ted_sws.notice_packager.model.metadata import PackagerMetadata, ACTION_CREATE, LANGUAGE, REVISION, BASE_WORK, \ BASE_TITLE -NORM_SEP = '_' -DENORM_SEP = '-' +# This is used in pipeline +NORMALIZED_SEPARATOR = '_' + +# This is used in TED API +DENORMALIZED_SEPARATOR = '-' class MetadataTransformer: @@ -32,11 +35,23 @@ def template_metadata(self, action: str = ACTION_CREATE) -> PackagerMetadata: @classmethod def normalize_value(cls, value: str) -> str: - return value.replace(DENORM_SEP, NORM_SEP) + """ + The initial (TED API) separator is replaced with pipeline's one. + This is used when notice comes in from API + :param value: + :return: + """ + return value.replace(DENORMALIZED_SEPARATOR, NORMALIZED_SEPARATOR) @classmethod def denormalize_value(cls, value: str) -> str: - return value.replace(NORM_SEP, DENORM_SEP) + """ + The pipeline's separator is replaced with initial (TED API)'s one. + This is used when notice goes out to API + :param value: + :return: + """ + return value.replace(NORMALIZED_SEPARATOR, DENORMALIZED_SEPARATOR) @classmethod def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> PackagerMetadata: @@ -64,7 +79,7 @@ def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> PackagerMet def publication_notice_year(notice_id): - return notice_id.split(NORM_SEP)[1] + return notice_id.split(NORMALIZED_SEPARATOR)[1] def publication_notice_uri(notice_id):