Skip to content

Commit

Permalink
Merge pull request #280 from OP-TED/feature/TED-559
Browse files Browse the repository at this point in the history
Publication Notice URI
  • Loading branch information
kaleanych authored Sep 30, 2022
2 parents e4652d8 + 63b1786 commit 3791f09
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 11 deletions.
39 changes: 29 additions & 10 deletions ted_sws/notice_packager/services/metadata_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,11 @@
from ted_sws.notice_packager.model.metadata import PackagerMetadata, ACTION_CREATE, LANGUAGE, REVISION, BASE_WORK, \
BASE_TITLE

NORM_SEP = '_'
DENORM_SEP = '-'
# Separator used inside the pipeline (normalized values)
NORMALIZED_SEPARATOR = '_'

# Separator used by the TED API (denormalized values)
DENORMALIZED_SEPARATOR = '-'


class MetadataTransformer:
Expand All @@ -32,15 +35,23 @@ def template_metadata(self, action: str = ACTION_CREATE) -> PackagerMetadata:

@classmethod
def normalize_value(cls, value: str) -> str:
return value.replace(DENORM_SEP, NORM_SEP)
"""
Replace the original (TED API) separator with the pipeline's separator.
This is used when a notice comes in from the API.
:param value:
:return:
"""
return value.replace(DENORMALIZED_SEPARATOR, NORMALIZED_SEPARATOR)

@classmethod
def denormalize_value(cls, value: str) -> str:
return value.replace(NORM_SEP, DENORM_SEP)

@classmethod
def __year(cls, metadata: PackagerMetadata) -> str:
return metadata.notice.id.split(NORM_SEP)[1]
"""
Replace the pipeline's separator with the original (TED API) separator.
This is used when a notice goes out to the API.
:param value:
:return:
"""
return value.replace(NORMALIZED_SEPARATOR, DENORMALIZED_SEPARATOR)

@classmethod
def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> PackagerMetadata:
Expand All @@ -53,15 +64,23 @@ def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> PackagerMet
metadata.notice.id = cls.normalize_value(notice_metadata.notice_publication_number)

# WORK
metadata.work.uri = f"{BASE_WORK}{cls.__year(metadata)}/{metadata.notice.id}"
metadata.work.uri = publication_notice_uri(metadata.notice.id)
title_search = [t.title.text for t in notice_metadata.title if t.title.language == LANGUAGE.upper()]
if len(title_search) > 0:
metadata.work.title = {LANGUAGE: title_search[0]}
metadata.work.date_creation = datetime.datetime\
metadata.work.date_creation = datetime.datetime \
.strptime(notice_metadata.publication_date, '%Y%m%d').strftime('%Y-%m-%d')
metadata.work.dataset_version = _date.strftime('%Y%m%d') + '-' + _revision

# EXPRESSION
metadata.expression.title = {LANGUAGE: BASE_TITLE + " " + metadata.notice.id}

return metadata


def publication_notice_year(notice_id):
    """
    Extract the year part of a normalized notice id.

    The id is split on NORMALIZED_SEPARATOR and the second segment is
    returned (e.g. "196390_2016" gives "2016").

    :param notice_id: notice id written with the pipeline separator
    :return: the second separator-delimited segment of the id
    """
    segments = notice_id.split(NORMALIZED_SEPARATOR)
    return segments[1]


def publication_notice_uri(notice_id):
    """
    Build the publication URI of a notice from its normalized id.

    The URI is BASE_WORK followed by the notice year, a slash and the
    notice id itself.

    :param notice_id: notice id written with the pipeline separator
    :return: the notice URI as a string
    """
    year = publication_notice_year(notice_id)
    return f"{BASE_WORK}{year}/{notice_id}"
5 changes: 5 additions & 0 deletions tests/unit/notice_packager/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,8 @@ def non_existing_rdf_files_path():
@pytest.fixture
def invalid_rdf_files_path():
    """Path of the "invalid_rdfs" METS package test data directory."""
    packager_data = TEST_DATA_PATH / "notice_packager"
    mets_packages = packager_data / "mets_packages"
    return mets_packages / "invalid_rdfs"


@pytest.fixture
def notice_id():
    """Sample notice id in normalized (underscore-separated) form."""
    sample_notice_id = "196390_2016"
    return sample_notice_id
13 changes: 12 additions & 1 deletion tests/unit/notice_packager/test_metadata_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
""" """

from ted_sws.notice_metadata_processor.model.metadata import ExtractedMetadata
from ted_sws.notice_packager.services.metadata_transformer import MetadataTransformer
from ted_sws.notice_packager.services.metadata_transformer import MetadataTransformer, publication_notice_uri, \
publication_notice_year


def test_notice_metadata(notice_sample_metadata: ExtractedMetadata):
Expand All @@ -23,3 +24,13 @@ def test_metadata_transformer(notice_sample_metadata: ExtractedMetadata):
assert hasattr(template_metadata, "work")
assert hasattr(template_metadata, "expression")
assert hasattr(template_metadata, "manifestation")


def test_publication_notice_year(notice_id):
    """The year is taken from the second segment of the notice id."""
    expected_year = "2016"
    assert publication_notice_year(notice_id) == expected_year


def test_publication_notice_uri(notice_id):
    """The URI combines the base work URI, the year and the notice id."""
    expected_uri = "http://data.europa.eu/a4g/resource/2016/196390_2016"
    assert publication_notice_uri(notice_id) == expected_uri

0 comments on commit 3791f09

Please sign in to comment.