Skip to content

Commit

Permalink
Merge pull request #27 from meaningfy-ws/feature/TED-142
Browse files Browse the repository at this point in the history
  • Loading branch information
kaleanych authored Mar 15, 2022
2 parents 5f1d14b + 51d4e93 commit 0e822ab
Show file tree
Hide file tree
Showing 17 changed files with 147 additions and 80 deletions.
2 changes: 2 additions & 0 deletions ted_sws/domain/model/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ class Metadata(PropertyBaseModel):
"""
Unified interface for metadata
"""
class Config:
underscore_attrs_are_private = True


class NormalisedMetadata(Metadata):
Expand Down
3 changes: 0 additions & 3 deletions ted_sws/notice_packager/adapters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,3 @@

# __init__.py

from jinja2 import Environment, PackageLoader

TEMPLATES = Environment(loader=PackageLoader("ted_sws.notice_packager.resources", "templates"))
4 changes: 2 additions & 2 deletions ted_sws/notice_packager/adapters/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@

import abc
import os
from zipfile import ZipFile, ZIP_DEFLATED
from typing import List, Union
from pathlib import Path
from typing import List, Union
from zipfile import ZipFile, ZIP_DEFLATED

ARCHIVE_ZIP_FORMAT = "zip"
ARCHIVE_ZIP_COMPRESSION = ZIP_DEFLATED
Expand Down
20 changes: 11 additions & 9 deletions ted_sws/notice_packager/adapters/template_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,32 @@
This module provides template generators for all needed package templates.
"""

from . import TEMPLATES
from ted_sws.notice_packager.model.metadata import validate_notice_action_type
from typing import Dict
from jinja2 import Environment, PackageLoader

from ted_sws.notice_packager.model.metadata import PackagerMetadata, validate_notice_action_type

TEMPLATES = Environment(loader=PackageLoader("ted_sws.notice_packager.resources", "templates"))


class TemplateGenerator:
@classmethod
def __generate_template(cls, template, data: Dict = None):
template_render = TEMPLATES.get_template(template).render(data)
def __generate_template(cls, template, data: PackagerMetadata = None):
template_render = TEMPLATES.get_template(template).render(data.dict())
return template_render

@classmethod
def mets_xml_dmd_rdf_generator(cls, data: Dict = None):
def mets_xml_dmd_rdf_generator(cls, data: PackagerMetadata = None):
template = 'mets_xml_dmd_rdf.jinja2'
return cls.__generate_template(template, data)

@classmethod
def tmd_rdf_generator(cls, data: Dict = None):
def tmd_rdf_generator(cls, data: PackagerMetadata = None):
template = 'tmd_rdf.jinja2'
return cls.__generate_template(template, data)

@classmethod
def mets2action_mets_xml_generator(cls, data: Dict = None):
action = data["notice"]["action"]["type"]
def mets2action_mets_xml_generator(cls, data: PackagerMetadata = None):
action = data.notice.action.type
validate_notice_action_type(action)

template = 'mets2action_mets_xml.jinja2'
Expand Down
Empty file.
56 changes: 56 additions & 0 deletions ted_sws/notice_packager/entrypoints/bulk_packager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/python3

# bulk_packager.py
# Date: 14/03/2022
# Author: Kolea PLESCO
# Email: [email protected]

"""
This module provides functionalities to generate bulk/multiple notice packages for test purposes.
"""

import base64
from pathlib import Path

from ted_sws.metadata_normaliser.services.xml_manifestation_metadata_extractor import XMLManifestationMetadataExtractor
from ted_sws.notice_packager.adapters.archiver import PATH_TYPE
from ted_sws.notice_packager.services.metadata_transformer import MetadataTransformer
from ted_sws.notice_packager.services.notice_packager import create_notice_package
from tests import TEST_DATA_PATH
from tests.fakes.fake_notice import FakeNotice

DEFAULT_OUTPUT_FOLDER: Path = TEST_DATA_PATH / "notice_packager" / "mets_packages" / "pkgs"
DEFAULT_RDF_PATH: Path = TEST_DATA_PATH / "notice_packager" / "templates" / "196390_2016.rdf"
DEFAULT_FILES_COUNT: int = 3000


def generate_packages(files_count: int = DEFAULT_FILES_COUNT, output_folder: PATH_TYPE = DEFAULT_OUTPUT_FOLDER,
                      rdf_file: PATH_TYPE = DEFAULT_RDF_PATH) -> str:
    """
    Generate a batch of notice packages (one zip archive per generated notice id) for test purposes.

    The same RDF content is base64 encoded once and passed to every ``create_notice_package`` call;
    the notice metadata is extracted from a ``FakeNotice`` and only the publication number changes
    from one package to the next.

    :param files_count: number of packages to generate
    :param output_folder: folder the ``<notice_id>.zip`` archives are saved to
    :param rdf_file: path to the RDF file whose content is passed as ``rdf_content``
    :return: the output folder path, as a string
    """
    # Read explicitly as UTF-8: the content is re-encoded as UTF-8 right below, so relying on the
    # platform default encoding here could corrupt non-ASCII characters.
    with open(rdf_file, "r", encoding="utf-8") as f:
        rdf_content = f.read()

    # Encoded once, outside the loop: the payload is identical for every package.
    encoded_rdf_content = base64.b64encode(rdf_content.encode("utf-8"))

    output_folder = Path(output_folder)
    # Make sure the target folder exists before any archive is saved into it.
    output_folder.mkdir(parents=True, exist_ok=True)

    # Generated publication numbers look like "<base_idx + i>-<year>", e.g. "100000-2022".
    base_idx = 100000
    year = 2022

    for i in range(files_count):
        doc_id = f"{base_idx + i}-{year}"
        notice_id = MetadataTransformer.normalize_value(doc_id)

        output_file = output_folder / f"{notice_id}.zip"

        notice = FakeNotice(ted_id=notice_id)
        notice_metadata = XMLManifestationMetadataExtractor(
            xml_manifestation=notice.xml_manifestation).to_metadata()
        # Override the extracted publication number so that every package gets its own id.
        notice_metadata.notice_publication_number = doc_id

        create_notice_package(
            notice_metadata,
            rdf_content=encoded_rdf_content,
            save_to=output_file
        )

    return str(output_folder)
21 changes: 8 additions & 13 deletions ted_sws/notice_packager/model/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
"""

import datetime

from ted_sws.domain.model.metadata import Metadata
from typing import List, Dict

from pydantic import validator

from ted_sws.domain.model.metadata import Metadata

WORK_AGENT = "PUBL"
PUBLICATION_FREQUENCY = "OTHER"
Expand Down Expand Up @@ -47,12 +47,7 @@ def validate_notice_action_type(v):
raise ValueError('No such action: %s' % v)


class MetaMetadata(Metadata):
class Config:
underscore_attrs_are_private = True


class NoticeActionMetadata(MetaMetadata):
class NoticeActionMetadata(Metadata):
"""
Notice action metadata
"""
Expand All @@ -65,7 +60,7 @@ def validate_notice_action_type(cls, v):
return v


class NoticeMetadata(MetaMetadata):
class NoticeMetadata(Metadata):
"""
General notice metadata
"""
Expand All @@ -74,7 +69,7 @@ class NoticeMetadata(MetaMetadata):
action: NoticeActionMetadata = NoticeActionMetadata()


class WorkMetadata(MetaMetadata):
class WorkMetadata(Metadata):
"""
What is the minimal input necessary to produce the work metadata,
and the rest is a bunch of constants OR generated values (e.g. date, URI, ...)
Expand All @@ -94,19 +89,19 @@ class WorkMetadata(MetaMetadata):
dataset_has_frequency_publication_frequency: str = PUBLICATION_FREQUENCY


class ExpressionMetadata(MetaMetadata):
class ExpressionMetadata(Metadata):
title: Dict[str, str] = None
uses_language: str = USES_LANGUAGE


class ManifestationMetadata(MetaMetadata):
class ManifestationMetadata(Metadata):
type: str = MANIFESTATION_TYPE
date_publication: str = datetime.datetime.now().strftime('%Y-%m-%d')
distribution_has_status_distribution_status: str = DISTRIBUTION_STATUS
distribution_has_media_type_concept_media_type: str = MEDIA_TYPE


class PackagerMetadata(MetaMetadata):
class PackagerMetadata(Metadata):
notice: NoticeMetadata = NoticeMetadata()
work: WorkMetadata = WorkMetadata()
expression: ExpressionMetadata = ExpressionMetadata()
Expand Down
16 changes: 8 additions & 8 deletions ted_sws/notice_packager/services/metadata_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
This transformed metadata is what adapters expect.
"""

import datetime

from ted_sws.metadata_normaliser.model.metadata import ExtractedMetadata
from ted_sws.notice_packager.model.metadata import PackagerMetadata, ACTION_CREATE, LANGUAGE, REVISION, BASE_WORK, \
BASE_TITLE
from typing import Dict, List
import datetime

NORM_SEP = '_'

Expand All @@ -24,28 +24,28 @@ class MetadataTransformer:
def __init__(self, notice_metadata: ExtractedMetadata):
self.notice_metadata = notice_metadata

def template_metadata(self, action: str = ACTION_CREATE) -> Dict:
def template_metadata(self, action: str = ACTION_CREATE) -> PackagerMetadata:
metadata = self.from_notice_metadata(self.notice_metadata)
metadata['notice']['action']['type'] = action
metadata.notice.action.type = action
return metadata

@classmethod
def __normalize_value(cls, value: str) -> str:
def normalize_value(cls, value: str) -> str:
return value.replace('-', NORM_SEP)

@classmethod
def __year(cls, metadata: PackagerMetadata) -> str:
return metadata.notice.id.split(NORM_SEP)[1]

@classmethod
def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> Dict:
def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> PackagerMetadata:
_date = datetime.datetime.now()
_revision = REVISION

metadata = PackagerMetadata()

# NOTICE
metadata.notice.id = cls.__normalize_value(notice_metadata.notice_publication_number)
metadata.notice.id = cls.normalize_value(notice_metadata.notice_publication_number)

# WORK
metadata.work.uri = f"{BASE_WORK}{cls.__year(metadata)}/{metadata.notice.id}"
Expand All @@ -59,4 +59,4 @@ def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> Dict:
# EXPRESSION
metadata.expression.title = {LANGUAGE: BASE_TITLE + " " + metadata.notice.id}

return metadata.dict()
return metadata
11 changes: 6 additions & 5 deletions ted_sws/notice_packager/services/notice_packager.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,17 @@
import base64
import os.path
from pathlib import Path
from tempfile import TemporaryDirectory, NamedTemporaryFile
from tempfile import TemporaryDirectory
from typing import List, Union

from tests.fakes.fake_notice import FakeNotice
from ted_sws.domain.model.notice import Notice
from ted_sws.metadata_normaliser.model.metadata import ExtractedMetadata
from ted_sws.metadata_normaliser.services.xml_manifestation_metadata_extractor import XMLManifestationMetadataExtractor
from ted_sws.notice_packager.adapters.archiver import ArchiverFactory, ARCHIVE_ZIP_FORMAT, PATH_TYPE
from ted_sws.notice_packager.adapters.template_generator import TemplateGenerator
from ted_sws.notice_packager.model.metadata import ACTION_CREATE
from ted_sws.notice_packager.services.metadata_transformer import MetadataTransformer
from tests.fakes.fake_notice import FakeNotice

ARCHIVE_NAME_FORMAT = "eProcurement_notice_{notice_id}.zip"
FILE_METS_XML_FORMAT = "{notice_id}-0.mets.xml.dmd.rdf"
Expand Down Expand Up @@ -83,16 +83,16 @@ def create_notice_package(in_data: IN_DATA_TYPE, rdf_content: Union[str, bytes]
:param extra_files: additional files paths to be added to archive
:param action:
:param save_to:
:return:
:return: base64 encoded archive or path to archive
"""

notice_metadata: NOTICE_METADATA_TYPE = __validated_in_data(in_data)
archiver = ArchiverFactory.get_archiver(ARCHIVE_ZIP_FORMAT)
metadata_transformer = MetadataTransformer(notice_metadata)
template_metadata = metadata_transformer.template_metadata(action=action)

notice_id = template_metadata['notice']['id']
notice_action = template_metadata['notice']['action']['type']
notice_id = template_metadata.notice.id
notice_action = template_metadata.notice.action.type

tmp_dir = TemporaryDirectory()
tmp_dir_path = Path(tmp_dir.name)
Expand Down Expand Up @@ -130,6 +130,7 @@ def create_notice_package(in_data: IN_DATA_TYPE, rdf_content: Union[str, bytes]
with open(package_path, "rb") as f:
raw_archive_content = f.read()

# TODO: clarify the return value (base64 encoded archive vs. path to archive)
if save_to is None:
archive_content = base64.b64encode(raw_archive_content)
return str(archive_content, 'utf-8')
Expand Down
1 change: 1 addition & 0 deletions tests/test_data/notice_packager/mets_packages/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*
29 changes: 18 additions & 11 deletions tests/unit/notice_packager/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,42 +7,49 @@

""" """

import pytest
import json
from typing import Dict

from tests import TEST_DATA_PATH
import pytest

from ted_sws.metadata_normaliser.model.metadata import ExtractedMetadata
from ted_sws.metadata_normaliser.services.xml_manifestation_metadata_extractor import XMLManifestationMetadataExtractor
from ted_sws.notice_packager.model.metadata import PackagerMetadata, NoticeMetadata, WorkMetadata, ExpressionMetadata, \
ManifestationMetadata
from tests import TEST_DATA_PATH


# template_metadata START


@pytest.fixture()
def template_sample_metadata() -> Dict:
def template_sample_metadata_json() -> Dict:
return json.load((TEST_DATA_PATH / "notice_packager" / "template_metadata.json").open())


@pytest.fixture()
def template_sample_notice(template_sample_metadata) -> Dict:
return template_sample_metadata["notice"]
def template_sample_metadata(template_sample_metadata_json) -> PackagerMetadata:
return PackagerMetadata(**template_sample_metadata_json)


@pytest.fixture()
def template_sample_notice(template_sample_metadata) -> NoticeMetadata:
return template_sample_metadata.notice


@pytest.fixture()
def template_sample_work(template_sample_metadata) -> Dict:
return template_sample_metadata["work"]
def template_sample_work(template_sample_metadata) -> WorkMetadata:
return template_sample_metadata.work


@pytest.fixture()
def template_sample_expression(template_sample_metadata) -> Dict:
return template_sample_metadata["expression"]
def template_sample_expression(template_sample_metadata) -> ExpressionMetadata:
return template_sample_metadata.expression


@pytest.fixture()
def template_sample_manifestation(template_sample_metadata) -> Dict:
return template_sample_metadata["manifestation"]
def template_sample_manifestation(template_sample_metadata) -> ManifestationMetadata:
return template_sample_metadata.manifestation

# template_metadata END

Expand Down
23 changes: 3 additions & 20 deletions tests/unit/notice_packager/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,8 @@
""" """


from ted_sws.notice_packager.model.metadata import PackagerMetadata, NoticeMetadata, WorkMetadata, ExpressionMetadata, \
ManifestationMetadata
from ted_sws.notice_packager.model.metadata import PackagerMetadata


def test_validate_notice(template_sample_notice):
NoticeMetadata(**template_sample_notice)


def test_validate_work_metadata(template_sample_work):
WorkMetadata(**template_sample_work)


def test_validate_expression_metadata(template_sample_expression):
ExpressionMetadata(**template_sample_expression)


def test_validate_manifestation_metadata(template_sample_manifestation):
ManifestationMetadata(**template_sample_manifestation)


def test_validate_packager_metadata(template_sample_metadata):
PackagerMetadata(**template_sample_metadata)
def test_validate_packager_metadata(template_sample_metadata_json):
PackagerMetadata(**template_sample_metadata_json)
Loading

0 comments on commit 0e822ab

Please sign in to comment.