Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/ted 142 #27

Merged
merged 7 commits into from
Mar 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ted_sws/domain/model/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ class Metadata(PropertyBaseModel):
"""
Unified interface for metadata
"""
class Config:
underscore_attrs_are_private = True


class NormalisedMetadata(Metadata):
Expand Down
3 changes: 0 additions & 3 deletions ted_sws/notice_packager/adapters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,3 @@

# __init__.py

from jinja2 import Environment, PackageLoader

TEMPLATES = Environment(loader=PackageLoader("ted_sws.notice_packager.resources", "templates"))
4 changes: 2 additions & 2 deletions ted_sws/notice_packager/adapters/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@

import abc
import os
from zipfile import ZipFile, ZIP_DEFLATED
from typing import List, Union
from pathlib import Path
from typing import List, Union
from zipfile import ZipFile, ZIP_DEFLATED

ARCHIVE_ZIP_FORMAT = "zip"
ARCHIVE_ZIP_COMPRESSION = ZIP_DEFLATED
Expand Down
20 changes: 11 additions & 9 deletions ted_sws/notice_packager/adapters/template_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,32 @@
This module provides template generators for all needed package templates.
"""

from . import TEMPLATES
from ted_sws.notice_packager.model.metadata import validate_notice_action_type
from typing import Dict
from jinja2 import Environment, PackageLoader

from ted_sws.notice_packager.model.metadata import PackagerMetadata, validate_notice_action_type

TEMPLATES = Environment(loader=PackageLoader("ted_sws.notice_packager.resources", "templates"))


class TemplateGenerator:
@classmethod
def __generate_template(cls, template, data: Dict = None):
template_render = TEMPLATES.get_template(template).render(data)
def __generate_template(cls, template, data: PackagerMetadata = None):
template_render = TEMPLATES.get_template(template).render(data.dict())
return template_render

@classmethod
def mets_xml_dmd_rdf_generator(cls, data: Dict = None):
def mets_xml_dmd_rdf_generator(cls, data: PackagerMetadata = None):
template = 'mets_xml_dmd_rdf.jinja2'
return cls.__generate_template(template, data)

@classmethod
def tmd_rdf_generator(cls, data: Dict = None):
def tmd_rdf_generator(cls, data: PackagerMetadata = None):
template = 'tmd_rdf.jinja2'
return cls.__generate_template(template, data)

@classmethod
def mets2action_mets_xml_generator(cls, data: Dict = None):
action = data["notice"]["action"]["type"]
def mets2action_mets_xml_generator(cls, data: PackagerMetadata = None):
action = data.notice.action.type
validate_notice_action_type(action)

template = 'mets2action_mets_xml.jinja2'
Expand Down
Empty file.
56 changes: 56 additions & 0 deletions ted_sws/notice_packager/entrypoints/bulk_packager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/python3

# bulk_packager.py
# Date: 14/03/2022
# Author: Kolea PLESCO
# Email: [email protected]

"""
This module provides functionalities to generate bulk/multiple notice packages for test purposes.
"""

import base64
from pathlib import Path

from ted_sws.metadata_normaliser.services.xml_manifestation_metadata_extractor import XMLManifestationMetadataExtractor
from ted_sws.notice_packager.adapters.archiver import PATH_TYPE
from ted_sws.notice_packager.services.metadata_transformer import MetadataTransformer
from ted_sws.notice_packager.services.notice_packager import create_notice_package
from tests import TEST_DATA_PATH
from tests.fakes.fake_notice import FakeNotice

DEFAULT_OUTPUT_FOLDER: Path = TEST_DATA_PATH / "notice_packager" / "mets_packages" / "pkgs"
DEFAULT_RDF_PATH: Path = TEST_DATA_PATH / "notice_packager" / "templates" / "196390_2016.rdf"
DEFAULT_FILES_COUNT: int = 3000


def generate_packages(files_count: int = DEFAULT_FILES_COUNT, output_folder: PATH_TYPE = DEFAULT_OUTPUT_FOLDER,
                      rdf_file: PATH_TYPE = DEFAULT_RDF_PATH) -> str:
    """
    Generate a batch of notice packages for test purposes.

    The RDF file is read once, base64-encoded and reused for every package. For each index ``i``
    a publication number ``"<100000 + i>-2022"`` is built, a FakeNotice is created with the
    normalised form of that number as its id, and ``create_notice_package`` is called with
    ``save_to`` set to ``<output_folder>/<normalised id>.zip``.

    :param files_count: number of packages to generate
    :param output_folder: folder the package archives are targeted at
    :param rdf_file: path to the RDF file whose content is embedded in every package
    :return: the output folder, as a string
    """
    # Decode explicitly as UTF-8: the content is re-encoded as UTF-8 right below, so relying on
    # the platform default encoding could corrupt (or fail on) non-ASCII characters.
    with open(rdf_file, "r", encoding="utf-8") as f:
        rdf_content = f.read()

    encoded_rdf_content = base64.b64encode(rdf_content.encode("utf-8"))

    output_folder = Path(output_folder)

    base_idx = 100000
    year = 2022

    for i in range(files_count):
        doc_id = f"{base_idx + i}-{year}"
        notice_id = MetadataTransformer.normalize_value(doc_id)

        output_file = output_folder / f"{notice_id}.zip"

        notice = FakeNotice(ted_id=notice_id)
        notice_metadata = XMLManifestationMetadataExtractor(
            xml_manifestation=notice.xml_manifestation).to_metadata()
        # The packager derives the notice id from the publication number, so override it
        # to make every generated package unique.
        notice_metadata.notice_publication_number = doc_id

        create_notice_package(
            notice_metadata,
            rdf_content=encoded_rdf_content,
            save_to=output_file
        )

    return str(output_folder)
21 changes: 8 additions & 13 deletions ted_sws/notice_packager/model/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
"""

import datetime

from ted_sws.domain.model.metadata import Metadata
from typing import List, Dict

from pydantic import validator

from ted_sws.domain.model.metadata import Metadata

WORK_AGENT = "PUBL"
PUBLICATION_FREQUENCY = "OTHER"
Expand Down Expand Up @@ -47,12 +47,7 @@ def validate_notice_action_type(v):
raise ValueError('No such action: %s' % v)


class MetaMetadata(Metadata):
class Config:
underscore_attrs_are_private = True


class NoticeActionMetadata(MetaMetadata):
class NoticeActionMetadata(Metadata):
"""
Notice action metadata
"""
Expand All @@ -65,7 +60,7 @@ def validate_notice_action_type(cls, v):
return v


class NoticeMetadata(MetaMetadata):
class NoticeMetadata(Metadata):
"""
General notice metadata
"""
Expand All @@ -74,7 +69,7 @@ class NoticeMetadata(MetaMetadata):
action: NoticeActionMetadata = NoticeActionMetadata()


class WorkMetadata(MetaMetadata):
class WorkMetadata(Metadata):
"""
What is the minimal input necessary to produce the work metadata,
and the rest is a bunch of constants OR generated values (e.g. date, URI, ...)
Expand All @@ -94,19 +89,19 @@ class WorkMetadata(MetaMetadata):
dataset_has_frequency_publication_frequency: str = PUBLICATION_FREQUENCY


class ExpressionMetadata(MetaMetadata):
class ExpressionMetadata(Metadata):
title: Dict[str, str] = None
uses_language: str = USES_LANGUAGE


class ManifestationMetadata(MetaMetadata):
class ManifestationMetadata(Metadata):
type: str = MANIFESTATION_TYPE
date_publication: str = datetime.datetime.now().strftime('%Y-%m-%d')
distribution_has_status_distribution_status: str = DISTRIBUTION_STATUS
distribution_has_media_type_concept_media_type: str = MEDIA_TYPE


class PackagerMetadata(MetaMetadata):
class PackagerMetadata(Metadata):
notice: NoticeMetadata = NoticeMetadata()
work: WorkMetadata = WorkMetadata()
expression: ExpressionMetadata = ExpressionMetadata()
Expand Down
16 changes: 8 additions & 8 deletions ted_sws/notice_packager/services/metadata_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
This transformed metadata is what adapters expect.
"""

import datetime

from ted_sws.metadata_normaliser.model.metadata import ExtractedMetadata
from ted_sws.notice_packager.model.metadata import PackagerMetadata, ACTION_CREATE, LANGUAGE, REVISION, BASE_WORK, \
BASE_TITLE
from typing import Dict, List
import datetime

NORM_SEP = '_'

Expand All @@ -24,28 +24,28 @@ class MetadataTransformer:
def __init__(self, notice_metadata: ExtractedMetadata):
self.notice_metadata = notice_metadata

def template_metadata(self, action: str = ACTION_CREATE) -> Dict:
def template_metadata(self, action: str = ACTION_CREATE) -> PackagerMetadata:
metadata = self.from_notice_metadata(self.notice_metadata)
metadata['notice']['action']['type'] = action
metadata.notice.action.type = action
return metadata

@classmethod
def __normalize_value(cls, value: str) -> str:
def normalize_value(cls, value: str) -> str:
return value.replace('-', NORM_SEP)

@classmethod
def __year(cls, metadata: PackagerMetadata) -> str:
return metadata.notice.id.split(NORM_SEP)[1]

@classmethod
def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> Dict:
def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> PackagerMetadata:
_date = datetime.datetime.now()
_revision = REVISION

metadata = PackagerMetadata()

# NOTICE
metadata.notice.id = cls.__normalize_value(notice_metadata.notice_publication_number)
metadata.notice.id = cls.normalize_value(notice_metadata.notice_publication_number)

# WORK
metadata.work.uri = f"{BASE_WORK}{cls.__year(metadata)}/{metadata.notice.id}"
Expand All @@ -59,4 +59,4 @@ def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> Dict:
# EXPRESSION
metadata.expression.title = {LANGUAGE: BASE_TITLE + " " + metadata.notice.id}

return metadata.dict()
return metadata
11 changes: 6 additions & 5 deletions ted_sws/notice_packager/services/notice_packager.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,17 @@
import base64
import os.path
from pathlib import Path
from tempfile import TemporaryDirectory, NamedTemporaryFile
from tempfile import TemporaryDirectory
from typing import List, Union

from tests.fakes.fake_notice import FakeNotice
from ted_sws.domain.model.notice import Notice
from ted_sws.metadata_normaliser.model.metadata import ExtractedMetadata
from ted_sws.metadata_normaliser.services.xml_manifestation_metadata_extractor import XMLManifestationMetadataExtractor
from ted_sws.notice_packager.adapters.archiver import ArchiverFactory, ARCHIVE_ZIP_FORMAT, PATH_TYPE
from ted_sws.notice_packager.adapters.template_generator import TemplateGenerator
from ted_sws.notice_packager.model.metadata import ACTION_CREATE
from ted_sws.notice_packager.services.metadata_transformer import MetadataTransformer
from tests.fakes.fake_notice import FakeNotice

ARCHIVE_NAME_FORMAT = "eProcurement_notice_{notice_id}.zip"
FILE_METS_XML_FORMAT = "{notice_id}-0.mets.xml.dmd.rdf"
Expand Down Expand Up @@ -83,16 +83,16 @@ def create_notice_package(in_data: IN_DATA_TYPE, rdf_content: Union[str, bytes]
:param extra_files: additional files paths to be added to archive
:param action:
:param save_to:
:return:
:return: base64 encoded archive or path to archive
"""

notice_metadata: NOTICE_METADATA_TYPE = __validated_in_data(in_data)
archiver = ArchiverFactory.get_archiver(ARCHIVE_ZIP_FORMAT)
metadata_transformer = MetadataTransformer(notice_metadata)
template_metadata = metadata_transformer.template_metadata(action=action)

notice_id = template_metadata['notice']['id']
notice_action = template_metadata['notice']['action']['type']
notice_id = template_metadata.notice.id
notice_action = template_metadata.notice.action.type

tmp_dir = TemporaryDirectory()
tmp_dir_path = Path(tmp_dir.name)
Expand Down Expand Up @@ -130,6 +130,7 @@ def create_notice_package(in_data: IN_DATA_TYPE, rdf_content: Union[str, bytes]
with open(package_path, "rb") as f:
raw_archive_content = f.read()

# TODO: clarify the return value (base64-encoded archive vs. path to the saved archive)
if save_to is None:
archive_content = base64.b64encode(raw_archive_content)
return str(archive_content, 'utf-8')
Expand Down
1 change: 1 addition & 0 deletions tests/test_data/notice_packager/mets_packages/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*
29 changes: 18 additions & 11 deletions tests/unit/notice_packager/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,42 +7,49 @@

""" """

import pytest
import json
from typing import Dict

from tests import TEST_DATA_PATH
import pytest

from ted_sws.metadata_normaliser.model.metadata import ExtractedMetadata
from ted_sws.metadata_normaliser.services.xml_manifestation_metadata_extractor import XMLManifestationMetadataExtractor
from ted_sws.notice_packager.model.metadata import PackagerMetadata, NoticeMetadata, WorkMetadata, ExpressionMetadata, \
ManifestationMetadata
from tests import TEST_DATA_PATH


# template_metadata START


@pytest.fixture()
def template_sample_metadata() -> Dict:
def template_sample_metadata_json() -> Dict:
return json.load((TEST_DATA_PATH / "notice_packager" / "template_metadata.json").open())


@pytest.fixture()
def template_sample_notice(template_sample_metadata) -> Dict:
return template_sample_metadata["notice"]
def template_sample_metadata(template_sample_metadata_json) -> PackagerMetadata:
return PackagerMetadata(**template_sample_metadata_json)


@pytest.fixture()
def template_sample_notice(template_sample_metadata) -> NoticeMetadata:
return template_sample_metadata.notice


@pytest.fixture()
def template_sample_work(template_sample_metadata) -> Dict:
return template_sample_metadata["work"]
def template_sample_work(template_sample_metadata) -> WorkMetadata:
return template_sample_metadata.work


@pytest.fixture()
def template_sample_expression(template_sample_metadata) -> Dict:
return template_sample_metadata["expression"]
def template_sample_expression(template_sample_metadata) -> ExpressionMetadata:
return template_sample_metadata.expression


@pytest.fixture()
def template_sample_manifestation(template_sample_metadata) -> Dict:
return template_sample_metadata["manifestation"]
def template_sample_manifestation(template_sample_metadata) -> ManifestationMetadata:
return template_sample_metadata.manifestation

# template_metadata END

Expand Down
23 changes: 3 additions & 20 deletions tests/unit/notice_packager/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,8 @@
""" """


from ted_sws.notice_packager.model.metadata import PackagerMetadata, NoticeMetadata, WorkMetadata, ExpressionMetadata, \
ManifestationMetadata
from ted_sws.notice_packager.model.metadata import PackagerMetadata


def test_validate_notice(template_sample_notice):
NoticeMetadata(**template_sample_notice)


def test_validate_work_metadata(template_sample_work):
WorkMetadata(**template_sample_work)


def test_validate_expression_metadata(template_sample_expression):
ExpressionMetadata(**template_sample_expression)


def test_validate_manifestation_metadata(template_sample_manifestation):
ManifestationMetadata(**template_sample_manifestation)


def test_validate_packager_metadata(template_sample_metadata):
PackagerMetadata(**template_sample_metadata)
def test_validate_packager_metadata(template_sample_metadata_json):
PackagerMetadata(**template_sample_metadata_json)
Loading