Skip to content

Commit

Permalink
feat(xmllib): add migration metadata to resources (DEV-4194) (#1190)
Browse files Browse the repository at this point in the history
  • Loading branch information
Nora-Olivia-Ammann authored Oct 2, 2024
1 parent 99c76ea commit 1784065
Show file tree
Hide file tree
Showing 6 changed files with 217 additions and 2 deletions.
64 changes: 63 additions & 1 deletion src/dsp_tools/xmllib/models/dsp_base_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from lxml import etree

from dsp_tools.models.custom_warnings import DspToolsUserWarning
from dsp_tools.models.exceptions import InputError
from dsp_tools.xmllib.models.migration_metadata import MigrationMetadata
from dsp_tools.xmllib.models.values import ColorValue
from dsp_tools.xmllib.models.values import LinkValue
from dsp_tools.xmllib.models.values import Richtext
Expand All @@ -19,7 +21,7 @@
XML_NAMESPACE_MAP = {None: "https://dasch.swiss/schema", "xsi": "http://www.w3.org/2001/XMLSchema-instance"}
DASCH_SCHEMA = "{https://dasch.swiss/schema}"

LIST_SEPARATOR = "\n -"
LIST_SEPARATOR = "\n - "


@dataclass
Expand All @@ -29,6 +31,7 @@ class AnnotationResource:
annotation_of: str
comments: list[str]
permissions: str = "res-default"
migration_metadata: MigrationMetadata | None = None

def __post_init__(self) -> None:
_check_strings(string_to_check=self.res_id, res_id=self.res_id, field_name="Resource ID")
Expand All @@ -53,6 +56,17 @@ def add_comments(self, comments: list[str]) -> AnnotationResource:
self.comments.extend(comments)
return self

def add_migration_metadata(
    self,
    creation_date: str | None,
    iri: str | None = None,
    ark: str | None = None,
) -> AnnotationResource:
    """
    Attach migration metadata to this annotation resource. This can be done only once.

    Args:
        creation_date: creation date of the resource, or None
        iri: IRI of the resource, or None
        ark: ARK of the resource, or None

    Raises:
        InputError: if the resource already carries migration metadata

    Returns:
        The resource itself, so that calls can be chained.
    """
    if not self.migration_metadata:
        self.migration_metadata = MigrationMetadata(
            creation_date=creation_date,
            iri=iri,
            ark=ark,
            res_id=self.res_id,
        )
        return self
    # Existing metadata is never overwritten; the caller has to resolve the conflict.
    msg = (
        f"The resource with the ID '{self.res_id}' already contains migration metadata, "
        f"no new data can be added."
    )
    raise InputError(msg)

def serialise(self) -> etree._Element:
self.comments = _transform_unexpected_input(self.comments, "comments", self.res_id)
res_ele = self._serialise_resource_element()
Expand All @@ -79,6 +93,7 @@ class RegionResource:
geometry: dict[str, Any]
comments: list[str]
permissions: str = "res-default"
migration_metadata: MigrationMetadata | None = None

def __post_init__(self) -> None:
_check_strings(string_to_check=self.res_id, res_id=self.res_id, field_name="Resource ID")
Expand Down Expand Up @@ -121,6 +136,17 @@ def add_comments(self, comments: list[str]) -> RegionResource:
self.comments.extend(comments)
return self

def add_migration_metadata(
    self,
    creation_date: str | None,
    iri: str | None = None,
    ark: str | None = None,
) -> RegionResource:
    """
    Attach migration metadata to this region resource. This can be done only once.

    Args:
        creation_date: creation date of the resource, or None
        iri: IRI of the resource, or None
        ark: ARK of the resource, or None

    Raises:
        InputError: if the resource already carries migration metadata

    Returns:
        The resource itself, so that calls can be chained.
    """
    if not self.migration_metadata:
        self.migration_metadata = MigrationMetadata(
            creation_date=creation_date,
            iri=iri,
            ark=ark,
            res_id=self.res_id,
        )
        return self
    # Existing metadata is never overwritten; the caller has to resolve the conflict.
    msg = (
        f"The resource with the ID '{self.res_id}' already contains migration metadata, "
        f"no new data can be added."
    )
    raise InputError(msg)

def serialise(self) -> etree._Element:
self.comments = _transform_unexpected_input(self.comments, "comments", self.res_id)
res_ele = self._serialise_resource_element()
Expand Down Expand Up @@ -157,6 +183,7 @@ class LinkResource:
link_to: list[str]
comments: list[str]
permissions: str = "res-default"
migration_metadata: MigrationMetadata | None = None

def new(
self, res_id: str, label: str, link_to: list[str], comments: list[str], permissions: str = "res-default"
Expand All @@ -177,6 +204,17 @@ def add_comments(self, comments: list[str]) -> LinkResource:
self.comments.extend(comments)
return self

def add_migration_metadata(
    self,
    creation_date: str | None,
    iri: str | None = None,
    ark: str | None = None,
) -> LinkResource:
    """
    Attach migration metadata to this link resource. This can be done only once.

    Args:
        creation_date: creation date of the resource, or None
        iri: IRI of the resource, or None
        ark: ARK of the resource, or None

    Raises:
        InputError: if the resource already carries migration metadata

    Returns:
        The resource itself, so that calls can be chained.
    """
    if not self.migration_metadata:
        self.migration_metadata = MigrationMetadata(
            creation_date=creation_date,
            iri=iri,
            ark=ark,
            res_id=self.res_id,
        )
        return self
    # Existing metadata is never overwritten; the caller has to resolve the conflict.
    msg = (
        f"The resource with the ID '{self.res_id}' already contains migration metadata, "
        f"no new data can be added."
    )
    raise InputError(msg)

def serialise(self) -> etree._Element:
self._check_for_and_convert_unexpected_input()
res_ele = self._serialise_resource_element()
Expand Down Expand Up @@ -236,6 +274,7 @@ class VideoSegmentResource:
keywords: list[str] = field(default_factory=list)
relates_to: list[str] = field(default_factory=list)
permissions: str = "res-default"
migration_metadata: MigrationMetadata | None = None

def new(
self,
Expand Down Expand Up @@ -294,6 +333,17 @@ def add_relates_to_multiple(self, relates_to: list[str]) -> VideoSegmentResource
self.relates_to.extend(relates_to)
return self

def add_migration_metadata(
    self,
    creation_date: str | None,
    iri: str | None = None,
    ark: str | None = None,
) -> VideoSegmentResource:
    """
    Attach migration metadata to this video segment resource. This can be done only once.

    Args:
        creation_date: creation date of the resource, or None
        iri: IRI of the resource, or None
        ark: ARK of the resource, or None

    Raises:
        InputError: if the resource already carries migration metadata

    Returns:
        The resource itself, so that calls can be chained.
    """
    if not self.migration_metadata:
        self.migration_metadata = MigrationMetadata(
            creation_date=creation_date,
            iri=iri,
            ark=ark,
            res_id=self.res_id,
        )
        return self
    # Existing metadata is never overwritten; the caller has to resolve the conflict.
    msg = (
        f"The resource with the ID '{self.res_id}' already contains migration metadata, "
        f"no new data can be added."
    )
    raise InputError(msg)

def serialise(self) -> etree._Element:
self._check_for_and_convert_unexpected_input()
res_ele = self._serialise_resource_element()
Expand Down Expand Up @@ -326,6 +376,7 @@ class AudioSegmentResource:
keywords: list[str] = field(default_factory=list)
relates_to: list[str] = field(default_factory=list)
permissions: str = "res-default"
migration_metadata: MigrationMetadata | None = None

def new(
self,
Expand Down Expand Up @@ -384,6 +435,17 @@ def add_relates_to_multiple(self, relates_to: list[str]) -> AudioSegmentResource
self.relates_to.extend(relates_to)
return self

def add_migration_metadata(
    self,
    creation_date: str | None,
    iri: str | None = None,
    ark: str | None = None,
) -> AudioSegmentResource:
    """
    Attach migration metadata to this audio segment resource. This can be done only once.

    Args:
        creation_date: creation date of the resource, or None
        iri: IRI of the resource, or None
        ark: ARK of the resource, or None

    Raises:
        InputError: if the resource already carries migration metadata

    Returns:
        The resource itself, so that calls can be chained.
    """
    if not self.migration_metadata:
        self.migration_metadata = MigrationMetadata(
            creation_date=creation_date,
            iri=iri,
            ark=ark,
            res_id=self.res_id,
        )
        return self
    # Existing metadata is never overwritten; the caller has to resolve the conflict.
    msg = (
        f"The resource with the ID '{self.res_id}' already contains migration metadata, "
        f"no new data can be added."
    )
    raise InputError(msg)

def serialise(self) -> etree._Element:
self._check_for_and_convert_unexpected_input()
res_ele = self._serialise_resource_element()
Expand Down
51 changes: 51 additions & 0 deletions src/dsp_tools/xmllib/models/migration_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import warnings
from dataclasses import dataclass

from typing_extensions import deprecated

from dsp_tools.models.custom_warnings import DspToolsUserWarning
from dsp_tools.xmllib.value_checkers import is_dsp_ark
from dsp_tools.xmllib.value_checkers import is_dsp_iri
from dsp_tools.xmllib.value_checkers import is_timestamp

# Separator placed before and between the individual problems when they are listed in a warning message.
LIST_SEPARATOR = "\n - "


@dataclass
class MigrationMetadata:
    """
    Metadata of a resource that is carried over during a migration.

    Attributes:
        creation_date: creation date of the resource, expected to be a timestamp, or None
        iri: IRI of the resource, or None
        ark: ARK of the resource, or None
        res_id: ID of the resource the metadata belongs to; only used in warning messages
    """

    creation_date: str | None
    iri: str | None
    ark: str | None
    res_id: str

    @deprecated("This is for salsah migration only and will be deleted in future releases.")
    def __post_init__(self) -> None:
        """Validate the provided values and emit one user warning listing everything that is invalid."""
        # One row per field: (value, validity check, message prefix). Empty values are not checked.
        checks = (
            (self.creation_date, is_timestamp, "The value for creation date is not a valid timestamp: "),
            (self.iri, is_dsp_iri, "The provided IRI is not valid: "),
            (self.ark, is_dsp_ark, "The provided ARK is not valid: "),
        )
        problems = [f"{prefix}{value}" for value, is_valid, prefix in checks if value and not is_valid(value)]
        if not problems:
            return
        problem_list = LIST_SEPARATOR.join(problems)
        msg = (
            f"The migration metadata of the resource with the ID '{self.res_id}' has the following problem(s):"
            f"{LIST_SEPARATOR}{problem_list}"
        )
        warnings.warn(DspToolsUserWarning(msg))

    def as_attrib(self) -> dict[str, str]:
        """
        Collect the non-empty values in a dictionary keyed by attribute name.

        A user warning is emitted if none of the three values is set.

        Returns:
            Dictionary with the keys "creation_date", "iri" and "ark" for those values that are not empty.
        """
        candidates = {"creation_date": self.creation_date, "iri": self.iri, "ark": self.ark}
        attrib_dict = {name: value for name, value in candidates.items() if value}
        if not attrib_dict:
            msg = (
                f"The metadata of the resource with the ID '{self.res_id}' does not contain any values. "
                f"Please check if an error occurred."
            )
            warnings.warn(DspToolsUserWarning(msg))
        return attrib_dict
19 changes: 18 additions & 1 deletion src/dsp_tools/xmllib/models/resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from dsp_tools.xmllib.models.file_values import AbstractFileValue
from dsp_tools.xmllib.models.file_values import FileValue
from dsp_tools.xmllib.models.file_values import IIIFUri
from dsp_tools.xmllib.models.migration_metadata import MigrationMetadata
from dsp_tools.xmllib.models.values import BooleanValue
from dsp_tools.xmllib.models.values import ColorValue
from dsp_tools.xmllib.models.values import DateValue
Expand All @@ -32,7 +33,7 @@
XML_NAMESPACE_MAP = {None: "https://dasch.swiss/schema", "xsi": "http://www.w3.org/2001/XMLSchema-instance"}
DASCH_SCHEMA = "{https://dasch.swiss/schema}"

LIST_SEPARATOR = "\n -"
LIST_SEPARATOR = "\n - "


@dataclass
Expand All @@ -43,6 +44,7 @@ class Resource:
values: list[Value] = field(default_factory=list)
permissions: str = "res-default"
file_value: AbstractFileValue | None = None
migration_metadata: MigrationMetadata | None = None

def __post_init__(self) -> None:
msg = []
Expand Down Expand Up @@ -396,3 +398,18 @@ def add_iiif_uri(self, iiif_uri: str, permissions: str | None = None, comment: s
)
self.file_value = IIIFUri(iiif_uri, permissions, comment, self.res_id)
return self

#######################
# Migration Metadata
#######################

def add_migration_metadata(
    self,
    creation_date: str | None,
    iri: str | None = None,
    ark: str | None = None,
) -> Resource:
    """
    Attach migration metadata to this resource. This can be done only once.

    Args:
        creation_date: creation date of the resource, or None
        iri: IRI of the resource, or None
        ark: ARK of the resource, or None

    Raises:
        InputError: if the resource already carries migration metadata

    Returns:
        The resource itself, so that calls can be chained.
    """
    if not self.migration_metadata:
        self.migration_metadata = MigrationMetadata(
            creation_date=creation_date,
            iri=iri,
            ark=ark,
            res_id=self.res_id,
        )
        return self
    # Existing metadata is never overwritten; the caller has to resolve the conflict.
    msg = (
        f"The resource with the ID '{self.res_id}' already contains migration metadata, "
        f"no new data can be added."
    )
    raise InputError(msg)
10 changes: 10 additions & 0 deletions src/dsp_tools/xmllib/value_checkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,13 @@ def find_geometry_problem(value: Any) -> str:
except (json.JSONDecodeError, TypeError, IndexError, KeyError, AssertionError):
msg += f"\n'{value}' is not a valid JSON geometry object."
return msg


def is_dsp_iri(value: Any) -> bool:
    """
    Check if a value looks like an internal DSP IRI.

    The value is converted with str() and must start with "http://rdfh.ch/",
    followed by a four-character project shortcode and a slash.
    Project shortcodes are hexadecimal, so the letters A-F are accepted in addition to digits.
    Whatever follows the shortcode is not inspected.

    Args:
        value: value to check, may be of any type

    Returns:
        True if the string representation of the value has the expected prefix, False otherwise.
    """
    return bool(regex.search(r"^http://rdfh\.ch/[\dA-Fa-f]{4}/", str(value)))


def is_dsp_ark(value: Any) -> bool:
    """
    Check if a value looks like an ARK.

    Only the prefix is inspected: the string representation of the value must start with "ark:/".

    Args:
        value: value to check, may be of any type

    Returns:
        True if the string representation of the value starts with "ark:/", False otherwise.
    """
    as_text = str(value)
    return as_text.startswith("ark:/")
53 changes: 53 additions & 0 deletions test/unittests/xmllib/test_migration_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import warnings
from warnings import WarningMessage

import pytest

from dsp_tools.models.custom_warnings import DspToolsUserWarning
from dsp_tools.xmllib.models.migration_metadata import MigrationMetadata


def test_migration_metadata_creation_date_good() -> None:
    """A valid creation date triggers only the deprecation notice and no validation warning."""
    with warnings.catch_warnings(record=True) as caught_warnings:
        # Record every warning regardless of the ambient filters, so that the count is deterministic.
        warnings.simplefilter("always")
        MigrationMetadata("2019-01-09T15:45:54.502951Z", None, None, "id")
    assert len(caught_warnings) == 1
    assert isinstance(caught_warnings[0], WarningMessage)
    # Every recorded entry is a WarningMessage, so the category has to be checked as well:
    # the single warning must be the deprecation notice, not a user warning about invalid input.
    assert caught_warnings[0].category is DeprecationWarning


def test_migration_metadata_iri_good() -> None:
    """A valid IRI triggers only the deprecation notice and no validation warning."""
    with warnings.catch_warnings(record=True) as caught_warnings:
        # Record every warning regardless of the ambient filters, so that the count is deterministic.
        warnings.simplefilter("always")
        MigrationMetadata(None, "http://rdfh.ch/4123/TqAnYQzrSzC2ctT06OJMYB", None, "id")
    assert len(caught_warnings) == 1
    assert isinstance(caught_warnings[0], WarningMessage)
    # Every recorded entry is a WarningMessage, so the category has to be checked as well:
    # the single warning must be the deprecation notice, not a user warning about invalid input.
    assert caught_warnings[0].category is DeprecationWarning


def test_migration_metadata_ark_good() -> None:
    """A valid ARK triggers only the deprecation notice and no validation warning."""
    with warnings.catch_warnings(record=True) as caught_warnings:
        # Record every warning regardless of the ambient filters, so that the count is deterministic.
        warnings.simplefilter("always")
        MigrationMetadata(None, None, "ark:/72163/4123-43xc6ivb931-a.2022829", "id")
    assert len(caught_warnings) == 1
    assert isinstance(caught_warnings[0], WarningMessage)
    # Every recorded entry is a WarningMessage, so the category has to be checked as well:
    # the single warning must be the deprecation notice, not a user warning about invalid input.
    assert caught_warnings[0].category is DeprecationWarning


def test_migration_metadata_creation_date_warns() -> None:
    """A malformed creation date results in a user warning."""
    malformed_date = "2019-01-054.502951Z"
    with pytest.warns(DspToolsUserWarning):
        MigrationMetadata(creation_date=malformed_date, iri=None, ark=None, res_id="id")


def test_migration_metadata_iri_warns() -> None:
    """A malformed IRI results in a user warning."""
    malformed_iri = "http:123/TqAnYQzrSzC2ctT06OJMYB"
    with pytest.warns(DspToolsUserWarning):
        MigrationMetadata(creation_date=None, iri=malformed_iri, ark=None, res_id="id")


def test_migration_metadata_ark_warns() -> None:
    """A malformed ARK results in a user warning."""
    malformed_ark = "163/4123-43xc6ivb931-a.2022829"
    with pytest.warns(DspToolsUserWarning):
        MigrationMetadata(creation_date=None, iri=None, ark=malformed_ark, res_id="id")


def test_migration_metadata_as_attrib_empty() -> None:
    """Metadata without any values warns the user and yields no attributes."""
    with pytest.warns(DspToolsUserWarning):
        empty_metadata = MigrationMetadata(creation_date=None, iri=None, ark=None, res_id="id")
        result = empty_metadata.as_attrib()
    assert not result


# Allows running this test module directly as a script: pytest is invoked on this file only.
if __name__ == "__main__":
    pytest.main([__file__])
22 changes: 22 additions & 0 deletions test/unittests/xmllib/test_value_checkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from dsp_tools.xmllib.value_checkers import is_color
from dsp_tools.xmllib.value_checkers import is_date
from dsp_tools.xmllib.value_checkers import is_decimal
from dsp_tools.xmllib.value_checkers import is_dsp_ark
from dsp_tools.xmllib.value_checkers import is_dsp_iri
from dsp_tools.xmllib.value_checkers import is_geoname
from dsp_tools.xmllib.value_checkers import is_integer
from dsp_tools.xmllib.value_checkers import is_string_like
Expand Down Expand Up @@ -157,5 +159,25 @@ def test_is_geometry_wrong(val: Any) -> None:
assert find_geometry_problem(val)


@pytest.mark.parametrize(
    "val",
    [
        "http://rdfh.ch/4123/DiAmYQzQSzC7cdTo6OJMYA",
    ],
)
def test_is_dsp_iri_correct(val: Any) -> None:
    """Values with the internal DSP IRI prefix are accepted."""
    accepted = is_dsp_iri(val)
    assert accepted


@pytest.mark.parametrize(
    "val",
    [
        "http://www.example.org/prefix1/",
        "ark:/72163/4123-43xc6ivb931-a.2022829",
    ],
)
def test_is_dsp_iri_wrong(val: Any) -> None:
    """Foreign URLs and ARKs are rejected by the IRI check."""
    accepted = is_dsp_iri(val)
    assert not accepted


@pytest.mark.parametrize(
    "val",
    [
        "ark:/72163/4123-43xc6ivb931-a.2022829",
    ],
)
def test_is_dsp_ark_correct(val: Any) -> None:
    """Values with the ARK prefix are accepted."""
    accepted = is_dsp_ark(val)
    assert accepted


@pytest.mark.parametrize(
    "val",
    [
        "http://www.example.org/prefix1/",
        "http://rdfh.ch/4123/DiAmYQzQSzC7cdTo6OJMYA",
    ],
)
def test_is_dsp_ark_wrong(val: Any) -> None:
    """URLs, including internal DSP IRIs, are rejected by the ARK check."""
    accepted = is_dsp_ark(val)
    assert not accepted


# Allows running this test module directly as a script: pytest is invoked on this file only.
if __name__ == "__main__":
    pytest.main([__file__])

0 comments on commit 1784065

Please sign in to comment.