Skip to content

Commit

Permalink
Collect creator data from the Europeana API (WordPress#5057)
Browse files Browse the repository at this point in the history
* Collect creator data from the Europeana API (Fixes WordPress#2834)
* Add a method to collect creator data from the Europeana API (Fixes WordPress#2834)

Co-authored-by: Krystle Salazar <[email protected]>
Co-authored-by: Olga Bulat <[email protected]>
  • Loading branch information
3 people authored Oct 29, 2024
1 parent 267b437 commit c4fa959
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 0 deletions.
12 changes: 12 additions & 0 deletions catalog/dags/providers/provider_api_scripts/europeana.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def get_record_data(self, data: dict) -> dict | None:
"foreign_identifier": self._get_foreign_identifier(data),
"meta_data": self._get_meta_data_dict(data),
"title": self._get_title(data),
"creator": self._get_creator(data),
"license_info": self._get_license_info(data),
"filetype": self._get_filetype(item_data),
"filesize": self._get_filesize(item_data),
Expand Down Expand Up @@ -157,6 +158,17 @@ def _get_filetype(self, item_data: dict) -> str:
def _get_filesize(self, item_data: dict) -> int:
return item_data.get("ebucoreFileByteSize")

@staticmethod
def _get_creator(data: dict) -> str | None:
creators = data.get("dcCreator", [])
if not creators:
return None
if isinstance(creators, list) and (creator_list := [c for c in creators if c]):
return ", ".join(creator_list)
elif isinstance(creators, str):
return creators
return None

def _get_meta_data_dict(self, data: dict) -> dict:
meta_data = {
"country": data.get("country"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@ def test_record_builder_get_record_data(ingester, record_builder):
"description": "Sello en seco: España artística y monumental.",
}

expected_creator = "http://hispana.mcu.es/lod/oai:bibliotecadigital.jcyl.es:26229#ent2, http://hispana.mcu.es/lod/oai:bibliotecadigital.jcyl.es:26229#ent3, http://hispana.mcu.es/lod/oai:bibliotecadigital.jcyl.es:26229#ent4" # codespell:ignore

assert record_data == {
"foreign_landing_url": (
"http://bibliotecadigital.jcyl.es/i18n/consulta/registro.cmd?" "id=26229"
Expand All @@ -207,6 +209,7 @@ def test_record_builder_get_record_data(ingester, record_builder):
"filesize": 36272,
"filetype": "jpeg",
"thumbnail_url": "https://api.europeana.eu/api/v2/thumbnail-by-url.json?uri=http%3A%2F%2Fbibliotecadigital.jcyl.es%2Fi18n%2Fcatalogo_imagenes%2Fimagen_id.cmd%3FidImagen%3D102620362&type=IMAGE",
"creator": expected_creator,
}


Expand Down Expand Up @@ -284,6 +287,35 @@ def test_get_image_dimensions(item_data, expected, record_builder):
assert record_builder._get_image_dimensions(item_data) == expected


@pytest.mark.parametrize(
    ("item_data", "expected"),
    [
        pytest.param({"dcCreator": ["Chandler"]}, "Chandler", id="single_creator"),
        pytest.param(
            {"dcCreator": ["Chandler", "Joey"]},
            "Chandler, Joey",
            id="multiple_creators",
        ),
        pytest.param({"dcCreator": "Chandler"}, "Chandler", id="dcCreator_string"),
        pytest.param({"dcCreator": None}, None, id="dcCreator_none"),
        pytest.param({"dcCreator": ""}, None, id="dcCreator_empty_string"),
        pytest.param({"dcCreator": []}, None, id="empty_creator_list"),
        pytest.param({"dcCreator": [""]}, None, id="empty_string_in_list"),
        pytest.param({}, None, id="no_dcCreator"),
    ],
)
def test_get_creator(item_data, expected, record_builder):
    """
    Check ``_get_creator`` against each shape of ``dcCreator`` listed above.

    The cases cover a one-element list, a multi-element list, a plain
    string, None, an empty string, an empty list, a list holding only an
    empty string, and a record without the key.
    """
    creator = record_builder._get_creator(item_data)
    assert creator == expected


@pytest.mark.parametrize(
"item_data, expected",
[
Expand Down

0 comments on commit c4fa959

Please sign in to comment.