Skip to content

Commit

Permalink
Create RelatedLink objects from "related" column
Browse files Browse the repository at this point in the history
ref #958
  • Loading branch information
blms committed Dec 5, 2023
1 parent b4325ef commit 04c8f9b
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 9 deletions.
39 changes: 32 additions & 7 deletions apps/ingest/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ def clean_metadata(metadata):
:rtype: dict
"""
metadata = {key.casefold().replace(' ', '_'): value for key, value in metadata.items()}
fields = [f.name for f in Manifest._meta.get_fields()]
fields = [
*(f.name for f in Manifest._meta.get_fields()),
"related", # used for related external links
]
invalid_keys = []

for key in metadata.keys():
Expand All @@ -33,13 +36,31 @@ def clean_metadata(metadata):
if key not in fields:
invalid_keys.append(key)

# TODO: Update this method to allow all "invalid" keys to populate Manifest.metadata JSONField
for invalid_key in invalid_keys:
metadata.pop(invalid_key)



return metadata

def create_related_links(manifest, related_str):
    """
    Create RelatedLink objects from supplied related links string and associate each with
    supplied Manifest. String should consist of semicolon-separated URLs.

    Whitespace around each URL is ignored, and empty entries (for example the one left
    behind by a trailing semicolon, or an empty value) are skipped so that no blank
    links are created.

    :param manifest: Manifest the new links are attached to
    :type manifest: iiif.manifest.models.Manifest
    :param related_str: semicolon-separated URLs
    :type related_str: str
    :rtype: None
    """
    if not related_str:
        # empty or missing value: nothing to create
        return
    for link in (part.strip() for part in related_str.split(";")):
        if not link:
            # skip blanks produced by "a;;b", "a; " or a trailing ";"
            continue
        # named mime_type rather than format to avoid shadowing the builtin
        (mime_type, _) = guess_type(link)
        RelatedLink.objects.create(
            manifest=manifest,
            link=link,
            format=mime_type or "text/html",  # assume web page if MIME type cannot be determined
            data_type="Document",  # assume this is not meant for seeAlso
        )

def create_manifest(ingest):
"""
Create or update a Manifest from supplied metadata and images.
Expand All @@ -61,7 +82,13 @@ def create_manifest(ingest):
else:
manifest = Manifest.objects.create()
for (key, value) in metadata.items():
setattr(manifest, key, value)
if key == "related":
# add RelatedLinks from metadata spreadsheet key "related"
create_related_links(manifest, value)
else:
# all other keys should exist as fields on Manifest (for now)
setattr(manifest, key, value)
# TODO: if the key doesn't exist on Manifest model, add it to Manifest.metadata
else:
manifest = Manifest()

Expand All @@ -77,9 +104,7 @@ def create_manifest(ingest):
manifest.collections.set(ingest.collections.all())
# Save again once relationship is set
manifest.save()

# if type(ingest, .models.Remote):
if isinstance(ingest, Remote):
else:
RelatedLink(
manifest=manifest,
link=ingest.remote_url,
Expand Down
9 changes: 7 additions & 2 deletions apps/ingest/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from apps.ingest.models import IngestTaskWatcher

from .mail import send_email_on_failure, send_email_on_success
from .services import create_manifest
from .services import create_manifest, create_related_links

# Use `apps.get_model` to avoid circular import error. Because the parameters used to
# create a background task have to be serializable, we can't just pass in the model object.
Expand Down Expand Up @@ -129,7 +129,12 @@ def create_canvases_from_s3_ingest(metadata, ingest_id):
except Manifest.DoesNotExist:
manifest = Manifest.objects.create(pid=pid)
for (key, value) in metadata.items():
setattr(manifest, key, value)
if key == "related":
# add RelatedLinks from metadata spreadsheet key "related"
create_related_links(manifest, value)
else:
# all other keys should exist as fields on Manifest (for now)
setattr(manifest, key, value)
# Image server: set from ingest
ingest = S3Ingest.objects.get(pk=ingest_id)
manifest.image_server = ingest.image_server
Expand Down
17 changes: 17 additions & 0 deletions apps/ingest/tests/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,23 @@ def test_it_creates_mainfest_with_metadata_property(self):
assert local.manifest.pid == '808'
assert local.manifest.title == 'Goodie Mob'

def test_create_related_links(self):
    """
    It should create RelatedLinks from the semicolon-separated "related" metadata value,
    each with the format expected for its own URL.
    """
    github_url = 'https://github.com/ecds/readux/tree/develop'
    pdf_url = 'https://archive.org/download/cherokeehymnbook00boud/cherokeehymnbook00boud.pdf'
    metadata = {
        'pid': '808',
        'related': ';'.join([github_url, pdf_url])
    }
    local = self.mock_local('no_meta_file.zip', metadata=metadata)
    local.manifest = create_manifest(local)
    related_links = local.manifest.related_links
    # should get 2 from metadata, 1 from volume url
    assert len(related_links) == 3
    # pair each link id with its own format, so a format attached to the wrong
    # link cannot satisfy the assertions below
    formats_by_id = {link["@id"]: link["format"] for link in related_links}
    # should get github link format as text/html
    assert formats_by_id.get(github_url) == "text/html"
    # should get pdf format too
    assert formats_by_id.get(pdf_url) == "application/pdf"

def test_moving_bulk_bundle_to_s3(self):
"""
It should upload Local.bundle_from_bulk to mock S3 by saving it to
Expand Down

0 comments on commit 04c8f9b

Please sign in to comment.