Skip to content

Commit

Permalink
Create parent record link from an IsPartOf related DOI reference
Browse files Browse the repository at this point in the history
  • Loading branch information
marksparkza committed Aug 11, 2023
1 parent 97e69b8 commit aa5bd9e
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 11 deletions.
1 change: 1 addition & 0 deletions odp/api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ class RecordAuditModel(AuditModel):
record_metadata: dict[str, Any]
record_collection_id: str
record_schema_id: str
record_parent_id: Optional[str]


class RecordTagAuditModel(AuditModel):
Expand Down
62 changes: 61 additions & 1 deletion odp/api/routers/record.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from datetime import datetime, timezone
from functools import partial
from typing import Any
Expand All @@ -15,7 +16,7 @@
from odp.api.lib.utils import output_published_record_model, output_tag_instance_model
from odp.api.models import (AuditModel, CatalogRecordModel, RecordAuditModel, RecordModel, RecordModelIn, RecordTagAuditModel, TagInstanceModel,
TagInstanceModelIn)
from odp.const import ODPCollectionTag, ODPMetadataSchema, ODPScope
from odp.const import DOI_REGEX, ODPCollectionTag, ODPMetadataSchema, ODPScope
from odp.db import Session
from odp.db.models import (AuditCommand, CatalogRecord, Collection, CollectionTag, PublishedRecord, Record, RecordAudit, RecordTag, RecordTagAudit,
SchemaType, Tag, TagCardinality, TagType, User, VocabularyTerm)
Expand Down Expand Up @@ -79,6 +80,61 @@ def output_catalog_record_model(catalog_record: CatalogRecord) -> CatalogRecordM
)


def get_parent_id(metadata: dict[str, Any], schema_id: ODPMetadataSchema) -> str | None:
"""Return the id of the parent record implied by an IsPartOf related identifier.
The child-parent relationship is only established when both sides have a DOI.
This is supported for the SAEON.DataCite4 and SAEON.ISO19115 metadata schemas.
"""
try:
child_doi = metadata['doi']
except KeyError:
return

if schema_id not in (ODPMetadataSchema.SAEON_DATACITE4, ODPMetadataSchema.SAEON_ISO19115):
return

try:
parent_refs = list(filter(
lambda ref: ref['relationType'] == 'IsPartOf' and ref['relatedIdentifierType'] == 'DOI',
metadata['relatedIdentifiers']
))
except KeyError:
return

if not parent_refs:
return

if len(parent_refs) > 1:
raise HTTPException(
HTTP_422_UNPROCESSABLE_ENTITY,
'Cannot determine parent DOI: found multiple related identifiers with relation IsPartOf and type DOI.',
)

# related DOIs sometimes appear as doi.org links, sometimes as plain DOIs
if match := re.search(DOI_REGEX[1:], parent_refs[0]['relatedIdentifier']):
parent_doi = match.group(0)
parent_record = Session.execute(
select(Record).
where(func.lower(Record.doi) == parent_doi.lower())
).scalar_one_or_none()

if parent_record is None:
raise HTTPException(
HTTP_422_UNPROCESSABLE_ENTITY,
f'Record not found for parent DOI {parent_doi}.',
)

if parent_record.doi.lower() == child_doi.lower():
raise HTTPException(
HTTP_422_UNPROCESSABLE_ENTITY,
'DOI cannot be a parent of itself.',
)

return parent_record.id


def get_validity(metadata: dict[str, Any], schema: JSONSchema) -> Any:
if (result := schema.evaluate(JSON(metadata))).valid:
return result.output('flag')
Expand All @@ -103,6 +159,7 @@ def create_audit_record(
_metadata=record.metadata_,
_collection_id=record.collection_id,
_schema_id=record.schema_id,
_parent_id=record.parent_id,
).save()


Expand Down Expand Up @@ -256,6 +313,7 @@ def _create_record(
doi=record_in.doi,
sid=record_in.sid,
collection_id=record_in.collection_id,
parent_id=get_parent_id(record_in.metadata, record_in.schema_id),
schema_id=record_in.schema_id,
schema_type=SchemaType.metadata,
metadata_=record_in.metadata,
Expand Down Expand Up @@ -385,6 +443,7 @@ def _set_record(
record.doi = record_in.doi
record.sid = record_in.sid
record.collection_id = record_in.collection_id
record.parent_id = get_parent_id(record_in.metadata, record_in.schema_id)
record.schema_id = record_in.schema_id
record.schema_type = SchemaType.metadata
record.metadata_ = record_in.metadata
Expand Down Expand Up @@ -733,6 +792,7 @@ async def get_record_audit_detail(
record_metadata=row.RecordAudit._metadata,
record_collection_id=row.RecordAudit._collection_id,
record_schema_id=row.RecordAudit._schema_id,
record_parent_id=row.RecordAudit._parent_id,
)


Expand Down
43 changes: 35 additions & 8 deletions test/api/test_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
def record_batch():
"""Create and commit a batch of Record instances."""
records = []
for _ in range(randint(3, 5)):
records += [record := RecordFactory(is_child_record=False)]
for n in range(randint(3, 5)):
# ensure record 0 has a DOI; it can be used as a parent record
kwargs = dict(identifiers='doi') if n == 0 else {}
records += [record := RecordFactory(**kwargs)]
RecordTagFactory.create_batch(randint(0, 3), record=record)
CollectionTagFactory.create_batch(randint(0, 3), collection=record.collection)
return records
Expand All @@ -29,24 +31,24 @@ def record_batch():
def record_batch_no_tags():
"""Create and commit a batch of Record instances
without any tag instances."""
return [RecordFactory(is_child_record=False) for _ in range(randint(3, 5))]
return [RecordFactory() for _ in range(randint(3, 5))]


@pytest.fixture
def record_batch_with_ids():
"""Create and commit a batch of Record instances
with both DOIs and SIDs."""
return [RecordFactory(identifiers='both', is_child_record=False) for _ in range(randint(3, 5))]
return [RecordFactory(identifiers='both') for _ in range(randint(3, 5))]


def record_build(collection=None, collection_tags=None, **id):
def record_build(collection=None, collection_tags=None, parent_doi=None, **id):
"""Build and return an uncommitted Record instance.
Referenced collection is however committed."""
record = RecordFactory.build(
**id,
collection=collection or (collection := CollectionFactory()),
collection_id=collection.id,
is_child_record=False,
parent_doi=parent_doi,
)
if collection_tags:
for ct in collection_tags:
Expand Down Expand Up @@ -87,6 +89,11 @@ def is_same_user(request):
return request.param


@pytest.fixture(params=[None, 'doi', 'doi.org'])
def with_parent(request):
return request.param


def new_generic_tag(cardinality, is_keyword_tag=False):
schema_uri = 'https://odp.saeon.ac.za/schema/tag/keyword' if is_keyword_tag else 'https://odp.saeon.ac.za/schema/tag/generic'
return TagFactory(
Expand Down Expand Up @@ -121,6 +128,7 @@ def assert_db_state(records):
assert row.collection_id == records[n].collection_id
assert row.schema_id == records[n].schema_id
assert row.schema_type == records[n].schema_type
assert row.parent_id == records[n].parent_id


def assert_db_tag_state(record_id, *record_tags):
Expand Down Expand Up @@ -155,6 +163,7 @@ def assert_audit_log(command, record):
assert result._metadata == record.metadata_
assert result._collection_id == record.collection_id
assert result._schema_id == record.schema_id
assert result._parent_id == record.parent_id


def assert_no_audit_log():
Expand Down Expand Up @@ -317,7 +326,7 @@ def test_get_record_not_found(api, record_batch, collection_auth):
(True, all_scopes, []),
(True, all_scopes_excluding(ODPScope.RECORD_ADMIN), []),
])
def test_create_record(api, record_batch, admin_route, scopes, collection_tags, collection_auth):
def test_create_record(api, record_batch, admin_route, scopes, collection_tags, collection_auth, with_parent):
route = '/record/admin/' if admin_route else '/record/'

authorized = admin_route and ODPScope.RECORD_ADMIN in scopes or \
Expand All @@ -336,9 +345,17 @@ def test_create_record(api, record_batch, admin_route, scopes, collection_tags,
else:
new_record_collection = None # new collection

if with_parent == 'doi':
parent_doi = record_batch[0].doi.upper() # ensure DOI ref works case-insensitively
elif with_parent == 'doi.org':
parent_doi = f'https://doi.org/{record_batch[0].doi.upper()}'
else:
parent_doi = None

modified_record_batch = record_batch + [record := record_build(
collection=new_record_collection,
collection_tags=collection_tags,
parent_doi=parent_doi,
)]

r = api(scopes, api_client_collections).post(route, json=dict(
Expand All @@ -356,6 +373,7 @@ def test_create_record(api, record_batch, admin_route, scopes, collection_tags,
assert_no_audit_log()
else:
record.id = r.json().get('id')
record.parent_id = record_batch[0].id if record.doi and parent_doi else None
assert_json_record_result(r, r.json(), record)
assert_db_state(modified_record_batch)
assert_audit_log('insert', record)
Expand Down Expand Up @@ -437,7 +455,7 @@ def test_create_record_conflict(api, record_batch_with_ids, is_admin_route, coll
(True, all_scopes, []),
(True, all_scopes_excluding(ODPScope.RECORD_ADMIN), []),
])
def test_update_record(api, record_batch, admin_route, scopes, collection_tags, collection_auth):
def test_update_record(api, record_batch, admin_route, scopes, collection_tags, collection_auth, with_parent):
route = '/record/admin/' if admin_route else '/record/'

authorized = admin_route and ODPScope.RECORD_ADMIN in scopes or \
Expand All @@ -456,12 +474,20 @@ def test_update_record(api, record_batch, admin_route, scopes, collection_tags,
else:
modified_record_collection = None # new collection

if with_parent == 'doi':
parent_doi = record_batch[0].doi.upper() # ensure DOI ref works case-insensitively
elif with_parent == 'doi.org':
parent_doi = f'https://doi.org/{record_batch[0].doi.upper()}'
else:
parent_doi = None

modified_record_batch = record_batch.copy()
modified_record_batch[2] = (record := record_build(
id=record_batch[2].id,
doi=record_batch[2].doi,
collection=modified_record_collection,
collection_tags=collection_tags,
parent_doi=parent_doi,
))

r = api(scopes, api_client_collections).put(route + record.id, json=dict(
Expand All @@ -482,6 +508,7 @@ def test_update_record(api, record_batch, admin_route, scopes, collection_tags,
assert_db_state(record_batch)
assert_no_audit_log()
else:
record.parent_id = record_batch[0].id if record.doi and parent_doi else None
assert_json_record_result(r, r.json(), record)
assert_db_state(modified_record_batch)
assert_audit_log('update', record)
Expand Down
18 changes: 16 additions & 2 deletions test/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,19 @@ def _sanitize_id(val):
return re.sub(r'[^-.:\w]', '_', val)


def create_metadata(record, n):
metadata = {'foo': f'test-{n}'}
if record.doi:
metadata |= {'doi': record.doi}
if record.parent_doi:
metadata |= {"relatedIdentifiers": [{
"relatedIdentifier": record.parent_doi,
"relatedIdentifierType": "DOI",
"relationType": "IsPartOf"
}]}
return metadata


def schema_uri_from_type(schema):
if schema.type == 'metadata':
return choice((
Expand Down Expand Up @@ -224,12 +237,13 @@ class Meta:
class RecordFactory(ODPModelFactory):
class Meta:
model = Record
exclude = ('identifiers', 'is_child_record')
exclude = ('identifiers', 'is_child_record', 'parent_doi')

identifiers = factory.LazyFunction(lambda: choice(('doi', 'sid', 'both')))
doi = factory.LazyAttributeSequence(lambda r, n: f'10.5555/test-{n}' if r.identifiers in ('doi', 'both') else None)
sid = factory.LazyAttributeSequence(lambda r, n: f'test-{n}' if r.doi is None or r.identifiers in ('sid', 'both') else None)
metadata_ = factory.LazyAttributeSequence(lambda r, n: {'doi': r.doi, 'foo': f'test-{n}'} if r.doi else {'foo': f'test-{n}'})
parent_doi = None
metadata_ = factory.LazyAttributeSequence(create_metadata)
validity = {}
collection = factory.SubFactory(CollectionFactory)
schema_id = factory.LazyFunction(lambda: choice(('SAEON.DataCite4', 'SAEON.ISO19115')))
Expand Down

0 comments on commit aa5bd9e

Please sign in to comment.