Commit e67d7d1

integrating doi with metax

blankdots committed Mar 7, 2022
1 parent a0390ed, commit e67d7d1
Showing 4 changed files with 92 additions and 93 deletions.
19 changes: 15 additions & 4 deletions metadata_backend/api/handlers/folder.py
@@ -80,8 +80,13 @@ def _prepare_doi_update(self, folder: Dict) -> Tuple[Dict, List]:
"doi": _study_doi,
"prefix": _study_doi.split("/")[0],
"suffix": _study_doi.split("/")[1],
"types": _study["types"],
# "url": _study["url"],
"types": {
"bibtex": "misc",
"citeproc": "collection",
"schemaOrg": "Collection",
"resourceTypeGeneral": "Collection",
},
"url": _study["url"],
"identifiers": [_study["identifier"]],
},
"id": _study_doi,
@@ -98,8 +103,14 @@ def _prepare_doi_update(self, folder: Dict) -> Tuple[Dict, List]:
"doi": _doi,
"prefix": _doi.split("/")[0],
"suffix": _doi.split("/")[1],
"types": ds["types"],
# "url": ds["url"],
"types": {
"ris": "DATA",
"bibtex": "misc",
"citeproc": "dataset",
"schemaOrg": "Dataset",
"resourceTypeGeneral": "Dataset",
},
"url": ds["url"],
"identifiers": [ds["identifier"]],
},
"id": _doi,
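Both hunks replace type metadata previously read from the submitted object (`_study["types"]`, `ds["types"]`) with fixed DataCite type blocks. A minimal standalone sketch of that mapping; the constant and helper names here are illustrative, not from the commit:

    # Illustrative constant collecting the hardcoded DataCite "types" blocks above.
    DATACITE_TYPES = {
        "study": {
            "bibtex": "misc",
            "citeproc": "collection",
            "schemaOrg": "Collection",
            "resourceTypeGeneral": "Collection",
        },
        "dataset": {
            "ris": "DATA",
            "bibtex": "misc",
            "citeproc": "dataset",
            "schemaOrg": "Dataset",
            "resourceTypeGeneral": "Dataset",
        },
    }

    def doi_info(full_doi: str, url: str, schema: str, identifier: dict) -> dict:
        """Assemble a per-object DOI block the way _prepare_doi_update now does."""
        prefix, suffix = full_doi.split("/", 1)
        return {
            "doi": full_doi,
            "prefix": prefix,
            "suffix": suffix,
            "types": DATACITE_TYPES[schema],  # hardcoded, no longer read from the object
            "url": url,
            "identifiers": [identifier],
        }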
90 changes: 29 additions & 61 deletions metadata_backend/api/handlers/object.py
@@ -19,44 +19,6 @@
 class ObjectAPIHandler(RESTAPIHandler):
     """API Handler for Objects."""

-    async def _draft_doi(self, schema_type: str) -> Dict:
-        """Create draft DOI for study and dataset.
-
-        The Draft DOI will be created only on POST and the data added to the
-        folder. Any update of this should not be possible.
-
-        :param schema_type: schema can be either study or dataset
-        :returns: Dict with DOI of the study or dataset as well as the types.
-        """
-        doi_ops = DOIHandler()
-        _doi_data = await doi_ops.create_draft(prefix=schema_type)
-
-        LOG.debug(f"doi created with doi: {_doi_data['fullDOI']}")
-
-        data = {
-            "identifier": {
-                "identifierType": "DOI",
-                "doi": _doi_data["fullDOI"],
-            }
-        }
-        if schema_type == "study":
-            data["types"] = {
-                "bibtex": "misc",
-                "citeproc": "collection",
-                "schemaOrg": "Collection",
-                "resourceTypeGeneral": "Collection",
-            }
-        elif schema_type == "dataset":
-            data["types"] = {
-                "ris": "DATA",
-                "bibtex": "misc",
-                "citeproc": "dataset",
-                "schemaOrg": "Dataset",
-                "resourceTypeGeneral": "Dataset",
-            }
-
-        return data
-
     async def _handle_query(self, req: Request) -> Response:
         """Handle query results.
@@ -227,12 +189,6 @@ async def post_object(self, req: Request) -> Response:
         patch = self._prepare_folder_patch_new_object(collection, ids, patch_params)
         await folder_op.update_folder(folder_id, patch, schema_type)

-        # we don't create DOIs for drafts and we restrict doi creation to
-        # study and datasets
-        if not req.path.startswith("/drafts") and schema_type in _allowed_doi:
-            doi_patch = await self._prepare_folder_patch_doi(schema_type, ids)
-            await folder_op.update_folder(folder_id, doi_patch)
-
         body = ujson.dumps(data, escape_forward_slashes=False)

         return web.Response(
@@ -536,13 +492,15 @@ async def _update_metax_dataset(self, req: Request, collection: str, accession_id
"""
metax_service = MetaxServiceHandler(req)
operator = Operator(req.app["db_client"])

object_data, _ = await operator.read_metadata_object(collection, accession_id)
# MYPY related if statement, Operator (when not XMLOperator) always returns object_data as dict
if isinstance(object_data, Dict):
LOG.info("Updating draft dataset to Metax.")
metax_id = await metax_service.update_draft_dataset(collection, object_data)
else:
raise ValueError("Object's data must be dictionary")

return metax_id

async def _delete_metax_dataset(self, req: Request, metax_id: str) -> None:
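The isinstance check above mainly narrows the return type of Operator.read_metadata_object for mypy; at runtime it rejects non-dict payloads. A self-contained sketch of the same pattern:

    from typing import Dict, Union

    def ensure_dict(object_data: Union[Dict, str]) -> Dict:
        """Mirror the handler's guard: accept dicts, reject anything else."""
        if isinstance(object_data, dict):
            return object_data  # mypy now knows this is a Dict
        raise ValueError("Object's data must be dictionary")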
@@ -555,33 +513,43 @@ async def _delete_metax_dataset(self, req: Request, metax_id: str) -> None:
         metax_service = MetaxServiceHandler(req)
         await metax_service.delete_draft_dataset(metax_id)

-    # TODO: to be replaced with real doi fetching
-    async def create_doi(self) -> str:
-        """Temporary function for random DOI creation.
+    async def _draft_doi(self, schema_type: str) -> str:
+        """Create draft DOI for study and dataset.
+
+        The Draft DOI will be created only on POST and the data added to the
+        folder. Any update of this should not be possible.

-        :returns: Temporary DOI string
+        :param schema_type: schema can be either study or dataset
+        :returns: full DOI of the study or dataset as a string
         """
-        from uuid import uuid4
+        doi_ops = DOIHandler()
+        _doi_data = await doi_ops.create_draft(prefix=schema_type)

-        rand = str(uuid4()).split("-")[1:3]
-        return f"10.{rand[0]}/{rand[1]}"
+        LOG.debug(f"doi created with doi: {_doi_data['fullDOI']}")
+
+        return _doi_data["fullDOI"]

-    async def _prepare_folder_patch_doi(self, schema: str, ids: List) -> List:
+    def _prepare_folder_patch_doi(self, schema: str, doi: str, url: str) -> List:
         """Prepare patch operation for updating object's doi information in a folder.

         :param schema: schema of object to be updated
         :param ids: object IDs
         :returns: dict with patch operation
         """
         patch = []
-        for id in ids:
-            _data = await self._draft_doi(schema)
-            _data["accessionId"] = id["accessionId"]
-            if schema == "study":
-                patch_op = {"op": "add", "path": "/extraInfo/studyIdentifier", "value": _data}
-                patch.append(patch_op)
-            elif schema == "dataset":
-                patch_op = {"op": "add", "path": "/extraInfo/datasetIdentifiers/-", "value": _data}
-                patch.append(patch_op)
+
+        data = {
+            "identifier": {
+                "identifierType": "DOI",
+                "doi": doi,
+            },
+            "url": url,
+        }
+        if schema == "study":
+            patch_op = {"op": "add", "path": "/extraInfo/studyIdentifier", "value": data}
+            patch.append(patch_op)
+        elif schema == "dataset":
+            patch_op = {"op": "add", "path": "/extraInfo/datasetIdentifiers/-", "value": data}
+            patch.append(patch_op)

         return patch
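With DOI minting moved out of it, the method is now a pure function of (schema, doi, url). A standalone sketch of the JSON Patch it produces; the DOI and URL values below are made up for illustration:

    from typing import Dict, List

    def prepare_folder_patch_doi(schema: str, doi: str, url: str) -> List[Dict]:
        """Standalone version of the handler method above."""
        patch: List[Dict] = []
        data = {
            "identifier": {"identifierType": "DOI", "doi": doi},
            "url": url,
        }
        if schema == "study":
            patch.append({"op": "add", "path": "/extraInfo/studyIdentifier", "value": data})
        elif schema == "dataset":
            patch.append({"op": "add", "path": "/extraInfo/datasetIdentifiers/-", "value": data})
        return patch

    print(prepare_folder_patch_doi("dataset", "10.1234/abcd-efgh", "https://example.org/datasets/abcd"))
    # [{'op': 'add', 'path': '/extraInfo/datasetIdentifiers/-',
    #   'value': {'identifier': {'identifierType': 'DOI', 'doi': '10.1234/abcd-efgh'},
    #             'url': 'https://example.org/datasets/abcd'}}]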
10 changes: 0 additions & 10 deletions metadata_backend/helpers/schemas/folders.json
@@ -941,7 +941,6 @@
"studyIdentifier": {
"type": "object",
"required": [
"accessionId",
"identifier",
"url"
],
@@ -964,10 +963,6 @@
             }
         }
     },
-    "accessionId": {
-        "type": "string",
-        "title": "Internal accessionId for the study"
-    },
     "url": {
         "type": "string",
         "title": "URL of the digital location of the object"
@@ -983,7 +978,6 @@
"items": {
"type": "object",
"required": [
"accessionId",
"identifier",
"url"
],
@@ -1006,10 +1000,6 @@
             }
         }
     },
-    "accessionId": {
-        "type": "string",
-        "title": "Internal accessionid for the dataset"
-    },
     "url": {
         "type": "string",
         "title": "URL of the digital location of the object"
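After this change, studyIdentifier and datasetIdentifiers entries validate without an accessionId. An illustrative check: the trimmed schema below mirrors the updated folders.json, and the DOI/URL values are invented:

    from jsonschema import validate  # pip install jsonschema

    # Trimmed stand-in for the updated studyIdentifier schema.
    study_identifier_schema = {
        "type": "object",
        "required": ["identifier", "url"],
        "properties": {
            "identifier": {"type": "object"},
            "url": {"type": "string"},
        },
    }

    entry = {
        "identifier": {"identifierType": "DOI", "doi": "10.1234/abcd-efgh"},
        "url": "https://example.org/studies/abcd",
    }

    validate(instance=entry, schema=study_identifier_schema)  # passes without accessionId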
66 changes: 48 additions & 18 deletions tests/integration/run_tests.py
@@ -840,13 +840,14 @@ async def test_getting_all_objects_from_schema_works(sess, folder_id):
     await asyncio.gather(*[delete_object(sess, "sample", accession_id) for accession_id, _ in files])


-async def test_metax_crud(sess, folder_id):
+async def test_metax_crud(sess, metax_folder):
     """Test Metax service with study and dataset POST, PATCH, PUBLISH and DELETE reqs.

     :param sess: HTTP session in which request call is made
     :param folder_id: id of the folder where objects reside
     """
     # POST to object endpoint creates draft dataset in Metax for Study and Dataset
+    folder_id = await post_folder(sess, metax_folder)
     ids = []
     xml_files = set()
     for schema, filename, update_filename in {
@@ -857,15 +858,6 @@ async def test_metax_crud(sess, folder_id):
         xml_files.add((schema, accession_id, update_filename))
         ids.append([schema, accession_id])

-    json_files = set()
-    for schema, filename, update_filename in {
-        ("study", "SRP000539.json", "patch.json"),
-        ("dataset", "dataset.json", "dataset_patch.json"),
-    }:
-        accession_id = await post_object_json(sess, schema, folder_id, filename)
-        json_files.add((schema, accession_id, filename, update_filename))
-        ids.append([schema, accession_id])
-
     for object in ids:
         schema, accession_id = object
         async with sess.get(f"{objects_url}/{schema}/{accession_id}") as resp:
@@ -887,10 +879,6 @@ async def test_metax_crud(sess, folder_id):
     # PUT and PATCH to object endpoint updates draft dataset in Metax for Study and Dataset
     for schema, accession_id, filename in xml_files:
         await put_object_xml(sess, schema, accession_id, filename)
-    for schema, accession_id, filename, _ in json_files:
-        await put_object_json(sess, schema, accession_id, filename)
-    for schema, accession_id, _, filename in json_files:
-        await patch_object_json(sess, schema, accession_id, filename)

     for _, _, metax_id in ids:
         async with sess.get(f"{metax_url}/{metax_id}") as metax_resp:
@@ -903,11 +891,45 @@ async def test_metax_crud(sess, folder_id):
     # DELETE object from Metax
     for schema, accession_id, _ in xml_files:
         await delete_object(sess, schema, accession_id)
-    for schema, accession_id, _, _ in json_files:
-        await delete_object(sess, schema, accession_id)

     for _, _, metax_id in ids:
         async with sess.get(f"{metax_url}/{metax_id}") as metax_resp:
             assert metax_resp.status == 404, f"HTTP Status code error - expected 404 Not Found, got {resp.status}"
+
+    ids2 = []
+    json_files = set()
+    for schema, filename, update_filename in {
+        ("study", "SRP000539.json", "patch.json"),
+        ("dataset", "dataset.json", "dataset_patch.json"),
+    }:
+        accession_id = await post_object_json(sess, schema, folder_id, filename)
+        json_files.add((schema, accession_id, filename, update_filename))
+        ids2.append([schema, accession_id])
+
+    for object in ids2:
+        schema, accession_id = object
+        async with sess.get(f"{objects_url}/{schema}/{accession_id}") as resp:
+            assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
+            res = await resp.json()
+            try:
+                metax_id = res["metaxIdentifier"]["identifier"]
+            except KeyError:
+                assert False, "Metax ID was not in response data"
+            object.append(metax_id)
+        async with sess.get(f"{metax_url}/{metax_id}") as metax_resp:
+            assert metax_resp.status == 200, f"HTTP Status code error, got {metax_resp.status}"
+            metax_res = await metax_resp.json()
+            assert (
+                res.get("doi", None) == metax_res["research_dataset"]["preferred_identifier"]
+            ), "Object's DOI was not in Metax response data preferred_identifier"
+            assert metax_res.get("date_modified", None) is None
+
+    for schema, accession_id, filename, _ in json_files:
+        await put_object_json(sess, schema, accession_id, filename)
+    for schema, accession_id, _, filename in json_files:
+        await patch_object_json(sess, schema, accession_id, filename)
+
+    for schema, accession_id, _, _ in json_files:
+        await delete_object(sess, schema, accession_id)


 async def test_metax_id_not_updated_on_patch(sess, folder_id):
@@ -950,6 +972,13 @@ async def test_metax_publish_dataset(sess, folder_id):
             res = await resp.json()
             object.append(res["metaxIdentifier"]["identifier"])

     # Publish the folder
+    # add a study and dataset for publishing a folder
+    doi_data_raw = await create_request_json_data("doi", "test_doi.json")
+    doi_data = json.loads(doi_data_raw)
+    patch_add_doi = [{"op": "add", "path": "/doiInfo", "value": doi_data}]
+    folder_id = await patch_folder(sess, folder_id, patch_add_doi)
+
     await publish_folder(sess, folder_id)

     # TODO: This must be updated as Metax identifier will be moved to folder from object after publishing
@@ -1774,10 +1803,11 @@ async def main():
"name": "basic test pagination",
"description": "basic test pagination folder",
}
await test_metax_crud(sess, metax_folder)
metax_folder_id = await post_folder(sess, metax_folder)
await test_metax_crud(sess, metax_folder_id)
await test_metax_id_not_updated_on_patch(sess, metax_folder_id)
await test_metax_publish_dataset(sess, metax_folder_id)
metax_folder_id2 = await post_folder(sess, metax_folder)
await test_metax_publish_dataset(sess, metax_folder_id2)

# Test add, modify, validate and release action with submissions
LOG.debug("=== Testing actions within submissions ===")
Expand Down
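The net effect on the publish flow: a folder must carry doiInfo before publish_folder is called. A hedged sketch of what the "/doiInfo" patch does to a folder document; the folder content and DOI payload below are invented, the real values come from test_doi.json:

    import jsonpatch  # pip install jsonpatch

    folder = {"name": "basic test pagination", "description": "basic test pagination folder"}
    doi_data = {"creators": [{"name": "Creator, Test"}]}  # assumed shape, not from the repo

    patch_add_doi = [{"op": "add", "path": "/doiInfo", "value": doi_data}]
    patched = jsonpatch.apply_patch(folder, patch_add_doi)
    print(patched["doiInfo"])  # {'creators': [{'name': 'Creator, Test'}]}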
