From f1d89aba69e5c75df9d0104145cce5ee6abf2146 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 16 Feb 2022 16:24:24 +0200 Subject: [PATCH] create draft dois for study and datasets restrict one study per folder. deprecate creating draft when publishing folder --- metadata_backend/api/handlers/folder.py | 17 ----- metadata_backend/api/handlers/object.py | 84 ++++++++++++++++++++++++- metadata_backend/helpers/doi.py | 6 +- tests/test_doi.py | 4 +- tests/test_handlers.py | 29 ++++----- 5 files changed, 101 insertions(+), 39 deletions(-) diff --git a/metadata_backend/api/handlers/folder.py b/metadata_backend/api/handlers/folder.py index a651e55ce..121e7e413 100644 --- a/metadata_backend/api/handlers/folder.py +++ b/metadata_backend/api/handlers/folder.py @@ -11,7 +11,6 @@ from multidict import CIMultiDict from ...conf.conf import publisher -from ...helpers.doi import DOIHandler from ...helpers.logger import LOG from ...helpers.validator import JSONValidator from ..middlewares import get_session @@ -260,9 +259,6 @@ async def publish_folder(self, req: Request) -> Response: obj_ops = Operator(db_client) # Create draft DOI and delete draft objects from the folder - doi = DOIHandler() - doi_data = await doi.create_draft_doi() - identifier = {"identifierType": "DOI", "doi": doi_data["fullDOI"]} for obj in folder["drafts"]: await obj_ops.delete_metadata_object(obj["schema"], obj["accessionId"]) @@ -272,20 +268,7 @@ async def publish_folder(self, req: Request) -> Response: {"op": "replace", "path": "/published", "value": True}, {"op": "replace", "path": "/drafts", "value": []}, {"op": "add", "path": "/datePublished", "value": int(datetime.now().timestamp())}, - {"op": "add", "path": "/extraInfo/identifier", "value": identifier}, - {"op": "add", "path": "/extraInfo/url", "value": doi_data["dataset"]}, {"op": "add", "path": "/extraInfo/publisher", "value": publisher}, - { - "op": "add", - "path": "/extraInfo/types", - "value": { - "ris": "DATA", - "bibtex": "misc", - "citeproc": "dataset", - "schemaOrg": "Dataset", - "resourceTypeGeneral": "Dataset", - }, - }, {"op": "add", "path": "/extraInfo/publicationYear", "value": date.today().year}, ] new_folder = await operator.update_folder(folder_id, patch) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index b278dbec4..b3402ab75 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -12,11 +12,57 @@ from ..operators import FolderOperator, Operator, XMLOperator from .common import multipart_content from .restapi import RESTAPIHandler +from ...helpers.doi import DOIHandler class ObjectAPIHandler(RESTAPIHandler): """API Handler for Objects.""" + def __init__(self) -> None: + """Init Object handler.""" + super().__init__() + self.doi = DOIHandler() + + async def _draft_doi(self, schema_type: str) -> Dict: + """Create draft DOI for study and dataset. + + The Draft DOI will be created only on POST and the data added to the + folder. Any update of this should not be possible. + + :param schema_type: schema can be either study or dataset + :returns: Dict with DOI of the study or dataset as well as the types. + """ + _doi_data = await self.doi.create_draft(prefix=schema_type) + + LOG.debug(f"doi created with doi: {_doi_data['fullDOI']}") + + data: Dict = {} + if schema_type == "study": + data["identifier"] = { + "identifierType": "DOI", + "doi": _doi_data["fullDOI"], + } + data["types"] = { + "bibtex": "misc", + "citeproc": "collection", + "schemaOrg": "Collection", + "resourceTypeGeneral": "Collection", + } + elif schema_type == "dataset": + data["identifier"] = { + "identifierType": "DOI", + "doi": _doi_data["fullDOI"], + } + data["types"] = { + "ris": "DATA", + "bibtex": "misc", + "citeproc": "dataset", + "schemaOrg": "Dataset", + "resourceTypeGeneral": "Dataset", + } + + return data + async def _handle_query(self, req: Request) -> Response: """Handle query results. @@ -100,6 +146,7 @@ async def post_object(self, req: Request) -> Response: :returns: JSON response containing accessionId for submitted object """ _allowed_csv = ["sample"] + _allowed_doi = {"study", "dataset"} schema_type = req.match_info["schema"] folder_id = req.query.get("folder", "") @@ -112,6 +159,16 @@ async def post_object(self, req: Request) -> Response: collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type db_client = req.app["db_client"] + folder_op = FolderOperator(db_client) + + # we need to check if there is already a study in a folder + # we only allow one study per folder + if not req.path.startswith("/drafts") and schema_type == "study": + _ids = await folder_op.get_collection_objects(folder_id, collection) + if len(_ids) == 1: + reason = "Only one study is allowed per submission." + raise web.HTTPBadRequest(reason=reason) + content: Union[Dict[str, Any], str, List[Tuple[Any, str]]] operator: Union[Operator, XMLOperator] if req.content_type == "multipart/form-data": @@ -156,10 +213,15 @@ async def post_object(self, req: Request) -> Response: # Gathering data for object to be added to folder ids = [dict(data, **{"title": title})] - folder_op = FolderOperator(db_client) patch = self._prepare_folder_patch_new_object(collection, ids, patch_params) await folder_op.update_folder(folder_id, patch) + # we don't create DOIs for drafts and we restrict doi creation to + # study and datasets + if not req.path.startswith("/drafts") and schema_type in _allowed_doi: + doi_patch = await self._prepare_folder_patch_doi(schema_type, ids) + await folder_op.update_folder(folder_id, doi_patch) + body = ujson.dumps(data, escape_forward_slashes=False) return web.Response( @@ -387,3 +449,23 @@ def _prepare_folder_patch_update_object( } ) return [patch_op] + + async def _prepare_folder_patch_doi(self, schema: str, ids: List) -> List: + """Prepare patch operation for updating object's doi information in a folder. + + :param schema: schema of object to be updated + :param ids: object IDs + :returns: dict with patch operation + """ + patch = [] + for id in ids: + _data = await self._draft_doi(schema) + _data["accessionId"] = id["accessionId"] + if schema == "study": + patch_op = {"op": "add", "path": "/extraInfo/studyIdentifier", "value": _data} + patch.append(patch_op) + elif schema == "dataset": + patch_op = {"op": "add", "path": "/extraInfo/datasetIdentifiers/-", "value": _data} + patch.append(patch_op) + + return patch diff --git a/metadata_backend/helpers/doi.py b/metadata_backend/helpers/doi.py index 769a47ab0..c3de8ac51 100644 --- a/metadata_backend/helpers/doi.py +++ b/metadata_backend/helpers/doi.py @@ -3,7 +3,7 @@ The DOI handler from SDA orchestration was used as reference: https://github.com/neicnordic/sda-orchestration/blob/master/sda_orchestrator/utils/id_ops.py """ -from typing import Dict +from typing import Dict, Union from uuid import uuid4 from aiohttp import web, ClientSession, BasicAuth @@ -23,10 +23,10 @@ def __init__(self) -> None: self.doi_key = conf.doi_key self.doi_url = f"{conf.datacite_url.rstrip('/')}/{self.doi_prefix}" - async def create_draft_doi(self) -> Dict: + async def create_draft(self, prefix: Union[str, None] = None) -> Dict: """Generate random suffix and POST request a draft DOI to DataCite DOI API.""" suffix = uuid4().hex[:10] - doi_suffix = f"{suffix[:4]}-{suffix[4:]}" + doi_suffix = f"{prefix}.{suffix[:4]}-{suffix[4:]}" if prefix else f"{suffix[:4]}-{suffix[4:]}" headers = {"Content-Type": "application/json"} doi_payload = {"data": {"type": "dois", "attributes": {"doi": f"{self.doi_prefix}/{doi_suffix}"}}} diff --git a/tests/test_doi.py b/tests/test_doi.py index 9a18defe0..0e8818aa6 100644 --- a/tests/test_doi.py +++ b/tests/test_doi.py @@ -19,7 +19,7 @@ async def test_400_is_raised(self): with patch("aiohttp.ClientSession.post") as mocked_post: mocked_post.return_value.status_code = 400 with self.assertRaises(web.HTTPBadRequest) as err: - await self.doi.create_draft_doi() + await self.doi.create_draft() self.assertEqual(str(err.exception), "DOI API draft creation request failed with code: 400") async def test_create_doi_draft_works(self): @@ -39,7 +39,7 @@ async def test_create_doi_draft_works(self): } } - output = await self.doi.create_draft_doi() + output = await self.doi.create_draft() assert mocked_post.called result = {"fullDOI": "10.xxxx/yyyyy", "dataset": "https://doi.org/10.xxxx/yyyyy"} self.assertEqual(output, result) diff --git a/tests/test_handlers.py b/tests/test_handlers.py index cf7173f7e..79d032e22 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -359,6 +359,8 @@ async def setUpAsync(self): await super().setUpAsync() + self._mock_draf_doi = "metadata_backend.api.handlers.object.ObjectAPIHandler._draft_doi" + class_xmloperator = "metadata_backend.api.handlers.object.XMLOperator" self.patch_xmloperator = patch(class_xmloperator, **self.xmloperator_config, spec=True) self.MockedXMLOperator = self.patch_xmloperator.start() @@ -387,10 +389,11 @@ async def test_submit_object_works(self): """Test that submission is handled, XMLOperator is called.""" files = [("study", "SRP000539.xml")] data = self.create_submission_data(files) - response = await self.client.post("/objects/study", params={"folder": "some id"}, data=data) - self.assertEqual(response.status, 201) - self.assertIn(self.test_ega_string, await response.text()) - self.MockedXMLOperator().create_metadata_object.assert_called_once() + with patch(self._mock_draf_doi, return_value=self._draf_doi_data): + response = await self.client.post("/objects/study", params={"folder": "some id"}, data=data) + self.assertEqual(response.status, 201) + self.assertIn(self.test_ega_string, await response.text()) + self.MockedXMLOperator().create_metadata_object.assert_called_once() async def test_submit_object_works_with_json(self): """Test that JSON submission is handled, operator is called.""" @@ -403,10 +406,11 @@ async def test_submit_object_works_with_json(self): "studyAbstract": "abstract description for testing", }, } - response = await self.client.post("/objects/study", params={"folder": "some id"}, json=json_req) - self.assertEqual(response.status, 201) - self.assertIn(self.test_ega_string, await response.text()) - self.MockedOperator().create_metadata_object.assert_called_once() + with patch(self._mock_draf_doi, return_value=self._draf_doi_data): + response = await self.client.post("/objects/study", params={"folder": "some id"}, json=json_req) + self.assertEqual(response.status, 201) + self.assertIn(self.test_ega_string, await response.text()) + self.MockedOperator().create_metadata_object.assert_called_once() async def test_submit_object_missing_field_json(self): """Test that JSON has missing property.""" @@ -418,6 +422,7 @@ async def test_submit_object_missing_field_json(self): async def test_submit_object_bad_field_json(self): """Test that JSON has bad studyType.""" + self.MockedDoiHandler().create_draft.return_value = self.test_draft_doi json_req = { "centerName": "GEO", "alias": "GSE10966", @@ -812,11 +817,6 @@ async def setUpAsync(self): await super().setUpAsync() - self.test_draft_doi = {"fullDOI": "10.xxxx/yyyyy", "dataset": "https://doi.org/10.xxxx/yyyyy"} - class_doihandler = "metadata_backend.api.handlers.folder.DOIHandler" - self.patch_doihandler = patch(class_doihandler, spec=True) - self.MockedDoiHandler = self.patch_doihandler.start() - class_folderoperator = "metadata_backend.api.handlers.folder.FolderOperator" self.patch_folderoperator = patch(class_folderoperator, **self.folderoperator_config, spec=True) self.MockedFolderOperator = self.patch_folderoperator.start() @@ -832,7 +832,6 @@ async def setUpAsync(self): async def tearDownAsync(self): """Cleanup mocked stuff.""" await super().tearDownAsync() - self.patch_doihandler.stop() self.patch_folderoperator.stop() self.patch_useroperator.stop() self.patch_operator.stop() @@ -943,10 +942,8 @@ async def test_update_folder_passes(self): async def test_folder_is_published(self): """Test that folder would be published and DOI would be added.""" - self.MockedDoiHandler().create_draft_doi.return_value = self.test_draft_doi self.MockedFolderOperator().update_folder.return_value = self.folder_id response = await self.client.patch("/publish/FOL12345678") - self.MockedDoiHandler().create_draft_doi.assert_called_once() self.MockedFolderOperator().update_folder.assert_called_once() self.assertEqual(response.status, 200) json_resp = await response.json()