add study and dataset identifiers to folder schema #332

Merged Mar 8, 2022 · 24 commits

Commits
fc4c73b
Update swagger doc
genie9 Jan 26, 2022
9cb8520
add study and dataset identifiers to folder schema
blankdots Jan 10, 2022
ef01c6a
make dataset description mandatory
blankdots Jan 25, 2022
2c6faee
make study abstract mandatory
blankdots Jan 25, 2022
779a12e
add keywords to folder doi info
blankdots Jan 25, 2022
e5efb18
not all functions need to be async
blankdots Feb 15, 2022
135ba97
correct error reasons for operators
blankdots Feb 16, 2022
48e4d04
create draft dois for study and datasets
blankdots Feb 16, 2022
c9e5904
Resource Type formatted for front-end display
blankdots Feb 17, 2022
106e0a6
multilevel patch objects
blankdots Feb 17, 2022
06a76b0
refactor doi in objects to make it easier to test
blankdots Feb 17, 2022
554c08e
mock doi API PUT request for publishing DOI
blankdots Feb 17, 2022
dbcbfa5
publish Study and Dataset when publishing folders
blankdots Feb 17, 2022
e91b222
integration tests for publishing folder adjusted
blankdots Feb 17, 2022
4dd96db
adjust message logs for integration tests
blankdots Feb 17, 2022
075171d
attempt to curb race condition with study POST
blankdots Feb 18, 2022
504f9c2
not all functions need to be async
blankdots Feb 15, 2022
8810ed0
create draft dois for study and datasets
blankdots Feb 16, 2022
a516043
publish Study and Dataset when publishing folders
blankdots Feb 17, 2022
4bbc8fa
attempt to curb race condition with study POST
blankdots Feb 18, 2022
3328fc3
check values in set instead of list
blankdots Feb 15, 2022
a0390ed
add to set instead of extending list
blankdots Feb 28, 2022
e67d7d1
integrating doi with metax
blankdots Mar 2, 2022
d386c7d
rebase with metax integrations
blankdots Mar 7, 2022
5 changes: 5 additions & 0 deletions docs/specification.yml
@@ -293,6 +293,11 @@ paths:
- Submission
summary: Submit data to a specific schema
parameters:
- in: query
name: folder
schema:
type: string
description: The folder ID that the object belongs to.
- name: schema
in: path
description: Name of the Metadata schema.
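As an illustration, a minimal client sketch for the new query parameter (the host, schema name and folder ID are hypothetical, and the /objects/{schema} path is assumed from the endpoint summary):

    import requests

    # submit an XML metadata object and attach it to an existing folder
    # via the new "folder" query parameter (all values are made up)
    with open("study.xml", "rb") as f:
        response = requests.post(
            "http://localhost:5430/objects/study",  # assumed submission endpoint
            params={"folder": "folder123"},         # folder the object belongs to
            data=f.read(),
            headers={"Content-Type": "application/xml"},
        )
    print(response.status_code)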
178 changes: 150 additions & 28 deletions metadata_backend/api/handlers/folder.py
@@ -3,18 +3,18 @@
from datetime import date, datetime
from distutils.util import strtobool
from math import ceil
from typing import Any, Dict, List, Tuple

import ujson
from aiohttp import web
from aiohttp.web import Request, Response
from multidict import CIMultiDict

from ...conf.conf import publisher
from ...helpers.doi import DOIHandler
from ...helpers.logger import LOG
from ...helpers.validator import JSONValidator
from ...helpers.metax_api_handler import MetaxServiceHandler
from ..middlewares import get_session
from ..operators import FolderOperator, Operator, UserOperator
from .restapi import RESTAPIHandler
@@ -23,6 +23,135 @@
class FolderAPIHandler(RESTAPIHandler):
"""API Handler for folders."""

def _prepare_doi_update(self, folder: Dict) -> Tuple[Dict, List]:
"""Prepare dictionary with values for the Datacite DOI update.

We need to prepare data for the Study and Datasets, publish a DOI for each,
and create links (relatedIdentifiers) between the Study and the Datasets.
All the required information should be in the folder's ``doiInfo``, as well
as in ``extraInfo``, which contains the draft DOIs created for the Study
and each Dataset.

:param folder: Folder data
:returns: Tuple with the Study and list of Datasets.
"""

_general_info = {
"attributes": {
"publisher": publisher,
"publicationYear": date.today().year,
"event": "publish",
"schemaVersion": "https://schema.datacite.org/meta/kernel-4",
},
}

study = {}
datasets = []

# we need to re-format these values for Datacite; in the JSON schemas
# the words are split so that the front-end can display them nicely
_info = folder["doiInfo"]
if "relatedIdentifiers" in _info:
for d in _info["relatedIdentifiers"]:
d.update((k, "".join(v.split())) for k, v in d.items() if k in {"resourceTypeGeneral", "relationType"})

if "contributors" in _info:
for d in _info["contributors"]:
d.update((k, "".join(v.split())) for k, v in d.items() if k == "contributorType")

if "descriptions" in _info:
for d in _info["descriptions"]:
d.update((k, "".join(v.split())) for k, v in d.items() if k == "descriptionType")

if "fundingReferences" in _info:
for d in _info["fundingReferences"]:
d.update((k, "".join(v.split())) for k, v in d.items() if k == "funderIdentifierType")
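# illustration (not part of the original diff): the join/split pattern
# above turns the display-friendly values back into the camel-cased
# tokens Datacite expects, e.g. "".join("Is Described By".split()) -> "IsDescribedBy"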
# need to add titles and descriptions for datasets and study
try:
# keywords are only required for Metax integration
# thus we remove them
_info.pop("keywords", None)
_general_info["attributes"].update(_info)

_study = folder["extraInfo"]["studyIdentifier"]
_study_doi = _study["identifier"]["doi"]
study = {
"attributes": {
"doi": _study_doi,
"prefix": _study_doi.split("/")[0],
"suffix": _study_doi.split("/")[1],
"types": {
"bibtex": "misc",
"citeproc": "collection",
"schemaOrg": "Collection",
"resourceTypeGeneral": "Collection",
},
"url": _study["url"],
"identifiers": [_study["identifier"]],
},
"id": _study_doi,
"type": "dois",
}

study.update(_general_info)

_datasets = folder["extraInfo"]["datasetIdentifiers"]
for ds in _datasets:
_doi = ds["identifier"]["doi"]
_tmp = {
"attributes": {
"doi": _doi,
"prefix": _doi.split("/")[0],
"suffix": _doi.split("/")[1],
"types": {
"ris": "DATA",
"bibtex": "misc",
"citeproc": "dataset",
"schemaOrg": "Dataset",
"resourceTypeGeneral": "Dataset",
},
"url": ds["url"],
"identifiers": [ds["identifier"]],
},
"id": _doi,
"type": "dois",
}
_tmp.update(_general_info)

# A Dataset is described by a Study
if "relatedIdentifiers" not in _tmp["attributes"]:
_tmp["attributes"]["relatedIdentifiers"] = []

_tmp["attributes"]["relatedIdentifiers"].append(
{
"relationType": "IsDescribedBy",
"relatedIdentifier": _study_doi,
"resourceTypeGeneral": "Collection",
"relatedIdentifierType": "DOI",
}
)

datasets.append(_tmp)

# A Study describes a Dataset
if "relatedIdentifiers" not in study["attributes"]:
study["attributes"]["relatedIdentifiers"] = []

study["attributes"]["relatedIdentifiers"].append(
{
"relationType": "Describes",
"relatedIdentifier": _doi,
"resourceTypeGeneral": "Dataset",
"relatedIdentifierType": "DOI",
}
)
except Exception as e:
reason = f"Could not construct DOI data, reason: {e}"
LOG.error(reason)
raise web.HTTPBadRequest(reason=reason)

return (study, datasets)

def _check_patch_folder(self, patch_ops: Any) -> None:
"""Check patch operations in request are valid.

@@ -34,29 +163,29 @@ def _check_patch_folder(self, patch_ops: Any) -> None:
:raises: HTTPUnauthorized if request tries to do anything other than add or replace
:returns: None
"""
_required_paths = ["/name", "/description"]
_required_values = ["schema", "accessionId"]
_arrays = ["/metadataObjects/-", "/drafts/-", "/doiInfo"]
_required_paths = {"/name", "/description"}
_required_values = {"schema", "accessionId"}
_arrays = {"/metadataObjects/-", "/drafts/-", "/doiInfo"}
_tags = re.compile("^/(metadataObjects|drafts)/[0-9]*/(tags)$")
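# e.g. this pattern matches "/metadataObjects/0/tags" and "/drafts/12/tags",
# but not "/doiInfo" or "/name"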

for op in patch_ops:
if _tags.match(op["path"]):
LOG.info(f"{op['op']} on tags in folder")
if "submissionType" in op["value"].keys() and op["value"]["submissionType"] not in [
if "submissionType" in op["value"].keys() and op["value"]["submissionType"] not in {
"XML",
"CSV",
"Form",
}:
reason = "submissionType is restricted to either 'CSV', 'XML' or 'Form' values."
LOG.error(reason)
raise web.HTTPBadRequest(reason=reason)
else:
if all(i not in op["path"] for i in set.union(_required_paths, _arrays)):
reason = f"Request contains '{op['path']}' key that cannot be updated to folders."
LOG.error(reason)
raise web.HTTPBadRequest(reason=reason)
if op["op"] in ["remove", "copy", "test", "move"]:
if op["op"] in {"remove", "copy", "test", "move"}:
reason = f"{op['op']} on {op['path']} is not allowed."
LOG.error(reason)
raise web.HTTPUnauthorized(reason=reason)
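For illustration, a JSON Patch body that passes these checks (all identifiers are hypothetical):

    patch_ops = [
        {"op": "replace", "path": "/name", "value": "My folder"},
        {
            "op": "add",
            "path": "/metadataObjects/-",
            "value": {"accessionId": "EDAG123456", "schema": "study", "tags": {"submissionType": "Form"}},
        },
    ]

A remove, copy, test or move operation, or a path outside the allowed set, is rejected with the errors above.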
@@ -74,7 +203,7 @@ def _check_patch_folder(self, patch_ops: Any) -> None:
if (
"tags" in item
and "submissionType" in item["tags"]
and item["tags"]["submissionType"] not in ["XML", "Form"]
and item["tags"]["submissionType"] not in {"XML", "CSV", "Form"}
):
reason = "submissionType is restricted to either 'XML' or 'Form' values."
LOG.error(reason)
@@ -99,7 +228,7 @@ async def get_folders(self, req: Request) -> Response:
# Check if only published or draft folders are requested
if "published" in req.query:
pub_param = req.query.get("published", "").title()
if pub_param in {"True", "False"}:
folder_query["published"] = {"$eq": bool(strtobool(pub_param))}
else:
reason = "'published' parameter must be either 'true' or 'false'"
@@ -151,7 +280,7 @@ async def get_folders(self, req: Request) -> Response:
)

url = f"{req.scheme}://{req.host}{req.path}"
link_headers = self._header_links(url, page, per_page, total_folders)
LOG.debug(f"Pagination header links: {link_headers}")
LOG.info(f"Querying for user's folders resulted in {total_folders} folders")
return web.Response(
@@ -258,12 +387,18 @@ async def publish_folder(self, req: Request) -> Response:

folder = await operator.read_folder(folder_id)

# we first try to publish the DOI before actually publishing the folder
study, datasets = self._prepare_doi_update(folder)

doi_ops = DOIHandler()

await doi_ops.set_state(study)
for ds in datasets:
await doi_ops.set_state(ds)
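# note: set_state submits each payload prepared by _prepare_doi_update
# (it carries "event": "publish"), so the Study and Dataset draft DOIs
# are published before the folder itself is marked published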

obj_ops = Operator(db_client)

for obj in folder["drafts"]:
await obj_ops.delete_metadata_object(obj["schema"], obj["accessionId"])
patch = [
{"op": "replace", "path": "/published", "value": True},
{"op": "replace", "path": "/drafts", "value": []},
{"op": "add", "path": "/datePublished", "value": int(datetime.now().timestamp())},
{"op": "add", "path": "/extraInfo/identifier", "value": identifier},
{"op": "add", "path": "/extraInfo/url", "value": doi_data["dataset"]},
{"op": "add", "path": "/extraInfo/publisher", "value": publisher},
{
"op": "add",
"path": "/extraInfo/types",
"value": {
"ris": "DATA",
"bibtex": "misc",
"citeproc": "dataset",
"schemaOrg": "Dataset",
"resourceTypeGeneral": "Dataset",
},
},
{"op": "add", "path": "/extraInfo/publicationYear", "value": date.today().year},
]
new_folder = await operator.update_folder(folder_id, patch)
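For reference, a sketch of the extraInfo structure that _prepare_doi_update reads at publish time (field names come from the handler code above; all values are hypothetical):

    extra_info = {
        "studyIdentifier": {
            "identifier": {"identifierType": "DOI", "doi": "10.xxxx/example-study"},
            "url": "https://doi.org/10.xxxx/example-study",
        },
        "datasetIdentifiers": [
            {
                "identifier": {"identifierType": "DOI", "doi": "10.xxxx/example-dataset"},
                "url": "https://doi.org/10.xxxx/example-dataset",
            }
        ],
    }

Each DOI is split on "/" to derive the Datacite prefix and suffix, so full DOI strings are expected.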