From a613169bf79a87cfd428d62dc4b1f38af101cd27 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 3 Sep 2021 00:45:27 +0300 Subject: [PATCH 001/336] fix typos for operators.py and test_parser --- metadata_backend/api/operators.py | 2 +- tests/test_parser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 9d5924b3a..7535b15cf 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -524,7 +524,7 @@ async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: str :returns: Accession Id for object inserted to database """ db_client = self.db_service.db_client - # remove `drafs-` from schema type + # remove `draft-` from schema type schema = schema_type[6:] if schema_type.startswith("draft") else schema_type data_as_json = XMLToJSONParser().parse(schema, data) accession_id = await Operator(db_client)._format_data_to_create_and_add_to_db(schema_type, data_as_json) diff --git a/tests/test_parser.py b/tests/test_parser.py index 8b0f63544..3ce80b8b7 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -9,7 +9,7 @@ class ParserTestCase(unittest.TestCase): - """API endpoint class test cases.""" + """Parser Test Cases.""" TESTFILES_ROOT = Path(__file__).parent / "test_files" From 539b15925450b87495e44d8ed5726fb0050238fe Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 3 Sep 2021 00:52:25 +0300 Subject: [PATCH 002/336] switch to ujson --- metadata_backend/api/handlers.py | 49 ++++++++++++++++----------- metadata_backend/api/health.py | 6 ++-- metadata_backend/api/middlewares.py | 11 +++--- metadata_backend/conf/conf.py | 4 +-- metadata_backend/helpers/logger.py | 4 +-- metadata_backend/helpers/validator.py | 10 +++--- tests/mockups.py | 4 +-- tests/test_auth.py | 6 ++-- tests/test_handlers.py | 2 +- 9 files changed, 54 insertions(+), 42 deletions(-) diff --git a/metadata_backend/api/handlers.py b/metadata_backend/api/handlers.py index a0ce381f0..58454e51d 100644 --- a/metadata_backend/api/handlers.py +++ b/metadata_backend/api/handlers.py @@ -1,4 +1,5 @@ """Handle HTTP methods for server.""" +import ujson import json import re import mimetypes @@ -179,7 +180,7 @@ async def get_schema_types(self, req: Request) -> Response: :param req: GET Request :returns: JSON list of schema types """ - types_json = json.dumps([x["description"] for x in schema_types.values()]) + types_json = ujson.dumps([x["description"] for x in schema_types.values()], escape_forward_slashes=False) LOG.info(f"GET schema types. 
Retrieved {len(schema_types)} schemas.") return web.Response(body=types_json, status=200, content_type="application/json") @@ -197,7 +198,9 @@ async def get_json_schema(self, req: Request) -> Response: try: schema = JSONSchemaLoader().get_schema(schema_type) LOG.info(f"{schema_type} schema loaded.") - return web.Response(body=json.dumps(schema), status=200, content_type="application/json") + return web.Response( + body=ujson.dumps(schema, escape_forward_slashes=False), status=200, content_type="application/json" + ) except SchemaNotFoundException as error: reason = f"{error} ({schema_type})" @@ -249,7 +252,7 @@ async def _handle_query(self, req: Request) -> Response: collection, req.query, page, per_page, filter_list ) - result = json.dumps( + result = ujson.dumps( { "page": { "page": page_num, @@ -258,7 +261,8 @@ async def _handle_query(self, req: Request) -> Response: "totalObjects": total_objects, }, "objects": data, - } + }, + escape_forward_slashes=False, ) url = f"{req.scheme}://{req.host}{req.path}" link_headers = await self._header_links(url, page_num, per_page, total_objects) @@ -275,7 +279,7 @@ async def get_object(self, req: Request) -> Response: """Get one metadata object by its accession id. Returns original XML object from backup if format query parameter is - set, otherwise json. + set, otherwise JSON. :param req: GET request :returns: JSON or XML response containing metadata object @@ -296,7 +300,7 @@ async def get_object(self, req: Request) -> Response: data, content_type = await operator.read_metadata_object(type_collection, accession_id) - data = data if req_format == "xml" else json.dumps(data) + data = data if req_format == "xml" else ujson.dumps(data, escape_forward_slashes=False) LOG.info(f"GET object with accesssion ID {accession_id} from schema {collection}.") return web.Response(body=data, status=200, content_type=content_type) @@ -328,7 +332,7 @@ async def post_object(self, req: Request) -> Response: accession_id = await operator.create_metadata_object(collection, content) - body = json.dumps({"accessionId": accession_id}) + body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) url = f"{req.scheme}://{req.host}{req.path}" location_headers = CIMultiDict(Location=f"{url}{accession_id}") LOG.info(f"POST object with accesssion ID {accession_id} in schema {collection} was successful.") @@ -427,7 +431,7 @@ async def put_object(self, req: Request) -> Response: accession_id = await operator.replace_metadata_object(collection, accession_id, content) - body = json.dumps({"accessionId": accession_id}) + body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) LOG.info(f"PUT object with accession ID {accession_id} in schema {collection} was successful.") return web.Response(body=body, status=200, content_type="application/json") @@ -468,7 +472,7 @@ async def patch_object(self, req: Request) -> Response: accession_id = await operator.update_metadata_object(collection, accession_id, content) - body = json.dumps({"accessionId": accession_id}) + body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) LOG.info(f"PATCH object with accession ID {accession_id} in schema {collection} was successful.") return web.Response(body=body, status=200, content_type="application/json") @@ -557,7 +561,7 @@ async def get_folders(self, req: Request) -> Response: folder_operator = FolderOperator(db_client) folders, total_folders = await folder_operator.query_folders(folder_query, page, per_page) - result = json.dumps( + 
result = ujson.dumps( { "page": { "page": page, @@ -566,7 +570,8 @@ async def get_folders(self, req: Request) -> Response: "totalFolders": total_folders, }, "folders": folders, - } + }, + escape_forward_slashes=False, ) url = f"{req.scheme}://{req.host}{req.path}" @@ -599,7 +604,7 @@ async def post_folder(self, req: Request) -> Response: current_user = get_session(req)["user_info"] await user_op.assign_objects(current_user, "folders", [folder]) - body = json.dumps({"folderId": folder}) + body = ujson.dumps({"folderId": folder}, escape_forward_slashes=False) url = f"{req.scheme}://{req.host}{req.path}" location_headers = CIMultiDict(Location=f"{url}/{folder}") @@ -624,7 +629,9 @@ async def get_folder(self, req: Request) -> Response: folder = await operator.read_folder(folder_id) LOG.info(f"GET folder with ID {folder_id} was successful.") - return web.Response(body=json.dumps(folder), status=200, content_type="application/json") + return web.Response( + body=ujson.dumps(folder, escape_forward_slashes=False), status=200, content_type="application/json" + ) async def patch_folder(self, req: Request) -> Response: """Update object folder with a specific folder id. @@ -654,7 +661,7 @@ async def patch_folder(self, req: Request) -> Response: upd_folder = await operator.update_folder(folder_id, patch_ops if isinstance(patch_ops, list) else [patch_ops]) - body = json.dumps({"folderId": upd_folder}) + body = ujson.dumps({"folderId": upd_folder}, escape_forward_slashes=False) LOG.info(f"PATCH folder with ID {upd_folder} was successful.") return web.Response(body=body, status=200, content_type="application/json") @@ -686,7 +693,7 @@ async def publish_folder(self, req: Request) -> Response: ] new_folder = await operator.update_folder(folder_id, patch) - body = json.dumps({"folderId": new_folder}) + body = ujson.dumps({"folderId": new_folder}, escape_forward_slashes=False) LOG.info(f"Patching folder with ID {new_folder} was successful.") return web.Response(body=body, status=200, content_type="application/json") @@ -797,7 +804,7 @@ async def get_user(self, req: Request) -> Response: # Return only list of drafts or list of folder IDs owned by the user result, link_headers = await self._get_user_items(req, current_user, item_type) return web.Response( - body=json.dumps(result), + body=ujson.dumps(result, escape_forward_slashes=False), status=200, headers=link_headers, content_type="application/json", @@ -808,7 +815,9 @@ async def get_user(self, req: Request) -> Response: operator = UserOperator(db_client) user = await operator.read_user(current_user) LOG.info(f"GET user with ID {user_id} was successful.") - return web.Response(body=json.dumps(user), status=200, content_type="application/json") + return web.Response( + body=ujson.dumps(user, escape_forward_slashes=False), status=200, content_type="application/json" + ) async def patch_user(self, req: Request) -> Response: """Update user object with a specific user ID. 
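The json-to-ujson swap in the hunks above is not byte-for-byte compatible with the standard library, which is why every dumps() call site gains an explicit escape_forward_slashes=False argument. A minimal standalone sketch of the two behavioural differences; it is not part of the patch and assumes the ujson 4.x pinned later in this series:

    import json

    import ujson

    payload = {"isValid": True, "instance": "/objects/study"}

    # stdlib json: spaced separators, forward slashes left alone
    print(json.dumps(payload))   # {"isValid": true, "instance": "/objects/study"}

    # ujson default: compact separators, and "/" escaped as "\/"
    print(ujson.dumps(payload))  # {"isValid":true,"instance":"\/objects\/study"}

    # escape_forward_slashes=False restores plain slashes; the output stays compact,
    # which is why a test later in this patch expects '{"isValid":true}'
    print(ujson.dumps(payload, escape_forward_slashes=False))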
@@ -831,7 +840,7 @@ async def patch_user(self, req: Request) -> Response: current_user = get_session(req)["user_info"] user = await operator.update_user(current_user, patch_ops if isinstance(patch_ops, list) else [patch_ops]) - body = json.dumps({"userId": user}) + body = ujson.dumps({"userId": user}) LOG.info(f"PATCH user with ID {user} was successful.") return web.Response(body=body, status=200, content_type="application/json") @@ -991,7 +1000,7 @@ async def submit(self, req: Request) -> Response: result = await self._execute_action(schema_type, content_xml, db_client, action) results.append(result) - body = json.dumps(results) + body = ujson.dumps(results, escape_forward_slashes=False) LOG.info(f"Processed a submission of {len(results)} actions.") return web.Response(body=body, status=200, content_type="application/json") @@ -1067,7 +1076,7 @@ async def _execute_action(self, schema: str, content: str, db_client: AsyncIOMot elif action == "validate": validator = await self._perform_validation(schema, content) - return json.loads(validator.resp_body) + return ujson.loads(validator.resp_body) else: reason = f"Action {action} in XML is not supported." diff --git a/metadata_backend/api/health.py b/metadata_backend/api/health.py index bc56100a0..8e5a0e617 100644 --- a/metadata_backend/api/health.py +++ b/metadata_backend/api/health.py @@ -1,5 +1,5 @@ """Handle health check endpoint.""" -import json +import ujson import time from typing import Dict, Union, Any @@ -35,7 +35,9 @@ async def get_health_status(self, req: Request) -> Response: full_status["services"] = services LOG.info("Health status collected.") - return web.Response(body=json.dumps(full_status), status=200, content_type="application/json") + return web.Response( + body=ujson.dumps(full_status, escape_forward_slashes=False), status=200, content_type="application/json" + ) async def create_test_db_client(self) -> AsyncIOMotorClient: """Initialize a new database client to test Mongo connection. 
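The middlewares.py diff below feeds ujson output straight into Fernet when sealing the session cookie. A self-contained sketch of that round trip, with a throwaway key standing in for the app["Crypt"] object this server keeps; the cookie fields here are illustrative, not copied from the patch:

    import ujson
    from cryptography.fernet import Fernet, InvalidToken

    crypt = Fernet(Fernet.generate_key())  # stands in for request.app["Crypt"]

    cookie = {"id": "abc123", "referer": "", "signature": ""}
    sealed = crypt.encrypt(ujson.dumps(cookie).encode("utf-8")).decode("utf-8")

    try:
        # decrypt_cookie() in the diff performs this inverse operation
        restored = ujson.loads(crypt.decrypt(sealed.encode("utf-8")).decode("utf-8"))
        assert restored == cookie
    except InvalidToken:
        # a tampered or foreign cookie is rejected; the middleware answers with 401
        raise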
diff --git a/metadata_backend/api/middlewares.py b/metadata_backend/api/middlewares.py index 0b04cf7b1..657b8e96d 100644 --- a/metadata_backend/api/middlewares.py +++ b/metadata_backend/api/middlewares.py @@ -1,5 +1,5 @@ """Middleware methods for server.""" -import json +import ujson from http import HTTPStatus from typing import Callable, Tuple from cryptography.fernet import InvalidToken @@ -161,7 +161,7 @@ def generate_cookie(request: Request) -> Tuple[dict, str]: } # Return a tuple of the session as an encrypted JSON string, and the # cookie itself - return (cookie, request.app["Crypt"].encrypt(json.dumps(cookie).encode("utf-8")).decode("utf-8")) + return (cookie, request.app["Crypt"].encrypt(ujson.dumps(cookie).encode("utf-8")).decode("utf-8")) def decrypt_cookie(request: web.Request) -> dict: @@ -176,7 +176,7 @@ def decrypt_cookie(request: web.Request) -> dict: raise web.HTTPUnauthorized() try: cookie_json = request.app["Crypt"].decrypt(request.cookies["MTD_SESSION"].encode("utf-8")).decode("utf-8") - cookie = json.loads(cookie_json) + cookie = ujson.loads(cookie_json) LOG.debug(f"Decrypted cookie: {cookie}") return cookie except InvalidToken: @@ -229,7 +229,7 @@ def _json_exception(status: int, exception: web.HTTPException, url: URL) -> str: :param url: Request URL that caused the exception :returns: Problem detail JSON object as a string """ - body = json.dumps( + body = ujson.dumps( { "type": "about:blank", # Replace type value above with an URL to @@ -237,6 +237,7 @@ def _json_exception(status: int, exception: web.HTTPException, url: URL) -> str: "title": HTTPStatus(status).phrase, "detail": exception.reason, "instance": url.path, # optional - } + }, + escape_forward_slashes=False, ) return body diff --git a/metadata_backend/conf/conf.py b/metadata_backend/conf/conf.py index ebe4e0b35..6cecb7f96 100644 --- a/metadata_backend/conf/conf.py +++ b/metadata_backend/conf/conf.py @@ -31,7 +31,7 @@ and inserted here in projects Dockerfile. 
""" -import json +import ujson import os from pathlib import Path from distutils.util import strtobool @@ -107,7 +107,7 @@ def create_db_client() -> AsyncIOMotorClient: # Default schemas will be ENA schemas path_to_schema_file = Path(__file__).parent / "ena_schemas.json" with open(path_to_schema_file) as schema_file: - schema_types = json.load(schema_file) + schema_types = ujson.load(schema_file) # 3) Define mapping between url query parameters and mongodb queries diff --git a/metadata_backend/helpers/logger.py b/metadata_backend/helpers/logger.py index ffb86ed51..34af3161d 100644 --- a/metadata_backend/helpers/logger.py +++ b/metadata_backend/helpers/logger.py @@ -1,6 +1,6 @@ """Logging formatting and functions for debugging.""" -import json +import ujson import logging from typing import Any, Dict import os @@ -31,4 +31,4 @@ def pprint_json(content: Dict) -> None: :param content: JSON-formatted content to be printed """ - LOG.info(json.dumps(content, indent=4)) + LOG.info(ujson.dumps(content, indent=4, escape_forward_slashes=False)) diff --git a/metadata_backend/helpers/validator.py b/metadata_backend/helpers/validator.py index 4e969b2b7..beb919239 100644 --- a/metadata_backend/helpers/validator.py +++ b/metadata_backend/helpers/validator.py @@ -1,6 +1,6 @@ """Utility classes for validating XML or JSON files.""" -import json +import ujson import re from io import StringIO from typing import Any, Dict @@ -38,7 +38,7 @@ def resp_body(self) -> str: try: self.schema.validate(self.xml_content) LOG.info("Submitted file is totally valid.") - return json.dumps({"isValid": True}) + return ujson.dumps({"isValid": True}) except ParseError as error: reason = self._parse_error_reason(error) @@ -48,7 +48,7 @@ def resp_body(self) -> str: instance = re.sub(r"^.*?<", "<", line) # strip whitespaces LOG.info("Submitted file does not not contain valid XML syntax.") - return json.dumps({"isValid": False, "detail": {"reason": reason, "instance": instance}}) + return ujson.dumps({"isValid": False, "detail": {"reason": reason, "instance": instance}}) except XMLSchemaValidationError as error: # Parse reason and instance from the validation error message @@ -60,7 +60,7 @@ def resp_body(self) -> str: reason = re.sub("<[^>]*>", instance_parent + " ", reason) LOG.info("Submitted file is not valid against schema.") - return json.dumps({"isValid": False, "detail": {"reason": reason, "instance": instance}}) + return ujson.dumps({"isValid": False, "detail": {"reason": reason, "instance": instance}}) except URLError as error: reason = f"Faulty file was provided. {error.reason}." 
@@ -76,7 +76,7 @@ def _parse_error_reason(self, error: ParseError) -> str: @property def is_valid(self) -> bool: """Quick method for checking validation result.""" - resp = json.loads(self.resp_body) + resp = ujson.loads(self.resp_body) return resp["isValid"] diff --git a/tests/mockups.py b/tests/mockups.py index f3c77d6b3..905a4653d 100644 --- a/tests/mockups.py +++ b/tests/mockups.py @@ -3,7 +3,7 @@ import hashlib from os import urandom import yarl -import json +import ujson import cryptography.fernet from cryptography.hazmat.primitives import serialization @@ -112,7 +112,7 @@ def add_csrf_to_cookie(cookie, req, bad_sign=False): def encrypt_cookie(cookie, req): """Add encrypted cookie to request.""" - cookie_crypted = req.app["Crypt"].encrypt(json.dumps(cookie).encode("utf-8")).decode("utf-8") + cookie_crypted = req.app["Crypt"].encrypt(ujson.dumps(cookie).encode("utf-8")).decode("utf-8") req.cookies["MTD_SESSION"] = cookie_crypted diff --git a/tests/test_auth.py b/tests/test_auth.py index 4262efff6..d9df000c4 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -16,7 +16,7 @@ jwt_data_bad_nonce, ) from unittest import IsolatedAsyncioTestCase -import json +import ujson class AccessHandlerFailTestCase(AioHTTPTestCase): @@ -127,11 +127,11 @@ async def test_jwk_key(self): "alg": "HS256", "k": "hJtXIZ2uSN5kbQfbtTNWbpdmhkV8FJG-Onbc6mxCcYg", } - resp = MockResponse(json.dumps(data), 200) + resp = MockResponse(ujson.dumps(data), 200) with patch("aiohttp.ClientSession.get", return_value=resp): result = await self.AccessHandler._get_key() - self.assertEqual(result, json.dumps(data)) + self.assertEqual(result, ujson.dumps(data)) async def test_set_user_fail(self): """Test set user raises exception.""" diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 1bbe85595..0b8e1d536 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -499,7 +499,7 @@ async def test_validation_passes_for_valid_xml(self): data = self.create_submission_data(files) response = await self.client.post("/validate", data=data) self.assertEqual(response.status, 200) - self.assertIn('{"isValid": true}', await response.text()) + self.assertIn('{"isValid":true}', await response.text()) @unittest_run_loop async def test_validation_fails_bad_schema(self): From 293209b220c2c4bcdadffb42a889e45f51e02822 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 3 Sep 2021 02:28:54 +0300 Subject: [PATCH 003/336] add templates api endpoint add information to specification add new template handlers adjust operators and interaction with drafts --- docs/specification.yml | 132 ++++++++++++++++++- metadata_backend/api/handlers.py | 134 ++++++++++++++++---- metadata_backend/api/middlewares.py | 1 + metadata_backend/api/operators.py | 14 +- metadata_backend/helpers/schemas/users.json | 3 +- metadata_backend/server.py | 28 +++- tests/integration/run_tests.py | 92 ++++++++++---- tests/test_handlers.py | 28 ++-- tests/test_server.py | 2 +- tox.ini | 1 + 10 files changed, 355 insertions(+), 80 deletions(-) diff --git a/docs/specification.yml b/docs/specification.yml index f054db993..510e32bb6 100644 --- a/docs/specification.yml +++ b/docs/specification.yml @@ -295,7 +295,7 @@ paths: parameters: - name: schema in: path - description: Title of the XML schema. + description: Name of the Metadata schema. schema: type: string required: true @@ -529,7 +529,7 @@ paths: parameters: - name: schema in: path - description: Title of the XML schema. + description: Name of the Metadata schema. 
schema: type: string required: true @@ -749,6 +749,134 @@ paths: application/json: schema: $ref: "#/components/schemas/403Forbidden" + /templates/{schema}: + post: + tags: + - Submission + summary: Submit data to a specific schema + parameters: + - name: schema + in: path + description: Title of the template schema. + schema: + type: string + required: true + requestBody: + content: + application/json: + schema: + type: object + responses: + 201: + description: Created + content: + application/json: + schema: + $ref: "#/components/schemas/ObjectCreated" + 400: + description: Bad Request + content: + application/json: + schema: + $ref: "#/components/schemas/400BadRequest" + 401: + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/401Unauthorized" + 403: + description: Forbidden + content: + application/json: + schema: + $ref: "#/components/schemas/403Forbidden" + /templates/{schema}/{accessionId}: + get: + tags: + - Query + summary: List of object by accession ID. + parameters: + - name: schema + in: path + description: Unique id of the targeted service. + schema: + type: string + required: true + - name: accessionId + in: path + description: filter objects in schema using accession ID + schema: + type: string + required: true + responses: + 200: + description: OK + content: + application/json: + schema: + $ref: "#/components/schemas/Object" + text/xml: + schema: + type: string + format: binary + 401: + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/401Unauthorized" + 403: + description: Forbidden + content: + application/json: + schema: + $ref: "#/components/schemas/403Forbidden" + 404: + description: Not Found + content: + application/json: + schema: + $ref: "#/components/schemas/404NotFound" + delete: + tags: + - Manage + summary: Delete object from a schema with a specified accession ID + parameters: + - name: schema + in: path + description: Unique id of the targeted service. 
+ schema: + type: string + required: true + - name: accessionId + in: path + description: filter objects in schema using accession ID + schema: + type: string + required: true + responses: + 204: + description: No Content + 400: + description: Bad Request + content: + application/json: + schema: + $ref: "#/components/schemas/400BadRequest" + 401: + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/401Unauthorized" + 403: + description: Forbidden + content: + application/json: + schema: + $ref: "#/components/schemas/403Forbidden" + /folders: get: tags: diff --git a/metadata_backend/api/handlers.py b/metadata_backend/api/handlers.py index 58454e51d..f0bebf4ea 100644 --- a/metadata_backend/api/handlers.py +++ b/metadata_backend/api/handlers.py @@ -90,8 +90,9 @@ async def _handle_check_ownedby_user(self, req: Request, collection: str, access elif check: # if the draft object is found in folder we just need to check if the folder belongs to user _check = await user_op.check_user_has_doc("folders", current_user, folder_id) - elif collection.startswith("draft"): - # if collection is draft but not found in a folder we also check if object is in drafts of the user + elif collection.startswith("template"): + # if collection is template but not found in a folder + # we also check if object is in templates of the user # they will be here if they will not be deleted after publish _check = await user_op.check_user_has_doc(collection, current_user, accession_id) else: @@ -334,7 +335,7 @@ async def post_object(self, req: Request) -> Response: body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) url = f"{req.scheme}://{req.host}{req.path}" - location_headers = CIMultiDict(Location=f"{url}{accession_id}") + location_headers = CIMultiDict(Location=f"{url}/{accession_id}") LOG.info(f"POST object with accesssion ID {accession_id} in schema {collection} was successful.") return web.Response( body=body, @@ -381,15 +382,9 @@ async def delete_object(self, req: Request) -> Response: raise web.HTTPUnauthorized(reason=reason) await folder_op.remove_object(folder_id, collection, accession_id) else: - user_op = UserOperator(db_client) - current_user = get_session(req)["user_info"] - check_user = await user_op.check_user_has_doc(collection, current_user, accession_id) - if check_user: - await user_op.remove_objects(current_user, "drafts", [accession_id]) - else: - reason = "This object does not seem to belong to any user." - LOG.error(reason) - raise web.HTTPUnprocessableEntity(reason=reason) + reason = "This object does not seem to belong to any user." + LOG.error(reason) + raise web.HTTPUnprocessableEntity(reason=reason) accession_id = await Operator(db_client).delete_metadata_object(collection, accession_id) @@ -477,6 +472,101 @@ async def patch_object(self, req: Request) -> Response: return web.Response(body=body, status=200, content_type="application/json") +class TemplatesAPIHandler(RESTAPIHandler): + """API Handler for Templates.""" + + async def get_template(self, req: Request) -> Response: + """Get one metadata object by its accession id. + + Returns JSON. 
+ + :param req: GET request + :returns: JSON response containing template object + """ + accession_id = req.match_info["accessionId"] + schema_type = req.match_info["schema"] + self._check_schema_exists(schema_type) + collection = f"template-{schema_type}" + + db_client = req.app["db_client"] + operator = Operator(db_client) + + await operator.check_exists(collection, accession_id) + + await self._handle_check_ownedby_user(req, collection, accession_id) + + data, content_type = await operator.read_metadata_object(collection, accession_id) + + data = ujson.dumps(data, escape_forward_slashes=False) + LOG.info(f"GET object with accesssion ID {accession_id} from schema {collection}.") + return web.Response(body=data, status=200, content_type=content_type) + + async def post_template(self, req: Request) -> Response: + """Save metadata object to database. + + For JSON request body we validate it is consistent with the + associated JSON schema. + + :param req: POST request + :returns: JSON response containing accessionId for submitted object + """ + schema_type = req.match_info["schema"] + self._check_schema_exists(schema_type) + collection = f"template-{schema_type}" + + db_client = req.app["db_client"] + content = await self._get_data(req) + + operator = Operator(db_client) + + accession_id = await operator.create_metadata_object(collection, content) + + body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) + url = f"{req.scheme}://{req.host}{req.path}" + location_headers = CIMultiDict(Location=f"{url}/{accession_id}") + LOG.info(f"POST object with accesssion ID {accession_id} in schema {collection} was successful.") + return web.Response( + body=body, + status=201, + headers=location_headers, + content_type="application/json", + ) + + async def delete_template(self, req: Request) -> Response: + """Delete metadata object from database. + + :param req: DELETE request + :raises: HTTPUnauthorized if folder published + :raises: HTTPUnprocessableEntity if object does not belong to current user + :returns: HTTPNoContent response + """ + schema_type = req.match_info["schema"] + self._check_schema_exists(schema_type) + collection = f"template-{schema_type}" + + accession_id = req.match_info["accessionId"] + db_client = req.app["db_client"] + + await Operator(db_client).check_exists(collection, accession_id) + + await self._handle_check_ownedby_user(req, collection, accession_id) + + user_op = UserOperator(db_client) + current_user = get_session(req)["user_info"] + check_user = await user_op.check_user_has_doc(collection, current_user, accession_id) + if check_user: + await user_op.remove_objects(current_user, "templates", [accession_id]) + else: + reason = "This object does not seem to belong to any user." 
+ LOG.error(reason) + raise web.HTTPUnprocessableEntity(reason=reason) + + accession_id = await Operator(db_client).delete_metadata_object(collection, accession_id) + + LOG.info(f"DELETE object with accession ID {accession_id} in schema {collection} was successful.") + return web.Response(status=204) + + class FolderAPIHandler(RESTAPIHandler): """API Handler for folders.""" @@ -744,9 +834,9 @@ def _check_patch_user(self, patch_ops: Any) -> None: :raises: HTTPUnauthorized if request tries to do anything else than add or replace :returns: None """ - _arrays = ["/drafts/-", "/folders/-"] + _arrays = ["/templates/-", "/folders/-"] _required_values = ["schema", "accessionId"] - _tags = re.compile("^/(drafts)/[0-9]*/(tags)$") + _tags = re.compile("^/(templates)/[0-9]*/(tags)$") for op in patch_ops: if _tags.match(op["path"]): LOG.info(f"{op['op']} on tags in folder") @@ -769,7 +859,7 @@ def _check_patch_user(self, patch_ops: Any) -> None: reason = "We only accept string folder IDs." LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - if op["path"] == "/drafts/-": + if op["path"] == "/templates/-": _ops = op["value"] if isinstance(op["value"], list) else [op["value"]] for item in _ops: if not all(key in item.keys() for key in _required_values): @@ -790,7 +880,7 @@ async def get_user(self, req: Request) -> Response: :param req: GET request :raises: HTTPUnauthorized if not current user - :returns: JSON response containing user object or list of user drafts or user folders by id + :returns: JSON response containing user object or list of user templates or user folders by id """ user_id = req.match_info["userId"] if user_id != "current": @@ -801,7 +891,7 @@ async def get_user(self, req: Request) -> Response: item_type = req.query.get("items", "").lower() if item_type: - # Return only list of drafts or list of folder IDs owned by the user + # Return only list of templates or list of folder IDs owned by the user result, link_headers = await self._get_user_items(req, current_user, item_type) return web.Response( body=ujson.dumps(result, escape_forward_slashes=False), @@ -810,7 +900,7 @@ async def get_user(self, req: Request) -> Response: content_type="application/json", ) else: - # Return whole user object if drafts or folders are not specified in query + # Return whole user object if templates or folders are not specified in query db_client = req.app["db_client"] operator = UserOperator(db_client) user = await operator.read_user(current_user) @@ -870,7 +960,7 @@ async def delete_user(self, req: Request) -> Response: await obj_ops.delete_metadata_object(obj["schema"], obj["accessionId"]) await fold_ops.delete_folder(folder_id) - for tmpl in user["drafts"]: + for tmpl in user["templates"]: await obj_ops.delete_metadata_object(tmpl["schema"], tmpl["accessionId"]) await operator.delete_user(current_user) @@ -896,13 +986,13 @@ async def _get_user_items(self, req: Request, user: Dict, item_type: str) -> Tup :param req: GET request :param user: User object - :param item_type: Name of the items ("drafts" or "folders") + :param item_type: Name of the items ("templates" or "folders") :raises: HTTPUnauthorized if not current user :returns: Paginated list of user draft templates and link header """ # Check item_type parameter is not faulty - if item_type not in ["drafts", "folders"]: - reason = f"{item_type} is a faulty item parameter. Should be either folders or drafts" + if item_type not in ["templates", "folders"]: + reason = f"{item_type} is a faulty item parameter. 
Should be either folders or templates" LOG.error(reason) raise web.HTTPBadRequest(reason=reason) diff --git a/metadata_backend/api/middlewares.py b/metadata_backend/api/middlewares.py index 657b8e96d..beaae4127 100644 --- a/metadata_backend/api/middlewares.py +++ b/metadata_backend/api/middlewares.py @@ -84,6 +84,7 @@ async def check_login(request: Request, handler: Callable) -> StreamResponse: controlled_paths = [ "/schemas", "/drafts", + "/templates", "/validate", "/publish", "/submit", diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 7535b15cf..dade951fd 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -843,7 +843,7 @@ def __init__(self, db_client: AsyncIOMotorClient) -> None: self.db_service = DBService(mongo_database, db_client) async def check_user_has_doc(self, collection: str, user_id: str, accession_id: str) -> bool: - """Check a folder/draft belongs to user. + """Check a folder/template belongs to user. :param collection: collection it belongs to, it would be used as path :param user_id: user_id from session @@ -852,8 +852,8 @@ async def check_user_has_doc(self, collection: str, user_id: str, accession_id: :returns: True if accession_id belongs to user """ try: - if collection.startswith("draft"): - user_query = {"drafts": {"$elemMatch": {"accessionId": accession_id}}, "userId": user_id} + if collection.startswith("template"): + user_query = {"templates": {"$elemMatch": {"accessionId": accession_id}}, "userId": user_id} else: user_query = {"folders": {"$elemMatch": {"$eq": accession_id}}, "userId": user_id} user_cursor = self.db_service.query("user", user_query) @@ -891,7 +891,7 @@ async def create_user(self, data: Tuple) -> str: LOG.info(f"User with identifier: {external_id} exists, no need to create.") return existing_user_id else: - user_data["drafts"] = [] + user_data["templates"] = [] user_data["folders"] = [] user_data["userId"] = user_id = self._generate_user_id() user_data["name"] = name @@ -1019,7 +1019,7 @@ async def assign_objects(self, user_id: str, collection: str, object_ids: List) async def remove_objects(self, user_id: str, collection: str, object_ids: List) -> None: """Remove object from user. - An object can be folder(s) or draft(s). + An object can be folder(s) or template(s). 
:param user_id: ID of user to update :param collection: collection where to remove the id from @@ -1031,8 +1031,8 @@ async def remove_objects(self, user_id: str, collection: str, object_ids: List) try: await self._check_user_exists(user_id) for obj in object_ids: - if collection == "drafts": - remove_content = {"drafts": {"accessionId": obj}} + if collection == "templates": + remove_content = {"templates": {"accessionId": obj}} else: remove_content = {"folders": obj} await self.db_service.remove("user", user_id, remove_content) diff --git a/metadata_backend/helpers/schemas/users.json b/metadata_backend/helpers/schemas/users.json index 0f021b1eb..aed6a5a4e 100644 --- a/metadata_backend/helpers/schemas/users.json +++ b/metadata_backend/helpers/schemas/users.json @@ -14,7 +14,7 @@ "type": "string", "title": "User Name" }, - "drafts": { + "templates": { "type": "array", "title": "User templates schema", "items": { @@ -42,7 +42,6 @@ "type": "string", "title": "Type of submission", "enum": [ - "XML", "Form" ] } diff --git a/metadata_backend/server.py b/metadata_backend/server.py index 49de9e973..5fa1ec96c 100644 --- a/metadata_backend/server.py +++ b/metadata_backend/server.py @@ -15,6 +15,7 @@ FolderAPIHandler, UserAPIHandler, ObjectAPIHandler, + TemplatesAPIHandler, ) from .api.auth import AccessHandler from .api.middlewares import http_error_handler, check_login @@ -64,35 +65,48 @@ async def init() -> web.Application: server.middlewares.append(http_error_handler) server.middlewares.append(check_login) - _handler = RESTAPIHandler() + _schema = RESTAPIHandler() _object = ObjectAPIHandler() _folder = FolderAPIHandler() _user = UserAPIHandler() _submission = SubmissionAPIHandler() + _template = TemplatesAPIHandler() api_routes = [ - web.get("/schemas", _handler.get_schema_types), - web.get("/schemas/{schema}", _handler.get_json_schema), - web.get("/objects/{schema}/{accessionId}", _object.get_object), - web.delete("/objects/{schema}/{accessionId}", _object.delete_object), + # retrieve schema and informations about it + web.get("/schemas", _schema.get_schema_types), + web.get("/schemas/{schema}", _schema.get_json_schema), + # metadata objects operations web.get("/objects/{schema}", _object.query_objects), web.post("/objects/{schema}", _object.post_object), + web.get("/objects/{schema}/{accessionId}", _object.get_object), web.put("/objects/{schema}/{accessionId}", _object.put_object), + web.patch("/objects/{schema}/{accessionId}", _object.patch_object), + web.delete("/objects/{schema}/{accessionId}", _object.delete_object), + # drafts objects operations + web.post("/drafts/{schema}", _object.post_object), web.get("/drafts/{schema}/{accessionId}", _object.get_object), web.put("/drafts/{schema}/{accessionId}", _object.put_object), web.patch("/drafts/{schema}/{accessionId}", _object.patch_object), - web.patch("/objects/{schema}/{accessionId}", _object.patch_object), web.delete("/drafts/{schema}/{accessionId}", _object.delete_object), - web.post("/drafts/{schema}", _object.post_object), + # template objects operations + web.post("/templates/{schema}", _template.post_template), + web.get("/templates/{schema}/{accessionId}", _template.get_template), + web.delete("/templates/{schema}/{accessionId}", _template.delete_template), + # folders/submissions operations web.get("/folders", _folder.get_folders), web.post("/folders", _folder.post_folder), web.get("/folders/{folderId}", _folder.get_folder), web.patch("/folders/{folderId}", _folder.patch_folder), web.delete("/folders/{folderId}", 
_folder.delete_folder), + # publish submissions web.patch("/publish/{folderId}", _folder.publish_folder), + # users operations web.get("/users/{userId}", _user.get_user), web.patch("/users/{userId}", _user.patch_user), web.delete("/users/{userId}", _user.delete_user), + # submit web.post("/submit", _submission.submit), + # validate web.post("/validate", _submission.validate), ] server.router.add_routes(api_routes) diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index f1b56c7aa..5d87bd695 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -12,7 +12,7 @@ import urllib import xml.etree.ElementTree as ET -import aiofiles +import aiofiles # type: ignore import aiohttp from aiohttp import FormData @@ -51,6 +51,7 @@ mock_auth_url = "http://localhost:8000" objects_url = f"{base_url}/objects" drafts_url = f"{base_url}/drafts" +templates_url = f"{base_url}/templates" folders_url = f"{base_url}/folders" users_url = f"{base_url}/users" submit_url = f"{base_url}/submit" @@ -287,6 +288,47 @@ async def delete_draft(sess, schema, draft_id): assert resp.status == 204, "HTTP Status code error" +async def post_template_json(sess, schema, filename): + """Post one metadata object within session, returns accessionId. + + :param sess: HTTP session in which request call is made + :param schema: name of the schema (folder) used for testing + :param filename: name of the file used for testing. + """ + request_data = await create_request_json_data(schema, filename) + async with sess.post(f"{templates_url}/{schema}", data=request_data) as resp: + LOG.debug(f"Adding new template object to {schema}, via JSON file {filename}") + assert resp.status == 201, "HTTP Status code error" + ans = await resp.json() + return ans["accessionId"] + + +async def get_template(sess, schema, template_id): + """Get and return a drafted metadata object. + + :param sess: HTTP session in which request call is made + :param schema: name of the schema (folder) used for testing + :param draft_id: id of the draft + """ + async with sess.get(f"{templates_url}/{schema}/{template_id}") as resp: + LOG.debug(f"Checking that {template_id} JSON exists") + assert resp.status == 200, "HTTP Status code error" + ans = await resp.json() + return json.dumps(ans) + + +async def delete_template(sess, schema, template_id): + """Delete metadata object within session. + + :param sess: HTTP session in which request call is made + :param schema: name of the schema (folder) used for testing + :param draft_id: id of the draft + """ + async with sess.delete(f"{templates_url}/{schema}/{template_id}") as resp: + LOG.debug(f"Deleting template object {template_id} from {schema}") + assert resp.status == 204, "HTTP Status code error" + + async def post_folder(sess, data): """Post one object folder within session, returns folderId. @@ -858,7 +900,7 @@ async def test_getting_paginated_folders(sess): async def test_getting_user_items(sess): - """Test querying user's drafts or folders in the user object with GET user request. + """Test querying user's templates or folders in the user object with GET user request. 
:param sess: HTTP session in which request call is made """ @@ -869,33 +911,33 @@ async def test_getting_user_items(sess): response = await resp.json() real_user_id = response["userId"] - # Patch user to have a draft - draft_id = await post_draft_json(sess, "study", "SRP000539.json") - patch_drafts_user = [ - {"op": "add", "path": "/drafts/-", "value": {"accessionId": draft_id, "schema": "draft-study"}} + # Patch user to have a templates + template_id = await post_template_json(sess, "study", "SRP000539.json") + patch_templates_user = [ + {"op": "add", "path": "/templates/-", "value": {"accessionId": template_id, "schema": "template-study"}} ] - await patch_user(sess, user_id, real_user_id, patch_drafts_user) + await patch_user(sess, user_id, real_user_id, patch_templates_user) # Test querying for list of user draft templates - async with sess.get(f"{users_url}/{user_id}?items=drafts") as resp: - LOG.debug(f"Reading user {user_id} drafts") + async with sess.get(f"{users_url}/{user_id}?items=templates") as resp: + LOG.debug(f"Reading user {user_id} templates") assert resp.status == 200, "HTTP Status code error" ans = await resp.json() assert ans["page"]["page"] == 1 assert ans["page"]["size"] == 5 assert ans["page"]["totalPages"] == 1 - assert ans["page"]["totalDrafts"] == 1 - assert len(ans["drafts"]) == 1 + assert ans["page"]["totalTemplates"] == 1 + assert len(ans["templates"]) == 1 - async with sess.get(f"{users_url}/{user_id}?items=drafts&per_page=3") as resp: - LOG.debug(f"Reading user {user_id} drafts") + async with sess.get(f"{users_url}/{user_id}?items=templates&per_page=3") as resp: + LOG.debug(f"Reading user {user_id} templates") assert resp.status == 200, "HTTP Status code error" ans = await resp.json() assert ans["page"]["page"] == 1 assert ans["page"]["size"] == 3 - assert len(ans["drafts"]) == 1 + assert len(ans["templates"]) == 1 - await delete_draft(sess, "study", draft_id) # Future tests will assume the drafts key is empty + await delete_template(sess, "study", template_id) # Future tests will assume the templates key is empty # Test querying for the list of folder IDs async with sess.get(f"{users_url}/{user_id}?items=folders") as resp: @@ -936,7 +978,7 @@ async def test_crud_users_works(sess): res = await resp.json() assert res["userId"] == real_user_id, "user id does not match" assert res["name"] == f"{test_user_given} {test_user_family}", "user name mismatch" - assert res["drafts"] == [], "user drafts content mismatch" + assert res["templates"] == [], "user templates content mismatch" assert folder_id in res["folders"], "folder added missing mismatch" folder_published = {"name": "Another test Folder", "description": "Test published folder does not get deleted"} @@ -961,22 +1003,22 @@ async def test_crud_users_works(sess): res = await resp.json() assert delete_folder_id not in res["folders"], "delete folder still exists at user" - draft_id = await post_draft_json(sess, "study", "SRP000539.json") - patch_drafts_user = [ - {"op": "add", "path": "/drafts/-", "value": {"accessionId": draft_id, "schema": "draft-study"}} + template_id = await post_template_json(sess, "study", "SRP000539.json") + patch_templates_user = [ + {"op": "add", "path": "/templates/-", "value": {"accessionId": template_id, "schema": "template-study"}} ] - await patch_user(sess, user_id, real_user_id, patch_drafts_user) + await patch_user(sess, user_id, real_user_id, patch_templates_user) async with sess.get(f"{users_url}/{user_id}") as resp: - LOG.debug(f"Checking that draft {draft_id} was added") + 
LOG.debug(f"Checking that template: {template_id} was added") res = await resp.json() - assert res["drafts"][0]["accessionId"] == draft_id, "draft added does not exists" + assert res["templates"][0]["accessionId"] == template_id, "added template does not exists" - await delete_draft(sess, "study", draft_id) + await delete_template(sess, "study", template_id) async with sess.get(f"{users_url}/{user_id}") as resp: - LOG.debug(f"Checking that draft {draft_id} was added") + LOG.debug(f"Checking that template {template_id} was added") res = await resp.json() - assert len(res["drafts"]) == 0, "draft was not deleted from users" + assert len(res["templates"]) == 0, "template was not deleted from users" # Delete user await delete_user(sess, user_id) diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 0b8e1d536..6db123428 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -57,7 +57,7 @@ async def setUpAsync(self): self.test_user = { "userId": self.user_id, "name": "tester", - "drafts": [], + "templates": [], "folders": ["FOL12345678"], } @@ -719,7 +719,7 @@ async def test_get_user_works(self): @unittest_run_loop async def test_get_user_drafts_with_no_drafts(self): """Test getting user drafts when user has no drafts.""" - response = await self.client.get("/users/current?items=drafts") + response = await self.client.get("/users/current?items=templates") self.assertEqual(response.status, 200) self.MockedUserOperator().filter_user.assert_called_once() json_resp = await response.json() @@ -728,19 +728,19 @@ async def test_get_user_drafts_with_no_drafts(self): "page": 1, "size": 5, "totalPages": 0, - "totalDrafts": 0, + "totalTemplates": 0, }, - "drafts": [], + "templates": [], } self.assertEqual(json_resp, result) @unittest_run_loop - async def test_get_user_drafts_with_1_draft(self): - """Test getting user drafts when user has 1 draft.""" + async def test_get_user_templates_with_1_template(self): + """Test getting user templates when user has 1 draft.""" user = self.test_user - user["drafts"].append(self.metadata_json) - self.MockedUserOperator().filter_user.return_value = (user["drafts"], 1) - response = await self.client.get("/users/current?items=drafts") + user["templates"].append(self.metadata_json) + self.MockedUserOperator().filter_user.return_value = (user["templates"], 1) + response = await self.client.get("/users/current?items=templates") self.assertEqual(response.status, 200) self.MockedUserOperator().filter_user.assert_called_once() json_resp = await response.json() @@ -749,9 +749,9 @@ async def test_get_user_drafts_with_1_draft(self): "page": 1, "size": 5, "totalPages": 1, - "totalDrafts": 1, + "totalTemplates": 1, }, - "drafts": [self.metadata_json], + "templates": [self.metadata_json], } self.assertEqual(json_resp, result) @@ -776,12 +776,12 @@ async def test_get_user_folder_list(self): @unittest_run_loop async def test_get_user_items_with_bad_param(self): - """Test that error is raised if items parameter in query is not drafts or folders.""" + """Test that error is raised if items parameter in query is not templates or folders.""" response = await self.client.get("/users/current?items=wrong_thing") self.assertEqual(response.status, 400) json_resp = await response.json() self.assertEqual( - json_resp["detail"], "wrong_thing is a faulty item parameter. Should be either folders or drafts" + json_resp["detail"], "wrong_thing is a faulty item parameter. 
Should be either folders or templates" ) @unittest_run_loop @@ -807,7 +807,7 @@ async def test_update_user_fails_with_wrong_key(self): async def test_update_user_passes(self): """Test that user object would update with correct keys.""" self.MockedUserOperator().update_user.return_value = self.user_id - data = [{"op": "add", "path": "/drafts/-", "value": [{"accessionId": "3", "schema": "sample"}]}] + data = [{"op": "add", "path": "/templates/-", "value": [{"accessionId": "3", "schema": "sample"}]}] response = await self.client.patch("/users/current", json=data) self.MockedUserOperator().update_user.assert_called_once() self.assertEqual(response.status, 200) diff --git a/tests/test_server.py b/tests/test_server.py index dd3e29f8b..603dfcfbd 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -43,7 +43,7 @@ async def test_init(self): async def test_api_routes_are_set(self): """Test correct amount of api (no frontend) routes is set.""" server = await self.get_application() - self.assertIs(len(server.router.resources()), 19) + self.assertIs(len(server.router.resources()), 18) @unittest_run_loop async def test_frontend_routes_are_set(self): diff --git a/tox.ini b/tox.ini index b724108db..1f1e93206 100644 --- a/tox.ini +++ b/tox.ini @@ -30,6 +30,7 @@ deps = -rrequirements.txt mypy types-python-dateutil + types-ujson # Mypy fails if 3rd party library doesn't have type hints configured. # Alternative to ignoring imports would be to write custom stub files, which # could be done at some point. From c69fc8aea0fdd8991eb7e5f13a82eaae44870ae7 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 3 Sep 2021 02:29:12 +0300 Subject: [PATCH 004/336] add requirements for ujson build --- Dockerfile | 4 ++-- requirements.txt | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index b317c8c81..c004c5f9e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,7 +18,7 @@ FROM python:3.8-alpine3.13 as BUILD-BACKEND RUN apk add --update \ && apk add --no-cache build-base curl-dev linux-headers bash git musl-dev libffi-dev \ - && apk add --no-cache python3-dev openssl-dev rust cargo \ + && apk add --no-cache python3-dev openssl-dev rust cargo libstdc++ \ && rm -rf /var/cache/apk/* COPY requirements.txt /root/submitter/requirements.txt @@ -34,7 +34,7 @@ RUN pip install --upgrade pip && \ FROM python:3.8-alpine3.13 -RUN apk add --no-cache --update bash +RUN apk add --no-cache --update libstdc++ LABEL maintainer="CSC Developers" LABEL org.label-schema.schema-version="1.0" diff --git a/requirements.txt b/requirements.txt index 191c5e38a..3f19aaf6e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,4 @@ python-dateutil==2.8.2 uvloop==0.16.0 xmlschema==1.7.0 Authlib==0.15.4 +ujson==4.1.0 From 1d9d4d67a12f717e01e3eddbd1e7aeca05b9b050 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 3 Sep 2021 02:47:57 +0300 Subject: [PATCH 005/336] checking the draft was deleted after publication --- tests/integration/run_tests.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index 5d87bd695..fa600b265 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -197,7 +197,7 @@ async def post_draft_json(sess, schema, filename): return ans["accessionId"] -async def get_draft(sess, schema, draft_id): +async def get_draft(sess, schema, draft_id, expected_status=200): """Get and return a drafted metadata object. 
    :param sess: HTTP session in which request call is made
    :param schema: name of the schema (folder) used for testing
    :param draft_id: id of the draft
    """
    async with sess.get(f"{drafts_url}/{schema}/{draft_id}") as resp:
        LOG.debug(f"Checking that {draft_id} JSON exists")
-        assert resp.status == 200, "HTTP Status code error"
+        assert resp.status == expected_status, "HTTP Status code error"
        ans = await resp.json()
        return json.dumps(ans)
@@ -704,6 +704,9 @@ async def test_crud_folders_works(sess):
    # Publish the folder
    folder_id = await publish_folder(sess, folder_id)
+
+    await get_draft(sess, "sample", draft_id, 404)  # checking the draft was deleted after publication
+
    async with sess.get(f"{folders_url}/{folder_id}") as resp:
        LOG.debug(f"Checking that folder {folder_id} was patched")
        res = await resp.json()

From 3743f1fdf057c584402f6ded9514efce7c15edbd Mon Sep 17 00:00:00 2001
From: Stefan Negru
Date: Fri, 3 Sep 2021 16:30:58 +0300
Subject: [PATCH 006/336] add patch & handling of multiple objects in templates

---
 metadata_backend/api/handlers.py           | 50 ++++++++++++++-
 metadata_backend/api/operators.py          |  4 +-
 metadata_backend/server.py                 |  1 +
 tests/integration/run_tests.py             | 45 ++++++++-----
 tests/test_files/study/SRP000539_list.json | 73 ++++++++++++++++++++++
 5 files changed, 153 insertions(+), 20 deletions(-)
 create mode 100644 tests/test_files/study/SRP000539_list.json

diff --git a/metadata_backend/api/handlers.py b/metadata_backend/api/handlers.py
index f0bebf4ea..47837016c 100644
--- a/metadata_backend/api/handlers.py
+++ b/metadata_backend/api/handlers.py
@@ -517,11 +517,29 @@ async def post_template(self, req: Request) -> Response:
        db_client = req.app["db_client"]
        content = await self._get_data(req)

+        user_op = UserOperator(db_client)
+        current_user = get_session(req)["user_info"]
+
        operator = Operator(db_client)

-        accession_id = await operator.create_metadata_object(collection, content)
+        if isinstance(content, list):
+            tmpl_list = []
+            for tmpl in content:
+                accession_id = await operator.create_metadata_object(collection, tmpl)
+                await user_op.assign_objects(
+                    current_user, "templates", [{"accessionId": accession_id, "schema": collection}]
+                )
+                tmpl_list.append({"accessionId": accession_id})
+
+            body = ujson.dumps(tmpl_list, escape_forward_slashes=False)
+        else:
+            accession_id = await operator.create_metadata_object(collection, content)
+            await user_op.assign_objects(
+                current_user, "templates", [{"accessionId": accession_id, "schema": collection}]
+            )
+
+            body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False)

-        body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False)
        url = f"{req.scheme}://{req.host}{req.path}"
        location_headers = CIMultiDict(Location=f"{url}/{accession_id}")
        LOG.info(f"POST object with accession ID {accession_id} in schema {collection} was successful.")
        return web.Response(
            body=body,
            status=201,
            headers=location_headers,
            content_type="application/json",
        )

+    async def patch_template(self, req: Request) -> Response:
+        """Update metadata object in database.
+
+        :param req: PATCH request
+        :raises: HTTPUnauthorized if object is in published folder
+        :returns: JSON response containing accessionId for submitted object
+        """
+        schema_type = req.match_info["schema"]
+        accession_id = req.match_info["accessionId"]
+        self._check_schema_exists(schema_type)
+        collection = f"template-{schema_type}"
+
+        db_client = req.app["db_client"]
+        operator: Union[Operator, XMLOperator]
+
+        content = await self._get_data(req)
+        operator = Operator(db_client)
+
+        await operator.check_exists(collection, accession_id)
+
+        await self._handle_check_ownedby_user(req, collection, accession_id)
+
+        accession_id = await operator.update_metadata_object(collection, accession_id, content)
+
+        body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False)
+        LOG.info(f"PATCH object with accession ID {accession_id} in schema {collection} was successful.")
+        return web.Response(body=body, status=200, content_type="application/json")
+
    async def delete_template(self, req: Request) -> Response:
        """Delete metadata object from database.

diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py
index dade951fd..8c3d014eb 100644
--- a/metadata_backend/api/operators.py
+++ b/metadata_backend/api/operators.py
@@ -991,12 +991,12 @@ async def update_user(self, user_id: str, patch: List) -> str:
    async def assign_objects(self, user_id: str, collection: str, object_ids: List) -> None:
        """Assign object to user.

-        An object can be folder(s) or draft(s).
+        An object can be folder(s) or template(s).

        :param user_id: ID of user to update
        :param collection: collection where to remove the id from
        :param object_ids: ID or list of IDs of folder(s) to assign
-        :raises: HTTPBadRequest if assigning drafts/folders to user was not successful
+        :raises: HTTPBadRequest if assigning templates/folders to user was not successful
        returns: None
        """
        try:

diff --git a/metadata_backend/server.py b/metadata_backend/server.py
index 5fa1ec96c..c39d3364e 100644
--- a/metadata_backend/server.py
+++ b/metadata_backend/server.py
@@ -91,6 +91,7 @@ async def init() -> web.Application:
        # template objects operations
        web.post("/templates/{schema}", _template.post_template),
        web.get("/templates/{schema}/{accessionId}", _template.get_template),
+        web.patch("/templates/{schema}/{accessionId}", _template.patch_template),
        web.delete("/templates/{schema}/{accessionId}", _template.delete_template),
        # folders/submissions operations
        web.get("/folders", _folder.get_folders),

diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py
index fa600b265..e351f35c7 100644
--- a/tests/integration/run_tests.py
+++ b/tests/integration/run_tests.py
@@ -300,7 +300,10 @@ async def post_template_json(sess, schema, filename):
        LOG.debug(f"Adding new template object to {schema}, via JSON file {filename}")
        assert resp.status == 201, "HTTP Status code error"
        ans = await resp.json()
-        return ans["accessionId"]
+        if isinstance(ans, list):
+            return ans
+        else:
+            return ans["accessionId"]

async def get_template(sess, schema, template_id):
    """Get and return a drafted metadata object.

    :param sess: HTTP session in which request call is made
    :param schema: name of the schema (folder) used for testing
-    :param draft_id: id of the draft
+    :param template_id: id of the template
    """
    async with sess.get(f"{templates_url}/{schema}/{template_id}") as resp:
        LOG.debug(f"Checking that {template_id} JSON exists")
        assert resp.status == 200, "HTTP Status code error"
        ans = await resp.json()
        return json.dumps(ans)

+async def patch_template(sess, schema, template_id, update_filename):
+    """Patch one metadata object within session, return accessionId.
+
+    :param sess: HTTP session in which request call is made
+    :param schema: name of the schema (folder) used for testing
+    :param template_id: id of the template
+    :param update_filename: name of the file used for updating data.
+    """
+    request_data = await create_request_json_data(schema, update_filename)
+    async with sess.patch(f"{templates_url}/{schema}/{template_id}", data=request_data) as resp:
+        LOG.debug(f"Update template object in {schema}")
+        assert resp.status == 200, "HTTP Status code error"
+        ans_put = await resp.json()
+        assert ans_put["accessionId"] == template_id, "accession ID error"
+        return ans_put["accessionId"]
+
+
async def delete_template(sess, schema, template_id):
    """Delete metadata object within session.

    :param sess: HTTP session in which request call is made
    :param schema: name of the schema (folder) used for testing
-    :param draft_id: id of the draft
+    :param template_id: id of the template
    """
    async with sess.delete(f"{templates_url}/{schema}/{template_id}") as resp:
        LOG.debug(f"Deleting template object {template_id} from {schema}")
        assert resp.status == 204, "HTTP Status code error"

@@ -911,15 +931,9 @@ async def test_getting_user_items(sess):
    async with sess.get(f"{users_url}/{user_id}") as resp:
        LOG.debug(f"Reading user {user_id}")
        assert resp.status == 200, "HTTP Status code error"
-        response = await resp.json()
-        real_user_id = response["userId"]

-    # Patch user to have a templates
+    # Add template to user
    template_id = await post_template_json(sess, "study", "SRP000539.json")
-    patch_templates_user = [
-        {"op": "add", "path": "/templates/-", "value": {"accessionId": template_id, "schema": "template-study"}}
-    ]
-    await patch_user(sess, user_id, real_user_id, patch_templates_user)

    # Test querying for list of user draft templates
    async with sess.get(f"{users_url}/{user_id}?items=templates") as resp:
        LOG.debug(f"Reading user {user_id} templates")

@@ -974,8 +988,7 @@ async def test_crud_users_works(sess):
    # Add user to session and create a patch to add folder to user
    folder_not_published = {"name": "Mock User Folder", "description": "Mock folder for testing users"}
    folder_id = await post_folder(sess, folder_not_published)
-    patch_add_folder = [{"op": "add", "path": "/folders/-", "value": [folder_id]}]
-    await patch_user(sess, user_id, real_user_id, patch_add_folder)
+
    async with sess.get(f"{users_url}/{user_id}") as resp:
        LOG.debug(f"Checking that folder {folder_id} was added")
        res = await resp.json()

@@ -1007,10 +1020,7 @@ async def test_crud_users_works(sess):
        assert delete_folder_id not in res["folders"], "delete folder still exists at user"

    template_id = await post_template_json(sess, "study", "SRP000539.json")
-    patch_templates_user = [
-        {"op": "add", "path": "/templates/-", "value": {"accessionId": template_id, "schema": "template-study"}}
-    ]
-    await patch_user(sess, user_id, real_user_id, patch_templates_user)
+    await patch_template(sess, "study", template_id, "patch.json")
    async with sess.get(f"{users_url}/{user_id}") as resp:
        LOG.debug(f"Checking that template: {template_id} was added")
        res = await resp.json()
        assert res["templates"][0]["accessionId"] == template_id, "added template does not exists"

    await delete_template(sess, "study", template_id)
    async with sess.get(f"{users_url}/{user_id}") as resp:
        LOG.debug(f"Checking that template {template_id} was added")
        res = await resp.json()
        assert len(res["templates"]) == 0, "template was not deleted from users"

+    template_ids = await post_template_json(sess, "study", "SRP000539_list.json")
+    assert len(template_ids) == 2, "templates could not be added as batch"

    # Delete user
    await delete_user(sess, user_id)
    # 401 means API is inaccessible
thus session ended diff --git a/tests/test_files/study/SRP000539_list.json b/tests/test_files/study/SRP000539_list.json new file mode 100644 index 000000000..b852eed88 --- /dev/null +++ b/tests/test_files/study/SRP000539_list.json @@ -0,0 +1,73 @@ +[{ + "centerName": "GEO", + "alias": "GSE10966", + "identifiers": { + "primaryId": "SRP000539", + "externalId": [ + { + "namespace": "BioProject", + "label": "primary", + "value": "PRJNA108793" + }, + { + "namespace": "GEO", + "value": "GSE10966" + } + ] + }, + "descriptor": { + "studyTitle": "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing", + "studyType": "Other", + "studyAbstract": "Part of a set of highly integrated epigenome maps for Arabidopsis thaliana. Keywords: Illumina high-throughput bisulfite sequencing Overall design: Whole genome shotgun bisulfite sequencing of wildtype Arabidopsis plants (Columbia-0), and met1, drm1 drm2 cmt3, and ros1 dml2 dml3 null mutants using the Illumina Genetic Analyzer.", + "centerProjectName": "GSE10966" + }, + "studyLinks": [ + { + "xrefDb": "pubmed", + "xrefId": "18423832" + } + ], + "studyAttributes": [ + { + "tag": "parent_bioproject", + "value": "PRJNA107265" + } + ] + }, { + "centerName": "GEO", + "alias": "GSE10967", + "identifiers": { + "primaryId": "SRP000538", + "externalId": [ + { + "namespace": "BioProject", + "label": "primary", + "value": "PRJNA108793" + }, + { + "namespace": "GEO", + "value": "GSE10966" + } + ] + }, + "descriptor": { + "studyTitle": "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing", + "studyType": "Other", + "studyAbstract": "Part of a set of highly integrated epigenome maps for Arabidopsis thaliana. Keywords: Illumina high-throughput bisulfite sequencing Overall design: Whole genome shotgun bisulfite sequencing of wildtype Arabidopsis plants (Columbia-0), and met1, drm1 drm2 cmt3, and ros1 dml2 dml3 null mutants using the Illumina Genetic Analyzer.", + "centerProjectName": "GSE10966" + }, + "studyLinks": [ + { + "xrefDb": "pubmed", + "xrefId": "18423832" + } + ], + "studyAttributes": [ + { + "tag": "parent_bioproject", + "value": "PRJNA107265" + } + ] + } + + ] From 8f9824e543be92a01f688312d9452aef7a1af965 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 Sep 2021 09:04:38 +0000 Subject: [PATCH 007/336] Bump xmlschema from 1.7.0 to 1.7.1 Bumps [xmlschema](https://github.com/sissaschool/xmlschema) from 1.7.0 to 1.7.1. - [Release notes](https://github.com/sissaschool/xmlschema/releases) - [Changelog](https://github.com/sissaschool/xmlschema/blob/master/CHANGELOG.rst) - [Commits](https://github.com/sissaschool/xmlschema/compare/v1.7.0...v1.7.1) --- updated-dependencies: - dependency-name: xmlschema dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 191c5e38a..7501b05e5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,5 +5,5 @@ jsonschema==3.2.0 motor==2.5.1 python-dateutil==2.8.2 uvloop==0.16.0 -xmlschema==1.7.0 +xmlschema==1.7.1 Authlib==0.15.4 From e580b1da4678c47a7fc2bb8a214bff642130d985 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 Sep 2021 09:03:50 +0000 Subject: [PATCH 008/336] Bump sphinx-rtd-theme from 0.5.2 to 1.0.0 Bumps [sphinx-rtd-theme](https://github.com/readthedocs/sphinx_rtd_theme) from 0.5.2 to 1.0.0. - [Release notes](https://github.com/readthedocs/sphinx_rtd_theme/releases) - [Changelog](https://github.com/readthedocs/sphinx_rtd_theme/blob/master/docs/changelog.rst) - [Commits](https://github.com/readthedocs/sphinx_rtd_theme/compare/0.5.2...1.0.0) --- updated-dependencies: - dependency-name: sphinx-rtd-theme dependency-type: direct:development update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b336b3302..ea019f6b9 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ install_requires=requirements, extras_require={ "test": ["coverage==5.5", "coveralls==3.2.0", "pytest==6.2.4", "pytest-cov==2.12.1", "tox==3.24.3"], - "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==0.5.2"], + "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"], }, package_data={ "": [ From 15a461b0dca6597c134789f3cab30c365efe079d Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 27 Sep 2021 14:06:57 +0300 Subject: [PATCH 009/336] fix type for parser --- metadata_backend/helpers/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata_backend/helpers/parser.py b/metadata_backend/helpers/parser.py index ed8177412..efdfe54b7 100644 --- a/metadata_backend/helpers/parser.py +++ b/metadata_backend/helpers/parser.py @@ -76,7 +76,7 @@ def _flatten(self, data: Any) -> Union[Dict, List, str, None]: refs = ["analysisRef", "sampleRef", "runRef", "experimentRef"] - children = self.dict() + children: Any = self.dict() for key, value, _ in self.map_content(data.content): key = self._to_camel(key.lower()) From d5eaf327c3967be8c4174fc1dd1dd72020c5ad6e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Sep 2021 11:09:26 +0000 Subject: [PATCH 010/336] Bump pytest from 6.2.4 to 6.2.5 Bumps [pytest](https://github.com/pytest-dev/pytest) from 6.2.4 to 6.2.5. - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/6.2.4...6.2.5) --- updated-dependencies: - dependency-name: pytest dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ea019f6b9..1c2636e6b 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ packages=find_packages(exclude=["tests"]), install_requires=requirements, extras_require={ - "test": ["coverage==5.5", "coveralls==3.2.0", "pytest==6.2.4", "pytest-cov==2.12.1", "tox==3.24.3"], + "test": ["coverage==5.5", "coveralls==3.2.0", "pytest==6.2.5", "pytest-cov==2.12.1", "tox==3.24.3"], "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"], }, package_data={ From 598fa2dfdbff46238ef8521a93f9cb313ba5bc70 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Sep 2021 11:20:24 +0000 Subject: [PATCH 011/336] Bump tox from 3.24.3 to 3.24.4 Bumps [tox](https://github.com/tox-dev/tox) from 3.24.3 to 3.24.4. - [Release notes](https://github.com/tox-dev/tox/releases) - [Changelog](https://github.com/tox-dev/tox/blob/master/docs/changelog.rst) - [Commits](https://github.com/tox-dev/tox/compare/3.24.3...3.24.4) --- updated-dependencies: - dependency-name: tox dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1c2636e6b..2af26d70d 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ packages=find_packages(exclude=["tests"]), install_requires=requirements, extras_require={ - "test": ["coverage==5.5", "coveralls==3.2.0", "pytest==6.2.5", "pytest-cov==2.12.1", "tox==3.24.3"], + "test": ["coverage==5.5", "coveralls==3.2.0", "pytest==6.2.5", "pytest-cov==2.12.1", "tox==3.24.4"], "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"], }, package_data={ From 5356321c8325f679ed3121a8c9aa9c0fd32dae0e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Oct 2021 09:04:20 +0000 Subject: [PATCH 012/336] Bump cryptography from 3.4.8 to 35.0.0 Bumps [cryptography](https://github.com/pyca/cryptography) from 3.4.8 to 35.0.0. - [Release notes](https://github.com/pyca/cryptography/releases) - [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pyca/cryptography/compare/3.4.8...35.0.0) --- updated-dependencies: - dependency-name: cryptography dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7501b05e5..3a055b0e1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ aiohttp==3.7.4.post0 -cryptography==3.4.8 +cryptography==35.0.0 gunicorn==20.1.0 jsonschema==3.2.0 motor==2.5.1 From 6983909d8873fb3751fcc4712f9b39b39a4e1060 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Oct 2021 09:04:26 +0000 Subject: [PATCH 013/336] Bump xmlschema from 1.7.1 to 1.8.0 Bumps [xmlschema](https://github.com/sissaschool/xmlschema) from 1.7.1 to 1.8.0. - [Release notes](https://github.com/sissaschool/xmlschema/releases) - [Changelog](https://github.com/sissaschool/xmlschema/blob/master/CHANGELOG.rst) - [Commits](https://github.com/sissaschool/xmlschema/compare/v1.7.1...v1.8.0) --- updated-dependencies: - dependency-name: xmlschema dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7501b05e5..d500db239 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,5 +5,5 @@ jsonschema==3.2.0 motor==2.5.1 python-dateutil==2.8.2 uvloop==0.16.0 -xmlschema==1.7.1 +xmlschema==1.8.0 Authlib==0.15.4 From 75219722601e4093dfd0f2e03747c6bb1169c1a8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Oct 2021 09:04:30 +0000 Subject: [PATCH 014/336] Bump pytest-cov from 2.12.1 to 3.0.0 Bumps [pytest-cov](https://github.com/pytest-dev/pytest-cov) from 2.12.1 to 3.0.0. - [Release notes](https://github.com/pytest-dev/pytest-cov/releases) - [Changelog](https://github.com/pytest-dev/pytest-cov/blob/master/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest-cov/compare/v2.12.1...v3.0.0) --- updated-dependencies: - dependency-name: pytest-cov dependency-type: direct:development update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2af26d70d..47aacd31f 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ packages=find_packages(exclude=["tests"]), install_requires=requirements, extras_require={ - "test": ["coverage==5.5", "coveralls==3.2.0", "pytest==6.2.5", "pytest-cov==2.12.1", "tox==3.24.4"], + "test": ["coverage==5.5", "coveralls==3.2.0", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.4"], "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"], }, package_data={ From 7402015c31b915af49f816277115b2735b1b4420 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 4 Oct 2021 15:29:53 +0300 Subject: [PATCH 015/336] typing issues fixes for xmlschema bump to 1.8.0 --- metadata_backend/helpers/parser.py | 25 +++++++++++++++++-------- metadata_backend/helpers/validator.py | 7 ++++--- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/metadata_backend/helpers/parser.py b/metadata_backend/helpers/parser.py index efdfe54b7..cab1b90c0 100644 --- a/metadata_backend/helpers/parser.py +++ b/metadata_backend/helpers/parser.py @@ -1,7 +1,7 @@ """Tool to parse XML files to JSON.""" import re -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Optional, Type, Union from aiohttp import web from xmlschema import XMLSchema, XMLSchemaConverter, XMLSchemaException, XsdElement, XsdType @@ -21,7 +21,13 @@ class MetadataXMLConverter(XMLSchemaConverter): https://github.com/enasequence/schema/tree/master/src/main/resources/uk/ac/ebi/ena/sra/schema """ - def __init__(self, namespaces: Any = None, dict_class: dict = None, list_class: list = None, **kwargs: Any) -> None: + def __init__( + self, + namespaces: Any = None, + dict_class: Optional[Type[Dict[str, Any]]] = None, + list_class: Optional[Type[List[Any]]] = None, + **kwargs: Any, + ) -> None: """Initialize converter and settings. :param namespaces: Map from namespace prefixes to URI. 
@@ -280,12 +286,15 @@ def parse(self, schema_type: str, content: str) -> Dict:
             reason = "Current request could not be processed as the submitted file was not valid"
             LOG.error(reason)
             raise web.HTTPBadRequest(reason=reason)
-        result = schema.to_dict(content, converter=MetadataXMLConverter, decimal_type=float, dict_class=dict)[
-            schema_type.lower()
-        ]
-        if schema_type.lower() != "submission":
-            JSONValidator(result, schema_type.lower()).validate
-        return result
+        # result is of type:
+        # Union[Any, List[Any], Tuple[None, List[XMLSchemaValidationError]],
+        # Tuple[Any, List[XMLSchemaValidationError]], Tuple[List[Any], List[XMLSchemaValidationError]]]
+        # however we annotate it as Any, since that is easier to work with
+        result: Any = schema.to_dict(content, converter=MetadataXMLConverter, decimal_type=float, dict_class=dict)
+        _schema_type: str = schema_type.lower()
+        if _schema_type != "submission":
+            JSONValidator(result[_schema_type], _schema_type).validate
+        return result[_schema_type]

     @staticmethod
     def _load_schema(schema_type: str) -> XMLSchema:
diff --git a/metadata_backend/helpers/validator.py b/metadata_backend/helpers/validator.py
index 4e969b2b7..05d9fec22 100644
--- a/metadata_backend/helpers/validator.py
+++ b/metadata_backend/helpers/validator.py
@@ -3,7 +3,7 @@
 import json
 import re
 from io import StringIO
-from typing import Any, Dict
+from typing import Any, Dict, cast
 from urllib.error import URLError

 from aiohttp import web
@@ -52,8 +52,9 @@ def resp_body(self) -> str:

         except XMLSchemaValidationError as error:
             # Parse reason and instance from the validation error message
-            reason = error.reason
-            instance = ElementTree.tostring(error.elem, encoding="unicode")
+            reason = str(error.reason)
+            _elem = cast(ElementTree.Element, error.elem)
+            instance = ElementTree.tostring(_elem, encoding="unicode")
             # Replace element address in reason with instance element
             if "<" and ">" in reason:
                 instance_parent = "".join((instance.split(">")[0], ">"))

From d45408b850bb7249ec756848db0f68cb01cc1836 Mon Sep 17 00:00:00 2001
From: Stefan Negru
Date: Mon, 4 Oct 2021 15:48:57 +0300
Subject: [PATCH 016/336] move fixed coveralls submission to actions

---
 .github/workflows/unit.yml |  7 ++++---
 tests/coveralls.py         | 15 ---------------
 tox.ini                    |  3 +--
 3 files changed, 5 insertions(+), 20 deletions(-)
 delete mode 100644 tests/coveralls.py

diff --git a/.github/workflows/unit.yml b/.github/workflows/unit.yml
index a1dd9597d..604444476 100644
--- a/.github/workflows/unit.yml
+++ b/.github/workflows/unit.yml
@@ -27,7 +27,8 @@ jobs:
         python -m pip install --upgrade pip
         pip install tox tox-gh-actions
     - name: Run unit tests for python 3.8
-      if: ${{ matrix.python-version == '3.8' }}
-      env:
-        COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }}
       run: tox -e py38
+    - name: Send coverage to coveralls
+      uses: coverallsapp/github-action@master
+      with:
+        github-token: ${{ secrets.github_token }}
diff --git a/tests/coveralls.py b/tests/coveralls.py
deleted file mode 100644
index 10ee5e278..000000000
--- a/tests/coveralls.py
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/env/python
-
-"""Coveralls settings for travis and local usage."""
-
-import os
-import sys
-from subprocess import call
-
-if __name__ == "__main__":
-    if "COVERALLS_REPO_TOKEN" in os.environ:
-        rc = call("coveralls")
-        sys.stdout.write("Coveralls report from Github Actions.\n")
-        raise SystemExit(rc)
-    else:
-        sys.stdout.write("Not on Github Actions.\n")
diff --git a/tox.ini b/tox.ini
index b724108db..cc9c00a4b 100644
--- a/tox.ini
+++ b/tox.ini
@@ -54,8 +54,7 @@
deps = .[test] -rrequirements.txt # Stop after first failure -commands = py.test -x --cov=metadata_backend tests/ - python {toxinidir}/tests/coveralls.py +commands = py.test -x --cov=metadata_backend --cov-report xml --cov-report term tests/ [gh-actions] python = From 4229a8444d8f9a9da4d11cfe905be826590e3193 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 4 Oct 2021 15:49:43 +0300 Subject: [PATCH 017/336] add path to coverage report --- .github/workflows/unit.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/unit.yml b/.github/workflows/unit.yml index 604444476..6f946c049 100644 --- a/.github/workflows/unit.yml +++ b/.github/workflows/unit.yml @@ -32,3 +32,4 @@ jobs: uses: coverallsapp/github-action@master with: github-token: ${{ secrets.github_token }} + path-to-lcov: coverage.xml From f896ed87156c5a5bdfd9b6d8d14142b7901aec3b Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 4 Oct 2021 16:02:10 +0300 Subject: [PATCH 018/336] make use of coveragepy-lcov to convert to lcov --- .github/workflows/unit.yml | 8 +++++--- setup.py | 2 +- tox.ini | 1 - 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/unit.yml b/.github/workflows/unit.yml index 6f946c049..74022bb20 100644 --- a/.github/workflows/unit.yml +++ b/.github/workflows/unit.yml @@ -25,11 +25,13 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install tox tox-gh-actions + pip install tox tox-gh-actions coveragepy-lcov - name: Run unit tests for python 3.8 - run: tox -e py38 + run: | + tox -e py38 + coveragepy-lcov --output_file_path lcov.info - name: Send coverage to coveralls uses: coverallsapp/github-action@master with: github-token: ${{ secrets.github_token }} - path-to-lcov: coverage.xml + path-to-lcov: lcov.info diff --git a/setup.py b/setup.py index 47aacd31f..21e2950c6 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ packages=find_packages(exclude=["tests"]), install_requires=requirements, extras_require={ - "test": ["coverage==5.5", "coveralls==3.2.0", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.4"], + "test": ["coverage==5.5", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.4"], "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"], }, package_data={ diff --git a/tox.ini b/tox.ini index cc9c00a4b..84414d445 100644 --- a/tox.ini +++ b/tox.ini @@ -49,7 +49,6 @@ deps = commands = bandit -r metadata_backend/ [testenv] -passenv = COVERALLS_REPO_TOKEN deps = .[test] -rrequirements.txt From 83b9791d0f7c793fca126e862e49d1416e4ac1ee Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 4 Oct 2021 16:15:14 +0300 Subject: [PATCH 019/336] no need to specify output of coverage, use default --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 84414d445..8159d3ddd 100644 --- a/tox.ini +++ b/tox.ini @@ -53,7 +53,7 @@ deps = .[test] -rrequirements.txt # Stop after first failure -commands = py.test -x --cov=metadata_backend --cov-report xml --cov-report term tests/ +commands = py.test -x --cov=metadata_backend tests/ [gh-actions] python = From d0bbb944aa8f895d67ee1ec91531490496698204 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 4 Oct 2021 16:29:28 +0300 Subject: [PATCH 020/336] add test for template tags --- tests/integration/run_tests.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index e351f35c7..fe840450d 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ 
-1008,6 +1008,7 @@ async def test_crud_users_works(sess):
     folder_not_published = {"name": "Delete Folder", "description": "Mock folder to delete while testing users"}
     delete_folder_id = await post_folder(sess, folder_not_published)
     patch_delete_folder = [{"op": "add", "path": "/folders/-", "value": [delete_folder_id]}]
+
     await patch_user(sess, user_id, real_user_id, patch_delete_folder)
     async with sess.get(f"{users_url}/{user_id}") as resp:
         LOG.debug(f"Checking that folder {delete_folder_id} was added")
@@ -1025,6 +1026,22 @@ async def test_crud_users_works(sess):
         LOG.debug(f"Checking that template: {template_id} was added")
         res = await resp.json()
         assert res["templates"][0]["accessionId"] == template_id, "added template does not exist"
+        assert "tags" not in res["templates"][0]
+
+    patch_change_tags_object = [
+        {
+            "op": "add",
+            "path": "/templates/0/tags",
+            "value": {"displayTitle": "Test"},
+        }
+    ]
+    await patch_user(sess, user_id, real_user_id, patch_change_tags_object)
+
+    async with sess.get(f"{users_url}/{user_id}") as resp:
+        LOG.debug(f"Checking that template: {template_id} was added")
+        res = await resp.json()
+        assert res["templates"][0]["accessionId"] == template_id, "added template does not exist"
+        assert res["templates"][0]["tags"]["displayTitle"] == "Test"

     await delete_template(sess, "study", template_id)

From 242d3d9bf621c4ee3c075c76ad2cbcd973555ced Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 4 Oct 2021 13:32:23 +0000
Subject: [PATCH 021/336] Bump jsonschema from 3.2.0 to 4.0.1

Bumps [jsonschema](https://github.com/Julian/jsonschema) from 3.2.0 to 4.0.1.
- [Release notes](https://github.com/Julian/jsonschema/releases)
- [Changelog](https://github.com/Julian/jsonschema/blob/main/CHANGELOG.rst)
- [Commits](https://github.com/Julian/jsonschema/compare/v3.2.0...v4.0.1)

---
updated-dependencies:
- dependency-name: jsonschema
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot]
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 2296a8d63..0fedf5ad1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 aiohttp==3.7.4.post0
 cryptography==35.0.0
 gunicorn==20.1.0
-jsonschema==3.2.0
+jsonschema==4.0.1
 motor==2.5.1
 python-dateutil==2.8.2
 uvloop==0.16.0

From 9dcf0f9893f763626ac68500812c61326e1671c5 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 4 Oct 2021 13:32:29 +0000
Subject: [PATCH 022/336] Bump coverage from 5.5 to 6.0

Bumps [coverage](https://github.com/nedbat/coveragepy) from 5.5 to 6.0.
- [Release notes](https://github.com/nedbat/coveragepy/releases)
- [Changelog](https://github.com/nedbat/coveragepy/blob/master/CHANGES.rst)
- [Commits](https://github.com/nedbat/coveragepy/compare/coverage-5.5...6.0)

---
updated-dependencies:
- dependency-name: coverage
  dependency-type: direct:development
  update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 21e2950c6..34172553d 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ packages=find_packages(exclude=["tests"]), install_requires=requirements, extras_require={ - "test": ["coverage==5.5", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.4"], + "test": ["coverage==6.0", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.4"], "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"], }, package_data={ From 15b93b914d539bac996181b376a17b4c5496ed44 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 4 Oct 2021 17:30:38 +0300 Subject: [PATCH 023/336] add option for tags when POSTing templates --- metadata_backend/api/handlers.py | 28 ++-- tests/integration/run_tests.py | 4 +- tests/test_files/study/SRP000539_list.json | 122 +++++++++--------- .../test_files/study/SRP000539_template.json | 38 ++++++ 4 files changed, 123 insertions(+), 69 deletions(-) create mode 100644 tests/test_files/study/SRP000539_template.json diff --git a/metadata_backend/api/handlers.py b/metadata_backend/api/handlers.py index 47837016c..2f68b18d8 100644 --- a/metadata_backend/api/handlers.py +++ b/metadata_backend/api/handlers.py @@ -524,19 +524,29 @@ async def post_template(self, req: Request) -> Response: if isinstance(content, list): tmpl_list = [] - for tmpl in content: - accession_id = await operator.create_metadata_object(collection, tmpl) - await user_op.assign_objects( - current_user, "templates", [{"accessionId": accession_id, "schema": collection}] - ) + for num, tmpl in enumerate(content): + if "template" not in tmpl: + reason = f"template key is missing from request body for element: {num}." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + accession_id = await operator.create_metadata_object(collection, tmpl["template"]) + data = [{"accessionId": accession_id, "schema": collection}] + if "tags" in tmpl: + data[0]["tags"] = tmpl["tags"] + await user_op.assign_objects(current_user, "templates", data) tmpl_list.append({"accessionId": accession_id}) body = ujson.dumps(tmpl_list, escape_forward_slashes=False) else: - accession_id = await operator.create_metadata_object(collection, content) - await user_op.assign_objects( - current_user, "templates", [{"accessionId": accession_id, "schema": collection}] - ) + if "template" not in content: + reason = "template key is missing from request body." 
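+                # NOTE (sketch, assumption drawn from the fixtures added in this
+                # patch): a single-template request body mirrors
+                # tests/test_files/study/SRP000539_template.json, i.e.
+                # {"template": {...metadata object...}} plus an optional
+                # top-level "tags" object.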
+ LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + accession_id = await operator.create_metadata_object(collection, content["template"]) + data = [{"accessionId": accession_id, "schema": collection}] + if "tags" in content: + data[0]["tags"] = content["tags"] + await user_op.assign_objects(current_user, "templates", data) body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index fe840450d..45ed1bc11 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -933,7 +933,7 @@ async def test_getting_user_items(sess): assert resp.status == 200, "HTTP Status code error" # Add template to user - template_id = await post_template_json(sess, "study", "SRP000539.json") + template_id = await post_template_json(sess, "study", "SRP000539_template.json") # Test querying for list of user draft templates async with sess.get(f"{users_url}/{user_id}?items=templates") as resp: @@ -1020,7 +1020,7 @@ async def test_crud_users_works(sess): res = await resp.json() assert delete_folder_id not in res["folders"], "delete folder still exists at user" - template_id = await post_template_json(sess, "study", "SRP000539.json") + template_id = await post_template_json(sess, "study", "SRP000539_template.json") await patch_template(sess, "study", template_id, "patch.json") async with sess.get(f"{users_url}/{user_id}") as resp: LOG.debug(f"Checking that template: {template_id} was added") diff --git a/tests/test_files/study/SRP000539_list.json b/tests/test_files/study/SRP000539_list.json index b852eed88..80f55400a 100644 --- a/tests/test_files/study/SRP000539_list.json +++ b/tests/test_files/study/SRP000539_list.json @@ -1,73 +1,79 @@ -[{ - "centerName": "GEO", - "alias": "GSE10966", - "identifiers": { - "primaryId": "SRP000539", - "externalId": [ - { - "namespace": "BioProject", - "label": "primary", - "value": "PRJNA108793" - }, - { - "namespace": "GEO", - "value": "GSE10966" - } - ] - }, - "descriptor": { - "studyTitle": "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing", - "studyType": "Other", - "studyAbstract": "Part of a set of highly integrated epigenome maps for Arabidopsis thaliana. Keywords: Illumina high-throughput bisulfite sequencing Overall design: Whole genome shotgun bisulfite sequencing of wildtype Arabidopsis plants (Columbia-0), and met1, drm1 drm2 cmt3, and ros1 dml2 dml3 null mutants using the Illumina Genetic Analyzer.", - "centerProjectName": "GSE10966" - }, - "studyLinks": [ +[ + { + "template": { + "centerName": "GEO", + "alias": "GSE10966", + "identifiers": { + "primaryId": "SRP000539", + "externalId": [ + { + "namespace": "BioProject", + "label": "primary", + "value": "PRJNA108793" + }, + { + "namespace": "GEO", + "value": "GSE10966" + } + ] + }, + "descriptor": { + "studyTitle": "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing", + "studyType": "Other", + "studyAbstract": "Part of a set of highly integrated epigenome maps for Arabidopsis thaliana. 
Keywords: Illumina high-throughput bisulfite sequencing Overall design: Whole genome shotgun bisulfite sequencing of wildtype Arabidopsis plants (Columbia-0), and met1, drm1 drm2 cmt3, and ros1 dml2 dml3 null mutants using the Illumina Genetic Analyzer.", + "centerProjectName": "GSE10966" + }, + "studyLinks": [ { "xrefDb": "pubmed", "xrefId": "18423832" } ], - "studyAttributes": [ - { - "tag": "parent_bioproject", - "value": "PRJNA107265" - } - ] - }, { - "centerName": "GEO", - "alias": "GSE10967", - "identifiers": { - "primaryId": "SRP000538", - "externalId": [ + "studyAttributes": [ { - "namespace": "BioProject", - "label": "primary", - "value": "PRJNA108793" - }, - { - "namespace": "GEO", - "value": "GSE10966" + "tag": "parent_bioproject", + "value": "PRJNA107265" } ] }, - "descriptor": { - "studyTitle": "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing", - "studyType": "Other", - "studyAbstract": "Part of a set of highly integrated epigenome maps for Arabidopsis thaliana. Keywords: Illumina high-throughput bisulfite sequencing Overall design: Whole genome shotgun bisulfite sequencing of wildtype Arabidopsis plants (Columbia-0), and met1, drm1 drm2 cmt3, and ros1 dml2 dml3 null mutants using the Illumina Genetic Analyzer.", - "centerProjectName": "GSE10966" - }, - "studyLinks": [ + "tags": {"Submission": "Form"} + }, + { + "template": { + "centerName": "GEO", + "alias": "GSE10967", + "identifiers": { + "primaryId": "SRP000538", + "externalId": [ + { + "namespace": "BioProject", + "label": "primary", + "value": "PRJNA108793" + }, + { + "namespace": "GEO", + "value": "GSE10966" + } + ] + }, + "descriptor": { + "studyTitle": "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing", + "studyType": "Other", + "studyAbstract": "Part of a set of highly integrated epigenome maps for Arabidopsis thaliana. Keywords: Illumina high-throughput bisulfite sequencing Overall design: Whole genome shotgun bisulfite sequencing of wildtype Arabidopsis plants (Columbia-0), and met1, drm1 drm2 cmt3, and ros1 dml2 dml3 null mutants using the Illumina Genetic Analyzer.", + "centerProjectName": "GSE10966" + }, + "studyLinks": [ { "xrefDb": "pubmed", "xrefId": "18423832" } ], - "studyAttributes": [ - { - "tag": "parent_bioproject", - "value": "PRJNA107265" - } - ] + "studyAttributes": [ + { + "tag": "parent_bioproject", + "value": "PRJNA107265" + } + ] + } } - - ] +] diff --git a/tests/test_files/study/SRP000539_template.json b/tests/test_files/study/SRP000539_template.json new file mode 100644 index 000000000..79be5fe58 --- /dev/null +++ b/tests/test_files/study/SRP000539_template.json @@ -0,0 +1,38 @@ +{ + "template": { + "centerName": "GEO", + "alias": "GSE10966", + "identifiers": { + "primaryId": "SRP000539", + "externalId": [ + { + "namespace": "BioProject", + "label": "primary", + "value": "PRJNA108793" + }, + { + "namespace": "GEO", + "value": "GSE10966" + } + ] + }, + "descriptor": { + "studyTitle": "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing", + "studyType": "Other", + "studyAbstract": "Part of a set of highly integrated epigenome maps for Arabidopsis thaliana. 
Keywords: Illumina high-throughput bisulfite sequencing Overall design: Whole genome shotgun bisulfite sequencing of wildtype Arabidopsis plants (Columbia-0), and met1, drm1 drm2 cmt3, and ros1 dml2 dml3 null mutants using the Illumina Genetic Analyzer.", + "centerProjectName": "GSE10966" + }, + "studyLinks": [ + { + "xrefDb": "pubmed", + "xrefId": "18423832" + } + ], + "studyAttributes": [ + { + "tag": "parent_bioproject", + "value": "PRJNA107265" + } + ] + } +} From 9b126b2fd1c69f653519ea2cb17e4ef66074d67e Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 4 Oct 2021 17:36:57 +0300 Subject: [PATCH 024/336] test tags for list of templates --- tests/integration/run_tests.py | 5 +++++ tests/test_files/study/SRP000539_list.json | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index 45ed1bc11..a7ffc997c 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -1053,6 +1053,11 @@ async def test_crud_users_works(sess): template_ids = await post_template_json(sess, "study", "SRP000539_list.json") assert len(template_ids) == 2, "templates could not be added as batch" + async with sess.get(f"{users_url}/{user_id}") as resp: + LOG.debug(f"Checking that template {template_id} was added") + res = await resp.json() + assert res["templates"][1]["tags"]["submissionType"] == "Form" + # Delete user await delete_user(sess, user_id) # 401 means API is innacessible thus session ended diff --git a/tests/test_files/study/SRP000539_list.json b/tests/test_files/study/SRP000539_list.json index 80f55400a..59b44201f 100644 --- a/tests/test_files/study/SRP000539_list.json +++ b/tests/test_files/study/SRP000539_list.json @@ -36,7 +36,7 @@ } ] }, - "tags": {"Submission": "Form"} + "tags": {"submissionType": "Form"} }, { "template": { From 85d8f6ec19e3eace8cd0dcdc79846d6c4c2485a7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Oct 2021 09:04:17 +0000 Subject: [PATCH 025/336] Bump jsonschema from 4.0.1 to 4.1.0 Bumps [jsonschema](https://github.com/Julian/jsonschema) from 4.0.1 to 4.1.0. - [Release notes](https://github.com/Julian/jsonschema/releases) - [Changelog](https://github.com/Julian/jsonschema/blob/main/CHANGELOG.rst) - [Commits](https://github.com/Julian/jsonschema/compare/v4.0.1...v4.1.0) --- updated-dependencies: - dependency-name: jsonschema dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index cfced2e39..1abb1833f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ aiohttp==3.7.4.post0 cryptography==35.0.0 gunicorn==20.1.0 -jsonschema==4.0.1 +jsonschema==4.1.0 motor==2.5.1 python-dateutil==2.8.2 uvloop==0.16.0 From 461c7789b4c1001466d3e1f87ade1f9e2b04a80f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Oct 2021 09:04:20 +0000 Subject: [PATCH 026/336] Bump ujson from 4.1.0 to 4.2.0 Bumps [ujson](https://github.com/ultrajson/ultrajson) from 4.1.0 to 4.2.0. - [Release notes](https://github.com/ultrajson/ultrajson/releases) - [Commits](https://github.com/ultrajson/ultrajson/compare/4.1.0...4.2.0) --- updated-dependencies: - dependency-name: ujson dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index cfced2e39..c48f1bd31 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,4 @@ python-dateutil==2.8.2 uvloop==0.16.0 xmlschema==1.8.0 Authlib==0.15.4 -ujson==4.1.0 +ujson==4.2.0 From d7ee7dd10a4cd62dbd9db4bf56f91e460e0abb55 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Oct 2021 09:04:26 +0000 Subject: [PATCH 027/336] Bump coverage from 6.0 to 6.0.1 Bumps [coverage](https://github.com/nedbat/coveragepy) from 6.0 to 6.0.1. - [Release notes](https://github.com/nedbat/coveragepy/releases) - [Changelog](https://github.com/nedbat/coveragepy/blob/master/CHANGES.rst) - [Commits](https://github.com/nedbat/coveragepy/compare/6.0...6.0.1) --- updated-dependencies: - dependency-name: coverage dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 34172553d..1f7221df5 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ packages=find_packages(exclude=["tests"]), install_requires=requirements, extras_require={ - "test": ["coverage==6.0", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.4"], + "test": ["coverage==6.0.1", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.4"], "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"], }, package_data={ From d0859d741f78b88e205fc73843ad0619933f7bf9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Oct 2021 09:05:33 +0000 Subject: [PATCH 028/336] Bump coverage from 6.0.1 to 6.0.2 Bumps [coverage](https://github.com/nedbat/coveragepy) from 6.0.1 to 6.0.2. - [Release notes](https://github.com/nedbat/coveragepy/releases) - [Changelog](https://github.com/nedbat/coveragepy/blob/master/CHANGES.rst) - [Commits](https://github.com/nedbat/coveragepy/compare/6.0.1...6.0.2) --- updated-dependencies: - dependency-name: coverage dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1f7221df5..969d8ed60 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ packages=find_packages(exclude=["tests"]), install_requires=requirements, extras_require={ - "test": ["coverage==6.0.1", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.4"], + "test": ["coverage==6.0.2", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.4"], "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"], }, package_data={ From 838cb61f512e494ae2131359f93e17453a2da295 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Oct 2021 09:03:55 +0000 Subject: [PATCH 029/336] Bump jsonschema from 4.1.0 to 4.1.2 Bumps [jsonschema](https://github.com/Julian/jsonschema) from 4.1.0 to 4.1.2. - [Release notes](https://github.com/Julian/jsonschema/releases) - [Changelog](https://github.com/Julian/jsonschema/blob/main/CHANGELOG.rst) - [Commits](https://github.com/Julian/jsonschema/compare/v4.1.0...v4.1.2) --- updated-dependencies: - dependency-name: jsonschema dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 96059b65e..17bdb7588 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ aiohttp==3.7.4.post0 cryptography==35.0.0 gunicorn==20.1.0 -jsonschema==4.1.0 +jsonschema==4.1.2 motor==2.5.1 python-dateutil==2.8.2 uvloop==0.16.0 From 6bc21fa58fd139f5dbaf6a276e0dd436cd8dacf6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Oct 2021 09:04:00 +0000 Subject: [PATCH 030/336] Bump authlib from 0.15.4 to 0.15.5 Bumps [authlib](https://github.com/lepture/authlib) from 0.15.4 to 0.15.5. - [Release notes](https://github.com/lepture/authlib/releases) - [Changelog](https://github.com/lepture/authlib/blob/v0.15.5/docs/changelog.rst) - [Commits](https://github.com/lepture/authlib/compare/v0.15.4...v0.15.5) --- updated-dependencies: - dependency-name: authlib dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 96059b65e..f9e9101b2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,5 +6,5 @@ motor==2.5.1 python-dateutil==2.8.2 uvloop==0.16.0 xmlschema==1.8.0 -Authlib==0.15.4 +Authlib==0.15.5 ujson==4.2.0 From ac8087f652b6908e03af10611e58d90ad08520fb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Oct 2021 09:04:04 +0000 Subject: [PATCH 031/336] Bump xmlschema from 1.8.0 to 1.8.1 Bumps [xmlschema](https://github.com/sissaschool/xmlschema) from 1.8.0 to 1.8.1. - [Release notes](https://github.com/sissaschool/xmlschema/releases) - [Changelog](https://github.com/sissaschool/xmlschema/blob/master/CHANGELOG.rst) - [Commits](https://github.com/sissaschool/xmlschema/compare/v1.8.0...v1.8.1) --- updated-dependencies: - dependency-name: xmlschema dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 96059b65e..355f3a22e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,6 @@ jsonschema==4.1.0 motor==2.5.1 python-dateutil==2.8.2 uvloop==0.16.0 -xmlschema==1.8.0 +xmlschema==1.8.1 Authlib==0.15.4 ujson==4.2.0 From c297e21d126f088cb082a37cd0ad80fa467c942f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Nov 2021 09:04:56 +0000 Subject: [PATCH 032/336] Bump aiohttp from 3.7.4.post0 to 3.8.0 Bumps [aiohttp](https://github.com/aio-libs/aiohttp) from 3.7.4.post0 to 3.8.0. - [Release notes](https://github.com/aio-libs/aiohttp/releases) - [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst) - [Commits](https://github.com/aio-libs/aiohttp/compare/v3.7.4.post0...v3.8.0) --- updated-dependencies: - dependency-name: aiohttp dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot]
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 49c02922f..0dd823510 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-aiohttp==3.7.4.post0
+aiohttp==3.8.0
 cryptography==35.0.0
 gunicorn==20.1.0
 jsonschema==4.1.2

From b62313b65c49118b55cf44223571c286daf1e2cb Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 1 Nov 2021 09:05:05 +0000
Subject: [PATCH 033/336] Bump coverage from 6.0.2 to 6.1.1

Bumps [coverage](https://github.com/nedbat/coveragepy) from 6.0.2 to 6.1.1.
- [Release notes](https://github.com/nedbat/coveragepy/releases)
- [Changelog](https://github.com/nedbat/coveragepy/blob/master/CHANGES.rst)
- [Commits](https://github.com/nedbat/coveragepy/compare/6.0.2...6.1.1)

---
updated-dependencies:
- dependency-name: coverage
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot]
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 969d8ed60..3807c41a9 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@
     packages=find_packages(exclude=["tests"]),
     install_requires=requirements,
     extras_require={
-        "test": ["coverage==6.0.2", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.4"],
+        "test": ["coverage==6.1.1", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.4"],
         "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"],
     },
     package_data={

From a7d8bbc21a2e39d3da4e733e42f1af04d50ebd99 Mon Sep 17 00:00:00 2001
From: Stefan Negru
Date: Mon, 1 Nov 2021 22:38:18 +0200
Subject: [PATCH 034/336] fix aiohttp 3.8.0 issues with tests

---
 tests/test_auth.py        | 13 ++++----
 tests/test_handlers.py    | 62 ++++++---------------------------------
 tests/test_health.py      | 11 +++++--
 tests/test_middlewares.py |  4 +--
 tests/test_server.py      |  5 +---
 5 files changed, 27 insertions(+), 68 deletions(-)

diff --git a/tests/test_auth.py b/tests/test_auth.py
index d9df000c4..a17f03c75 100644
--- a/tests/test_auth.py
+++ b/tests/test_auth.py
@@ -2,7 +2,7 @@
 from aiohttp.web_exceptions import HTTPForbidden, HTTPUnauthorized, HTTPBadRequest
 from metadata_backend.api.auth import AccessHandler
 from unittest.mock import MagicMock, patch
-from aiohttp.test_utils import AioHTTPTestCase, unittest_run_loop
+from aiohttp.test_utils import AioHTTPTestCase
 from metadata_backend.api.middlewares import generate_cookie

 from metadata_backend.server import init
@@ -31,12 +31,17 @@ async def setUpAsync(self):
         access_config = {}
         self.patch_access_handler = patch("metadata_backend.api.auth.AccessHandler", **access_config, spec=True)
         self.MockedAccessHandler = self.patch_access_handler.start()
+        self.app = await self.get_application()
+        self.server = await self.get_server(self.app)
+        self.client = await self.get_client(self.server)
+
+        await self.client.start_server()

     async def tearDownAsync(self):
         """Cleanup mocked stuff."""
         self.patch_access_handler.stop()
+        await self.client.close()

-    @unittest_run_loop
     async def test_login_with_default_config_values(self):
         """Test that login raises 404 when the AUTH_URL env variable is not a proper endpoint."""
         self.client.app["OIDC_State"] = set()
@@ -47,7 +52,6 @@ async def test_login_with_default_config_values(self):
         # Also check that we have registered oidc state
         self.assertEqual(1, len(self.client.app["OIDC_State"]))

-    @unittest_run_loop
     async def test_callback_fails_without_query_params(self):
         """Test that callback endpoint raises 400 if no params provided in the request."""
         response = await self.client.get("/callback")
@@ -55,7 +59,6 @@ async def test_callback_fails_without_query_params(self):
         resp_json = await response.json()
         self.assertEqual("AAI response is missing mandatory params, received: ", resp_json["detail"])

-    @unittest_run_loop
     async def test_callback_fails_with_wrong_oidc_state(self):
         """Test that callback endpoint raises 403 when state in the query is not the same as specified in session."""
         self.client.app["Session"] = {}
@@ -65,14 +68,12 @@ async def test_callback_fails_with_wrong_oidc_state(self):
         resp_json = await response.json()
         self.assertEqual(resp_json["detail"], "Bad user session.")

-    @unittest_run_loop
     async def test_callback_(self):
         """Test callback with a valid OIDC state."""
         self.client.app["OIDC_State"] = set(("mo_state_value",))
         response = await self.client.get("/callback?state=mo_state_value&code=code")
         self.assertIn(response.status, (403, 500))

-    @unittest_run_loop
     async def test_logout_works(self):
         """Test that logout revokes all tokens."""
         request = get_request_with_fernet()
diff --git a/tests/test_handlers.py b/tests/test_handlers.py
index 6db123428..b37786868 100644
--- a/tests/test_handlers.py
+++ b/tests/test_handlers.py
@@ -4,7 +4,7 @@
 from unittest.mock import patch

 from aiohttp import FormData
-from aiohttp.test_utils import AioHTTPTestCase, unittest_run_loop
+from aiohttp.test_utils import AioHTTPTestCase

 from metadata_backend.api.middlewares import generate_cookie
 from .mockups import get_request_with_fernet
@@ -29,6 +29,12 @@ async def setUpAsync(self):
         methods. Also sets up reusable test variables for different test
         methods.
         """
+        self.app = await self.get_application()
+        self.server = await self.get_server(self.app)
+        self.client = await self.get_client(self.server)
+
+        await self.client.start_server()
+
         self.test_ega_string = "EGA123456"
         self.query_accessionId = ("EDAG3991701442770179",)
         self.page_num = 3
@@ -118,6 +124,8 @@ async def tearDownAsync(self):
         self.patch_folderoperator.stop()
         self.patch_useroperator.stop()

+        await self.client.close()
+
     def create_submission_data(self, files):
         """Create request data from pairs of schemas and filenames."""
         data = FormData()
@@ -202,7 +210,6 @@ async def fake_useroperator_filter_user(self, query, item_type, page, per_page):
         """Fake read operation to return mocked user."""
         return self.test_user[item_type], len(self.test_user[item_type])

-    @unittest_run_loop
     async def test_submit_endpoint_submission_does_not_fail(self):
         """Test that submission with valid SUBMISSION.xml does not fail."""
         files = [("submission", "ERA521986_valid.xml")]
@@ -211,7 +218,6 @@ async def test_submit_endpoint_submission_does_not_fail(self):
         self.assertEqual(response.status, 200)
         self.assertEqual(response.content_type, "application/json")

-    @unittest_run_loop
     async def test_submit_endpoint_fails_without_submission_xml(self):
         """Test that basic POST submission fails with no submission.xml.

@@ -224,7 +230,6 @@ async def test_submit_endpoint_fails_without_submission_xml(self):
         self.assertEqual(response.status, 400)
         self.assertIn(failure_text, await response.text())

-    @unittest_run_loop
     async def test_submit_endpoint_fails_with_many_submission_xmls(self):
         """Test that submission fails when there are too many submission.xml files.
@@ -237,7 +242,6 @@ async def test_submit_endpoint_fails_with_many_submission_xmls(self):
         self.assertEqual(response.status, 400)
         self.assertIn(failure_text, await response.text())

-    @unittest_run_loop
     async def test_correct_schema_types_are_returned(self):
         """Test api endpoint for all schema types."""
         response = await self.client.get("/schemas")
@@ -257,7 +261,6 @@ async def test_correct_schema_types_are_returned(self):
         for schema_type in schema_types:
             self.assertIn(schema_type, response_text)

-    @unittest_run_loop
     async def test_correct_study_schema_are_returned(self):
         """Test api endpoint for study schema types."""
         response = await self.client.get("/schemas/study")
@@ -265,13 +268,11 @@ async def test_correct_study_schema_are_returned(self):
         self.assertIn("study", response_text)
         self.assertNotIn("submission", response_text)

-    @unittest_run_loop
     async def test_raises_invalid_schema(self):
         """Test api endpoint for an invalid schema type."""
         response = await self.client.get("/schemas/something")
         self.assertEqual(response.status, 404)

-    @unittest_run_loop
     async def test_raises_not_found_schema(self):
         """Test api endpoint for a schema type that is not found."""
         response = await self.client.get("/schemas/project")
@@ -279,7 +280,6 @@ async def test_raises_not_found_schema(self):
         resp_json = await response.json()
         self.assertEqual(resp_json["detail"], "The provided schema type could not be found. (project)")

-    @unittest_run_loop
     async def test_submit_object_works(self):
         """Test that submission is handled, XMLOperator is called."""
         files = [("study", "SRP000539.xml")]
@@ -289,7 +289,6 @@ async def test_submit_object_works(self):
         self.assertIn(self.test_ega_string, await response.text())
         self.MockedXMLOperator().create_metadata_object.assert_called_once()

-    @unittest_run_loop
     async def test_submit_object_works_with_json(self):
         """Test that JSON submission is handled, operator is called."""
         json_req = {
@@ -302,7 +301,6 @@ async def test_submit_object_works_with_json(self):
         self.assertIn(self.test_ega_string, await response.text())
         self.MockedOperator().create_metadata_object.assert_called_once()

-    @unittest_run_loop
     async def test_submit_object_missing_field_json(self):
         """Test that JSON has missing property."""
         json_req = {"centerName": "GEO", "alias": "GSE10966"}
@@ -311,7 +309,6 @@ async def test_submit_object_missing_field_json(self):
         self.assertEqual(response.status, 400)
         self.assertIn(reason, await response.text())

-    @unittest_run_loop
     async def test_submit_object_bad_field_json(self):
         """Test that JSON has bad studyType."""
         json_req = {
@@ -324,7 +321,6 @@ async def test_submit_object_bad_field_json(self):
         self.assertEqual(response.status, 400)
         self.assertIn(reason, await response.text())

-    @unittest_run_loop
     async def test_post_object_bad_json(self):
         """Test that post JSON is badly formatted."""
         json_req = {
@@ -337,7 +333,6 @@ async def test_post_object_bad_json(self):
         self.assertEqual(response.status, 400)
         self.assertIn(reason, await response.text())

-    @unittest_run_loop
     async def test_put_object_bad_json(self):
         """Test that put JSON is badly formatted."""
         json_req = {
@@ -351,7 +346,6 @@ async def test_put_object_bad_json(self):
         self.assertEqual(response.status, 400)
         self.assertIn(reason, await response.text())

-    @unittest_run_loop
     async def test_patch_object_bad_json(self):
         """Test that patch JSON is badly formatted."""
         json_req = {"centerName": "GEO", "alias": "GSE10966"}
@@ -361,7 +355,6 @@ async def test_patch_object_bad_json(self):
         self.assertEqual(response.status, 400)
         self.assertIn(reason, await response.text())

-    @unittest_run_loop
     async def test_submit_draft_works_with_json(self):
         """Test that draft JSON submission is handled, operator is called."""
         json_req = {
@@ -374,7 +367,6 @@ async def test_submit_draft_works_with_json(self):
         self.assertIn(self.test_ega_string, await response.text())
         self.MockedOperator().create_metadata_object.assert_called_once()

-    @unittest_run_loop
     async def test_put_draft_works_with_json(self):
         """Test that draft JSON put method is handled, operator is called."""
         json_req = {
@@ -388,7 +380,6 @@ async def test_put_draft_works_with_json(self):
         self.assertIn(self.test_ega_string, await response.text())
         self.MockedOperator().replace_metadata_object.assert_called_once()

-    @unittest_run_loop
     async def test_put_draft_works_with_xml(self):
         """Test that put XML submission is handled, XMLOperator is called."""
         files = [("study", "SRP000539.xml")]
@@ -399,7 +390,6 @@ async def test_put_draft_works_with_xml(self):
         self.assertIn(self.test_ega_string, await response.text())
         self.MockedXMLOperator().replace_metadata_object.assert_called_once()

-    @unittest_run_loop
     async def test_patch_draft_works_with_json(self):
         """Test that draft JSON patch method is handled, operator is called."""
         json_req = {"centerName": "GEO", "alias": "GSE10966"}
@@ -409,7 +399,6 @@ async def test_patch_draft_works_with_json(self):
         self.assertIn(self.test_ega_string, await response.text())
         self.MockedOperator().update_metadata_object.assert_called_once()

-    @unittest_run_loop
     async def test_patch_draft_raises_with_xml(self):
         """Test that patch XML submission raises error."""
         files = [("study", "SRP000539.xml")]
@@ -418,7 +407,6 @@ async def test_patch_draft_raises_with_xml(self):
         response = await self.client.patch(call, data=data)
         self.assertEqual(response.status, 415)

-    @unittest_run_loop
     async def test_submit_object_fails_with_too_many_files(self):
         """Test that sending two files to endpoint results in failure."""
         files = [("study", "SRP000539.xml"), ("study", "SRP000539_copy.xml")]
@@ -428,7 +416,6 @@ async def test_submit_object_fails_with_too_many_files(self):
         self.assertEqual(response.status, 400)
         self.assertIn(reason, await response.text())

-    @unittest_run_loop
     async def test_get_object(self):
         """Test that accessionId returns correct JSON object."""
         url = f"/objects/study/{self.query_accessionId}"
@@ -437,7 +424,6 @@ async def test_get_object(self):
         self.assertEqual(response.content_type, "application/json")
         self.assertEqual(self.metadata_json, await response.json())

-    @unittest_run_loop
     async def test_get_draft_object(self):
         """Test that draft accessionId returns correct JSON object."""
         url = f"/drafts/study/{self.query_accessionId}"
@@ -446,7 +432,6 @@ async def test_get_draft_object(self):
         self.assertEqual(response.content_type, "application/json")
         self.assertEqual(self.metadata_json, await response.json())

-    @unittest_run_loop
     async def test_get_object_as_xml(self):
         """Test that accessionId with XML query returns XML object."""
         url = f"/objects/study/{self.query_accessionId}"
@@ -455,7 +440,6 @@ async def test_get_object_as_xml(self):
         self.assertEqual(response.content_type, "text/xml")
         self.assertEqual(self.metadata_xml, await response.text())

-    @unittest_run_loop
     async def test_query_is_called_and_returns_json_in_correct_format(self):
         """Test query method calls operator and returns mocked JSON object."""
         url = f"/objects/study?studyType=foo&name=bar&page={self.page_num}" f"&per_page={self.page_size}"
@@ -475,7 +459,6 @@ async def test_query_is_called_and_returns_json_in_correct_format(self):
         self.assertEqual(self.page_num, args[2])
         self.assertEqual(self.page_size, args[3])

-    @unittest_run_loop
     async def test_delete_is_called(self):
         """Test delete method calls operator and returns status correctly."""
         url = "/objects/study/EGA123456"
         response = await self.client.delete(url)
         self.assertEqual(response.status, 204)
         self.MockedOperator().delete_metadata_object.assert_called_once()

-    @unittest_run_loop
     async def test_query_fails_with_xml_format(self):
         """Test query method calls operator and returns status correctly."""
         url = "/objects/study?studyType=foo&name=bar&format=xml"
         response = await self.client.get(url)
         json_resp = await response.json()
         self.assertEqual(response.status, 400)
         self.assertIn("xml-formatted query results are not supported", json_resp["detail"])

-    @unittest_run_loop
     async def test_validation_passes_for_valid_xml(self):
         """Test validation endpoint for valid xml."""
         files = [("study", "SRP000539.xml")]
         data = self.create_submission_data(files)
         response = await self.client.post("/validate", data=data)
         self.assertEqual(response.status, 200)
         self.assertIn('{"isValid":true}', await response.text())

-    @unittest_run_loop
     async def test_validation_fails_bad_schema(self):
         """Test validation fails for bad schema and valid xml."""
         files = [("fake", "SRP000539.xml")]
         data = self.create_submission_data(files)
         response = await self.client.post("/validate", data=data)
         self.assertEqual(response.status, 404)

-    @unittest_run_loop
     async def test_validation_fails_for_invalid_xml_syntax(self):
         """Test validation endpoint for XML with bad syntax."""
         files = [("study", "SRP000539_invalid.xml")]
         data = self.create_submission_data(files)
         response = await self.client.post("/validate", data=data)
         resp_dict = await response.json()
         self.assertEqual(response.status, 200)
         self.assertIn("Faulty XML file was given, mismatched tag", resp_dict["detail"]["reason"])

-    @unittest_run_loop
     async def test_validation_fails_for_invalid_xml(self):
         """Test validation endpoint for invalid xml."""
         files = [("study", "SRP000539_invalid2.xml")]
         data = self.create_submission_data(files)
         response = await self.client.post("/validate", data=data)
         resp_dict = await response.json()
         self.assertEqual(response.status, 200)
         self.assertIn("value must be one of", resp_dict["detail"]["reason"])

-    @unittest_run_loop
     async def test_validation_fails_with_too_many_files(self):
         """Test validation endpoint for too many files."""
         files = [("submission", "ERA521986_valid.xml"), ("submission", "ERA521986_valid2.xml")]
         data = self.create_submission_data(files)
         response = await self.client.post("/validate", data=data)
         self.assertEqual(response.status, 400)
         self.assertIn(reason, await response.text())

-    @unittest_run_loop
     async def test_operations_fail_for_wrong_schema_type(self):
         """Test 404 error is raised if incorrect schema name is given."""
         get_resp = await self.client.get("/objects/bad_scehma_name/some_id")
@@ -567,7 +543,6 @@ async def test_operations_fail_for_wrong_schema_type(self):
         json_get_resp = await get_resp.json()
         self.assertIn("Specified schema", json_get_resp["detail"])

-    @unittest_run_loop
     async def test_query_with_invalid_pagination_params(self):
         """Test that 400s are raised correctly with pagination."""
         get_resp = await self.client.get("/objects/study?page=2?title=joo")
@@ -577,7 +552,6 @@ async def test_query_with_invalid_pagination_params(self):
         get_resp = await self.client.get("/objects/study?per_page=0")
         self.assertEqual(get_resp.status, 400)

-    @unittest_run_loop
     async def test_folder_creation_works(self):
         """Test that folder is created and folder ID returned."""
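         # A minimal folder payload; FolderOperator is mocked in this test class,
         # so the assertions below only exercise the request/response shape.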
json_req = {"name": "test", "description": "test folder"} @@ -587,7 +561,6 @@ async def test_folder_creation_works(self): self.assertEqual(response.status, 201) self.assertEqual(json_resp["folderId"], self.folder_id) - @unittest_run_loop async def test_folder_creation_with_missing_data_fails(self): """Test that folder creation fails when missing data in request.""" json_req = {"description": "test folder"} @@ -596,7 +569,6 @@ async def test_folder_creation_with_missing_data_fails(self): self.assertEqual(response.status, 400) self.assertIn("'name' is a required property", json_resp["detail"]) - @unittest_run_loop async def test_folder_creation_with_empty_body_fails(self): """Test that folder creation fails when no data in request.""" response = await self.client.post("/folders") @@ -604,7 +576,6 @@ async def test_folder_creation_with_empty_body_fails(self): self.assertEqual(response.status, 400) self.assertIn("JSON is not correctly formatted.", json_resp["detail"]) - @unittest_run_loop async def test_get_folders_with_1_folder(self): """Test get_folders() endpoint returns list with 1 folder.""" self.MockedFolderOperator().query_folders.return_value = (self.test_folder, 1) @@ -622,7 +593,6 @@ async def test_get_folders_with_1_folder(self): } self.assertEqual(await response.json(), result) - @unittest_run_loop async def test_get_folders_with_no_folders(self): """Test get_folders() endpoint returns empty list.""" self.MockedFolderOperator().query_folders.return_value = ([], 0) @@ -640,7 +610,6 @@ async def test_get_folders_with_no_folders(self): } self.assertEqual(await response.json(), result) - @unittest_run_loop async def test_get_folders_with_bad_params(self): """Test get_folders() with faulty pagination parameters.""" response = await self.client.get("/folders?page=ayylmao") @@ -658,7 +627,6 @@ async def test_get_folders_with_bad_params(self): resp = await response.json() self.assertEqual(resp["detail"], "'published' parameter must be either 'true' or 'false'") - @unittest_run_loop async def test_get_folder_works(self): """Test folder is returned when correct folder id is given.""" response = await self.client.get("/folders/FOL12345678") @@ -667,7 +635,6 @@ async def test_get_folder_works(self): json_resp = await response.json() self.assertEqual(self.test_folder, json_resp) - @unittest_run_loop async def test_update_folder_fails_with_wrong_key(self): """Test that folder does not update when wrong keys are provided.""" data = [{"op": "add", "path": "/objects"}] @@ -677,7 +644,6 @@ async def test_update_folder_fails_with_wrong_key(self): reason = "Request contains '/objects' key that cannot be " "updated to folders." 
self.assertEqual(reason, json_resp["detail"]) - @unittest_run_loop async def test_update_folder_passes(self): """Test that folder would update with correct keys.""" self.MockedFolderOperator().update_folder.return_value = self.folder_id @@ -688,7 +654,6 @@ async def test_update_folder_passes(self): json_resp = await response.json() self.assertEqual(json_resp["folderId"], self.folder_id) - @unittest_run_loop async def test_folder_is_published(self): """Test that folder would be published.""" self.MockedFolderOperator().update_folder.return_value = self.folder_id @@ -698,7 +663,6 @@ async def test_folder_is_published(self): json_resp = await response.json() self.assertEqual(json_resp["folderId"], self.folder_id) - @unittest_run_loop async def test_folder_deletion_is_called(self): """Test that folder would be deleted.""" self.MockedFolderOperator().read_folder.return_value = self.test_folder @@ -707,7 +671,6 @@ async def test_folder_deletion_is_called(self): self.MockedFolderOperator().delete_folder.assert_called_once() self.assertEqual(response.status, 204) - @unittest_run_loop async def test_get_user_works(self): """Test user object is returned when correct user id is given.""" response = await self.client.get("/users/current") @@ -716,7 +679,6 @@ async def test_get_user_works(self): json_resp = await response.json() self.assertEqual(self.test_user, json_resp) - @unittest_run_loop async def test_get_user_drafts_with_no_drafts(self): """Test getting user drafts when user has no drafts.""" response = await self.client.get("/users/current?items=templates") @@ -734,7 +696,6 @@ async def test_get_user_drafts_with_no_drafts(self): } self.assertEqual(json_resp, result) - @unittest_run_loop async def test_get_user_templates_with_1_template(self): """Test getting user templates when user has 1 draft.""" user = self.test_user @@ -755,7 +716,6 @@ async def test_get_user_templates_with_1_template(self): } self.assertEqual(json_resp, result) - @unittest_run_loop async def test_get_user_folder_list(self): """Test get user with folders url returns a folder ID.""" self.MockedUserOperator().filter_user.return_value = (self.test_user["folders"], 1) @@ -774,7 +734,6 @@ async def test_get_user_folder_list(self): } self.assertEqual(json_resp, result) - @unittest_run_loop async def test_get_user_items_with_bad_param(self): """Test that error is raised if items parameter in query is not templates or folders.""" response = await self.client.get("/users/current?items=wrong_thing") @@ -784,7 +743,6 @@ async def test_get_user_items_with_bad_param(self): json_resp["detail"], "wrong_thing is a faulty item parameter. 
Should be either folders or templates" ) - @unittest_run_loop async def test_user_deletion_is_called(self): """Test that user object would be deleted.""" self.MockedUserOperator().read_user.return_value = self.test_user @@ -793,7 +751,6 @@ async def test_user_deletion_is_called(self): self.MockedUserOperator().read_user.assert_called_once() self.MockedUserOperator().delete_user.assert_called_once() - @unittest_run_loop async def test_update_user_fails_with_wrong_key(self): """Test that user object does not update when forbidden keys are provided.""" data = [{"op": "add", "path": "/userId"}] @@ -803,7 +760,6 @@ async def test_update_user_fails_with_wrong_key(self): reason = "Request contains '/userId' key that cannot be updated to user object" self.assertEqual(reason, json_resp["detail"]) - @unittest_run_loop async def test_update_user_passes(self): """Test that user object would update with correct keys.""" self.MockedUserOperator().update_user.return_value = self.user_id diff --git a/tests/test_health.py b/tests/test_health.py index db20cbdcc..7002562f2 100644 --- a/tests/test_health.py +++ b/tests/test_health.py @@ -2,7 +2,7 @@ from unittest.mock import patch -from aiohttp.test_utils import AioHTTPTestCase, unittest_run_loop +from aiohttp.test_utils import AioHTTPTestCase from metadata_backend.server import init @@ -24,15 +24,22 @@ async def setUpAsync(self): self.patch_motorclient = patch(class_motorclient, **motorclient_config, spec=True) self.MockedMotorClient = self.patch_motorclient.start() + self.app = await self.get_application() + self.server = await self.get_server(self.app) + self.client = await self.get_client(self.server) + + await self.client.start_server() + async def tearDownAsync(self): """Cleanup mocked stuff.""" self.patch_motorclient.stop() + await self.client.close() + async def fake_asynciomotorclient_server_info(self): """Fake server info method for a motor client.""" return True - @unittest_run_loop async def test_health_check_is_down(self): """Test that the health check returns a partially down status because a mongo db is not connected.""" response = await self.client.get("/health") diff --git a/tests/test_middlewares.py b/tests/test_middlewares.py index ba83a8ee2..b56c521a5 100644 --- a/tests/test_middlewares.py +++ b/tests/test_middlewares.py @@ -2,7 +2,7 @@ import unittest from aiohttp import FormData, web -from aiohttp.test_utils import AioHTTPTestCase, unittest_run_loop +from aiohttp.test_utils import AioHTTPTestCase from metadata_backend.server import init from metadata_backend.api.middlewares import generate_cookie, decrypt_cookie, _check_csrf @@ -17,7 +17,6 @@ async def get_application(self): """Retrieve web Application for test.""" return await init() - @unittest_run_loop async def test_bad_HTTP_request_converts_into_json_response(self): """Test that middleware reformats 400 error with problem details.""" data = _create_improper_data() @@ -29,7 +28,6 @@ async def test_bad_HTTP_request_converts_into_json_response(self): self.assertIn("There must be a submission.xml file in submission.", resp_dict["detail"]) self.assertIn("/submit", resp_dict["instance"]) - @unittest_run_loop async def test_bad_url_returns_json_response(self): """Test that unrouted api url returns a 404 in JSON format.""" response = await self.client.get("/objects/swagadagamaster") diff --git a/tests/test_server.py b/tests/test_server.py index 603dfcfbd..5062927ca 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -6,7 +6,7 @@ from unittest.mock import patch from aiohttp 
import web -from aiohttp.test_utils import AioHTTPTestCase, unittest_run_loop +from aiohttp.test_utils import AioHTTPTestCase from metadata_backend.server import init, main @@ -33,19 +33,16 @@ async def get_application(self): """Retrieve web Application for test.""" return await init() - @unittest_run_loop async def test_init(self): """Test everything works in init().""" server = await self.get_application() self.assertIs(type(server), web.Application) - @unittest_run_loop async def test_api_routes_are_set(self): """Test correct amount of api (no frontend) routes is set.""" server = await self.get_application() self.assertIs(len(server.router.resources()), 18) - @unittest_run_loop async def test_frontend_routes_are_set(self): """Test correct routes are set when frontend folder is exists.""" frontend_static = "metadata_backend.server.frontend_static_files" From 2d0f715e57e7b130dcad1ea5b7c851c23cc286d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Fri, 27 Aug 2021 14:08:44 +0300 Subject: [PATCH 035/336] create doi helper class --- metadata_backend/helpers/doi.py | 51 +++++++++++++++++++++++++++++++++ requirements.txt | 3 +- 2 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 metadata_backend/helpers/doi.py diff --git a/metadata_backend/helpers/doi.py b/metadata_backend/helpers/doi.py new file mode 100644 index 000000000..dbbe88b59 --- /dev/null +++ b/metadata_backend/helpers/doi.py @@ -0,0 +1,51 @@ +"""Tool for registering DOI at DataCite. + +The DOI handler from SDA orchestration was used as reference: +https://github.com/neicnordic/sda-orchestration/blob/master/sda_orchestrator/utils/id_ops.py +""" +import requests +from typing import Dict +from uuid import uuid4 + +from aiohttp import web + +from ..helpers.logger import LOG +from ..conf import conf + + +class DOIHandler: + """DOI registration methods.""" + + def __init__(self) -> None: + """Get DOI credentials from config.""" + self.doi_api = conf.doi_api + self.doi_prefix = conf.doi_prefix + self.doi_user = conf.doi_user + self.doi_key = conf.doi_key + self.doi_url = f"{conf.datacite_url.rstrip('/')}/{self.doi_prefix}" + + async def create_draft_doi(self) -> Dict: + """Generate random suffix and POST request a draft DOI to DataCite DOI API.""" + suffix = uuid4().hex[:10] + doi_suffix = f"{suffix[:4]}-{suffix[4:]}" + headers = {"Content-Type": "application/json"} + doi_payload = {"data": {"type": "dois", "attributes": {"doi": f"{self.doi_prefix}/{doi_suffix}"}}} + + response = requests.post(self.doi_api, data=doi_payload, headers=headers, auth=(self.doi_user, self.doi_key)) + + if response.status_code == 201: + draft_resp = response.json() + full_doi = draft_resp["data"]["attributes"]["doi"] + returned_suffix = draft_resp["data"]["attributes"]["suffix"] + LOG.debug(f"DOI draft created and response was: {draft_resp}") + LOG.info(f"DOI draft created with doi: {full_doi}.") + doi_data = { + "fullDOI": full_doi, + "dataset": f"{self.doi_url}/{returned_suffix.lower()}", + } + else: + reason = f"DOI API draft creation request failed with code: {response.status_code}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) # 400 is probably not the correct error for this + + return doi_data diff --git a/requirements.txt b/requirements.txt index 0dd823510..59f538922 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ python-dateutil==2.8.2 uvloop==0.16.0 xmlschema==1.8.1 Authlib==0.15.5 -ujson==4.2.0 \ No newline at end of file +ujson==4.2.0 +requests==2.26.0 \ No newline at end of 
file From f78761c879c2261936528bc6833026ec1f9df8f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Fri, 27 Aug 2021 14:08:54 +0300 Subject: [PATCH 036/336] add config variables --- metadata_backend/conf/conf.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/metadata_backend/conf/conf.py b/metadata_backend/conf/conf.py index 6cecb7f96..7a33fdeab 100644 --- a/metadata_backend/conf/conf.py +++ b/metadata_backend/conf/conf.py @@ -156,3 +156,12 @@ def create_db_client() -> AsyncIOMotorClient: "jwk_server": f'{os.getenv("JWK_URL", "")}', "auth_referer": f'{os.getenv("AUTH_REFERER", "")}', } + + +# 6) Set the base url for DataCite REST API + +doi_api = os.getenv("DOI_API", "") +doi_prefix = os.getenv("DOI_PREFIX", "") +doi_user = os.getenv("DOI_USER", "") +doi_key = os.getenv("DOI_KEY", "") +datacite_url = os.getenv("DATACITE_URL", "https://doi.org") From 276782a82381893b3b7a682c0e63691516f5bf90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Fri, 27 Aug 2021 14:21:35 +0300 Subject: [PATCH 037/336] add unit test case --- tests/test_doi.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 tests/test_doi.py diff --git a/tests/test_doi.py b/tests/test_doi.py new file mode 100644 index 000000000..a69be6470 --- /dev/null +++ b/tests/test_doi.py @@ -0,0 +1,45 @@ +"""Test the DOI registering tool.""" +import unittest +from unittest.mock import patch + +from aiohttp import web + +from metadata_backend.helpers.doi import DOIHandler + + +class DOITestCase(unittest.TestCase): + """DOI registering class test case.""" + + def setUp(self): + """Set class for tests.""" + self.doi = DOIHandler() + + async def test_400_is_raised(self): + """Test 400 is raised when request to DataCite supposedly fails.""" + with patch("metadata_backend.helpers.doi.requests.post") as mocked_post: + mocked_post.return_value.status_code = 400 + with self.assertRaises(web.HTTPBadRequest) as err: + await self.doi.create_draft_doi() + self.assertEqual(str(err.exception), "DOI API draft creation request failed with code: 400") + + async def test_create_doi_draft_works(self): + """Test DOI info is returned correctly when request succeeds.""" + with patch("metadata_backend.helpers.doi.requests.post") as mocked_post: + mocked_post.return_value.status_code = 201 + mocked_post.return_value.json.return_value = { + "data": { + "id": "10.xxxx/yyyyy", + "type": "dois", + "attributes": { + "doi": "10.xxxx/yyyyy", + "prefix": "10.xxxx", + "suffix": "yyyyy", + "identifiers": [{"identifier": "https://doi.org/10.xxxx/yyyyy", "identifierType": "DOI"}], + }, + } + } + + output = await self.doi.create_draft_doi() + assert mocked_post.called + result = {"fullDOI": "10.xxxx/yyyyy", "dataset": "https://doi.org/10.xxxx/yyyyy"} + self.assertEqual(output, result) From cb6b6b88296140dea7c31aac5277bcd9986760d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Mon, 30 Aug 2021 09:26:56 +0300 Subject: [PATCH 038/336] add tox dep to fix mypy error --- tox.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/tox.ini b/tox.ini index e372f4aee..e9fc7f3b5 100644 --- a/tox.ini +++ b/tox.ini @@ -31,6 +31,7 @@ deps = mypy types-python-dateutil types-ujson + types-requests # Mypy fails if 3rd party library doesn't have type hints configured. # Alternative to ignoring imports would be to write custom stub files, which # could be done at some point. 
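For reference, the draft-DOI round trip that PATCH 035's create_draft_doi performs can be exercised on its own against the DataCite REST API. The following is a minimal sketch rather than code from this series: the sandbox URL, prefix and credentials are placeholder assumptions (the real values come from the DOI_API, DOI_PREFIX, DOI_USER and DOI_KEY variables added in PATCH 036), and it passes the payload with requests' json= keyword, the serialization PATCH 042 later switches to (PATCH 035 still used data=, which sends the dict form-encoded).

import requests
from uuid import uuid4

DOI_API = "https://api.test.datacite.org/dois"  # assumed DataCite sandbox endpoint
DOI_PREFIX = "10.xxxx"  # placeholder prefix

# Build a random 10-character suffix split as xxxx-xxxxxx, mirroring create_draft_doi
suffix = uuid4().hex[:10]
doi_suffix = f"{suffix[:4]}-{suffix[4:]}"
doi_payload = {"data": {"type": "dois", "attributes": {"doi": f"{DOI_PREFIX}/{doi_suffix}"}}}

response = requests.post(
    DOI_API,
    json=doi_payload,  # json= serializes the dict and sets the JSON request body
    headers={"Content-Type": "application/json"},
    auth=("user", "key"),  # placeholder DOI_USER / DOI_KEY pair
)
if response.status_code == 201:
    attributes = response.json()["data"]["attributes"]
    # DataCite echoes the registered DOI and its suffix back in the draft response
    print(attributes["doi"], attributes["suffix"])
else:
    print(f"draft creation failed with code: {response.status_code}")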
From 5e0e8aa0500898639eed3ed6cf87c061c1e575fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Tue, 31 Aug 2021 10:20:20 +0300 Subject: [PATCH 039/336] add draft doi creation and alter previous unit tests --- metadata_backend/api/handlers.py | 11 ++++++++++- metadata_backend/helpers/doi.py | 1 - tests/test_handlers.py | 9 +++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/metadata_backend/api/handlers.py b/metadata_backend/api/handlers.py index 2f68b18d8..251b81506 100644 --- a/metadata_backend/api/handlers.py +++ b/metadata_backend/api/handlers.py @@ -23,6 +23,7 @@ from ..helpers.parser import XMLToJSONParser from ..helpers.schema_loader import JSONSchemaLoader, SchemaNotFoundException, XMLSchemaLoader from ..helpers.validator import JSONValidator, XMLValidator +from ..helpers.doi import DOIHandler from .operators import FolderOperator, Operator, XMLOperator, UserOperator from ..conf.conf import aai_config @@ -740,7 +741,15 @@ async def post_folder(self, req: Request) -> Response: :returns: JSON response containing folder ID for submitted folder """ db_client = req.app["db_client"] - content = await self._get_data(req) + content = await self._get_data(req) # Required properties from the request + + # Create draft DOI and add extra info to content + doi = DOIHandler() + doi_data = await doi.create_draft_doi() + content["extraInfo"] = {} + content["extraInfo"]["identifier"] = {"identifierType": "DOI", "doi": doi_data["fullDOI"]} + content["extraInfo"]["url"] = doi_data["dataset"] + JSONValidator(content, "folders").validate operator = FolderOperator(db_client) diff --git a/metadata_backend/helpers/doi.py b/metadata_backend/helpers/doi.py index dbbe88b59..a86989ac8 100644 --- a/metadata_backend/helpers/doi.py +++ b/metadata_backend/helpers/doi.py @@ -37,7 +37,6 @@ async def create_draft_doi(self) -> Dict: draft_resp = response.json() full_doi = draft_resp["data"]["attributes"]["doi"] returned_suffix = draft_resp["data"]["attributes"]["suffix"] - LOG.debug(f"DOI draft created and response was: {draft_resp}") LOG.info(f"DOI draft created with doi: {full_doi}.") doi_data = { "fullDOI": full_doi, diff --git a/tests/test_handlers.py b/tests/test_handlers.py index b37786868..f6cf3338d 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -66,12 +66,14 @@ async def setUpAsync(self): "templates": [], "folders": ["FOL12345678"], } + self.test_draft_doi = {"fullDOI": "10.xxxx/yyyyy", "dataset": "https://doi.org/10.xxxx/yyyyy"} class_parser = "metadata_backend.api.handlers.XMLToJSONParser" class_operator = "metadata_backend.api.handlers.Operator" class_xmloperator = "metadata_backend.api.handlers.XMLOperator" class_folderoperator = "metadata_backend.api.handlers.FolderOperator" class_useroperator = "metadata_backend.api.handlers.UserOperator" + class_doihandler = "metadata_backend.api.handlers.DOIHandler" operator_config = { "read_metadata_object.side_effect": self.fake_operator_read_metadata_object, "query_metadata_database.side_effect": self.fake_operator_query_metadata_object, @@ -103,11 +105,13 @@ async def setUpAsync(self): self.patch_xmloperator = patch(class_xmloperator, **xmloperator_config, spec=True) self.patch_folderoperator = patch(class_folderoperator, **folderoperator_config, spec=True) self.patch_useroperator = patch(class_useroperator, **useroperator_config, spec=True) + self.patch_doihandler = patch(class_doihandler, spec=True) self.MockedParser = self.patch_parser.start() self.MockedOperator = self.patch_operator.start() 
self.MockedXMLOperator = self.patch_xmloperator.start() self.MockedFolderOperator = self.patch_folderoperator.start() self.MockedUserOperator = self.patch_useroperator.start() + self.MockedDoiHandler = self.patch_doihandler.start() # Set up authentication request = get_request_with_fernet() @@ -123,6 +127,7 @@ async def tearDownAsync(self): self.patch_xmloperator.stop() self.patch_folderoperator.stop() self.patch_useroperator.stop() + self.patch_doihandler.stop() await self.client.close() @@ -555,8 +560,10 @@ async def test_query_with_invalid_pagination_params(self): async def test_folder_creation_works(self): """Test that folder is created and folder ID returned.""" json_req = {"name": "test", "description": "test folder"} + self.MockedDoiHandler().create_draft_doi.return_value = self.test_draft_doi response = await self.client.post("/folders", json=json_req) json_resp = await response.json() + self.MockedDoiHandler().create_draft_doi.assert_called_once() self.MockedFolderOperator().create_folder.assert_called_once() self.assertEqual(response.status, 201) self.assertEqual(json_resp["folderId"], self.folder_id) @@ -564,8 +571,10 @@ async def test_folder_creation_works(self): async def test_folder_creation_with_missing_data_fails(self): """Test that folder creation fails when missing data in request.""" json_req = {"description": "test folder"} + self.MockedDoiHandler().create_draft_doi.return_value = self.test_draft_doi response = await self.client.post("/folders", json=json_req) json_resp = await response.json() + self.MockedDoiHandler().create_draft_doi.assert_called_once() self.assertEqual(response.status, 400) self.assertIn("'name' is a required property", json_resp["detail"]) From 3baa319e7207a430efddbab52981eae9c959c8fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Fri, 3 Sep 2021 06:22:28 +0300 Subject: [PATCH 040/336] add publisher and resourceType info --- metadata_backend/api/handlers.py | 6 ++++-- metadata_backend/conf/conf.py | 3 ++- metadata_backend/helpers/schemas/folders.json | 5 ++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/metadata_backend/api/handlers.py b/metadata_backend/api/handlers.py index 251b81506..cba51475f 100644 --- a/metadata_backend/api/handlers.py +++ b/metadata_backend/api/handlers.py @@ -26,7 +26,7 @@ from ..helpers.doi import DOIHandler from .operators import FolderOperator, Operator, XMLOperator, UserOperator -from ..conf.conf import aai_config +from ..conf.conf import aai_config, publisher class RESTAPIHandler: @@ -741,7 +741,7 @@ async def post_folder(self, req: Request) -> Response: :returns: JSON response containing folder ID for submitted folder """ db_client = req.app["db_client"] - content = await self._get_data(req) # Required properties from the request + content = await self._get_data(req) # Create draft DOI and add extra info to content doi = DOIHandler() @@ -749,6 +749,8 @@ async def post_folder(self, req: Request) -> Response: content["extraInfo"] = {} content["extraInfo"]["identifier"] = {"identifierType": "DOI", "doi": doi_data["fullDOI"]} content["extraInfo"]["url"] = doi_data["dataset"] + content["extraInfo"]["resourceType"] = {"resourceTypeGeneral": "Dataset"} + content["extraInfo"]["publisher"] = publisher JSONValidator(content, "folders").validate diff --git a/metadata_backend/conf/conf.py b/metadata_backend/conf/conf.py index 7a33fdeab..a5d521364 100644 --- a/metadata_backend/conf/conf.py +++ b/metadata_backend/conf/conf.py @@ -158,10 +158,11 @@ def create_db_client() -> 
AsyncIOMotorClient: } -# 6) Set the base url for DataCite REST API +# 6) Set the DataCite REST API values doi_api = os.getenv("DOI_API", "") doi_prefix = os.getenv("DOI_PREFIX", "") doi_user = os.getenv("DOI_USER", "") doi_key = os.getenv("DOI_KEY", "") datacite_url = os.getenv("DATACITE_URL", "https://doi.org") +publisher = "CSC - IT Center for Science" diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index b1b4a9243..844d68797 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -508,13 +508,12 @@ "type": "object", "title": "Type info of the resource", "required": [ - "type", "resourceTypeGeneral" ], "properties": { - "type": { + "resourceType": { "type": "string", - "title": "Name of resource type" + "title": "Specified resource type if general type is Other" }, "resourceTypeGeneral": { "type": "string", From a672a1da97ef47aec0bfdde3e544d1b1c133bbf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Fri, 3 Sep 2021 07:21:20 +0300 Subject: [PATCH 041/336] move doi creation to publish_folder and add datePublished --- metadata_backend/api/handlers.py | 21 +++++++++++-------- metadata_backend/helpers/schemas/folders.json | 4 ++++ tests/test_handlers.py | 10 ++++----- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/metadata_backend/api/handlers.py b/metadata_backend/api/handlers.py index cba51475f..f3b6aedad 100644 --- a/metadata_backend/api/handlers.py +++ b/metadata_backend/api/handlers.py @@ -7,6 +7,7 @@ from math import ceil from pathlib import Path from typing import Dict, List, Tuple, Union, cast, AsyncGenerator, Any +from datetime import datetime from aiohttp import BodyPartReader, web from aiohttp.web import Request, Response @@ -743,15 +744,6 @@ async def post_folder(self, req: Request) -> Response: db_client = req.app["db_client"] content = await self._get_data(req) - # Create draft DOI and add extra info to content - doi = DOIHandler() - doi_data = await doi.create_draft_doi() - content["extraInfo"] = {} - content["extraInfo"]["identifier"] = {"identifierType": "DOI", "doi": doi_data["fullDOI"]} - content["extraInfo"]["url"] = doi_data["dataset"] - content["extraInfo"]["resourceType"] = {"resourceTypeGeneral": "Dataset"} - content["extraInfo"]["publisher"] = publisher - JSONValidator(content, "folders").validate operator = FolderOperator(db_client) @@ -840,6 +832,12 @@ async def publish_folder(self, req: Request) -> Response: obj_ops = Operator(db_client) + # Create draft DOI and delete draft objects from the folder + doi = DOIHandler() + doi_data = await doi.create_draft_doi() + identifier = {"identifierType": "DOI", "doi": doi_data["fullDOI"]} + curr_date = datetime.utcnow() + for obj in folder["drafts"]: await obj_ops.delete_metadata_object(obj["schema"], obj["accessionId"]) @@ -847,6 +845,11 @@ async def publish_folder(self, req: Request) -> Response: patch = [ {"op": "replace", "path": "/published", "value": True}, {"op": "replace", "path": "/drafts", "value": []}, + {"op": "add", "path": "/datePublished", "value": curr_date}, + {"op": "add", "path": "/extraInfo/identifier", "value": identifier}, + {"op": "add", "path": "/extraInfo/url", "value": doi_data["dataset"]}, + {"op": "add", "path": "/extraInfo/resourceType", "value": {"resourceTypeGeneral": "Dataset"}}, + {"op": "add", "path": "/extraInfo/publisher", "value": publisher}, ] new_folder = await operator.update_folder(folder_id, patch) diff --git
a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index 844d68797..3c2c583ca 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -22,6 +22,10 @@ "type": "integer", "title": "Unix time stamp of creation, used for indexing" }, + "datePublished": { + "type": "integer", + "title": "Unix time stamp of publishing, used for indexing" + }, "published": { "type": "boolean", "title": "Published Folder" diff --git a/tests/test_handlers.py b/tests/test_handlers.py index f6cf3338d..b7d62a419 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -560,10 +560,8 @@ async def test_query_with_invalid_pagination_params(self): async def test_folder_creation_works(self): """Test that folder is created and folder ID returned.""" json_req = {"name": "test", "description": "test folder"} - self.MockedDoiHandler().create_draft_doi.return_value = self.test_draft_doi response = await self.client.post("/folders", json=json_req) json_resp = await response.json() - self.MockedDoiHandler().create_draft_doi.assert_called_once() self.MockedFolderOperator().create_folder.assert_called_once() self.assertEqual(response.status, 201) self.assertEqual(json_resp["folderId"], self.folder_id) @@ -571,10 +569,8 @@ async def test_folder_creation_works(self): async def test_folder_creation_with_missing_data_fails(self): """Test that folder creation fails when missing data in request.""" json_req = {"description": "test folder"} - self.MockedDoiHandler().create_draft_doi.return_value = self.test_draft_doi response = await self.client.post("/folders", json=json_req) json_resp = await response.json() - self.MockedDoiHandler().create_draft_doi.assert_called_once() self.assertEqual(response.status, 400) self.assertIn("'name' is a required property", json_resp["detail"]) @@ -650,7 +646,7 @@ async def test_update_folder_fails_with_wrong_key(self): response = await self.client.patch("/folders/FOL12345678", json=data) self.assertEqual(response.status, 400) json_resp = await response.json() - reason = "Request contains '/objects' key that cannot be " "updated to folders." + reason = "Request contains '/objects' key that cannot be updated to folders." 
self.assertEqual(reason, json_resp["detail"]) async def test_update_folder_passes(self): @@ -664,9 +660,11 @@ async def test_update_folder_passes(self): self.assertEqual(json_resp["folderId"], self.folder_id) async def test_folder_is_published(self): - """Test that folder would be published.""" + """Test that folder would be published and DOI would be added.""" + self.MockedDoiHandler().create_draft_doi.return_value = self.test_draft_doi self.MockedFolderOperator().update_folder.return_value = self.folder_id response = await self.client.patch("/publish/FOL12345678") + self.MockedDoiHandler().create_draft_doi.assert_called_once() self.MockedFolderOperator().update_folder.assert_called_once() self.assertEqual(response.status, 200) json_resp = await response.json() From f37a964ac8596ec6641d2bec60e8b8ccf56677cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Fri, 3 Sep 2021 08:31:02 +0300 Subject: [PATCH 042/336] small fixes and tests --- metadata_backend/api/handlers.py | 5 ++--- metadata_backend/helpers/doi.py | 2 +- tests/integration/run_tests.py | 4 ++++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/metadata_backend/api/handlers.py b/metadata_backend/api/handlers.py index f3b6aedad..8ce851f4c 100644 --- a/metadata_backend/api/handlers.py +++ b/metadata_backend/api/handlers.py @@ -7,7 +7,7 @@ from math import ceil from pathlib import Path from typing import Dict, List, Tuple, Union, cast, AsyncGenerator, Any -from datetime import datetime +from time import time from aiohttp import BodyPartReader, web from aiohttp.web import Request, Response @@ -836,7 +836,6 @@ async def publish_folder(self, req: Request) -> Response: doi = DOIHandler() doi_data = await doi.create_draft_doi() identifier = {"identifierType": "DOI", "doi": doi_data["fullDOI"]} - curr_date = datetime.utcnow() for obj in folder["drafts"]: await obj_ops.delete_metadata_object(obj["schema"], obj["accessionId"]) @@ -845,7 +844,7 @@ async def publish_folder(self, req: Request) -> Response: patch = [ {"op": "replace", "path": "/published", "value": True}, {"op": "replace", "path": "/drafts", "value": []}, - {"op": "add", "path": "/datePublished", "value": curr_date}, + {"op": "add", "path": "/datePublished", "value": int(time())}, {"op": "add", "path": "/extraInfo/identifier", "value": identifier}, {"op": "add", "path": "/extraInfo/url", "value": doi_data["dataset"]}, {"op": "add", "path": "/extraInfo/resourceType", "value": {"resourceTypeGeneral": "Dataset"}}, diff --git a/metadata_backend/helpers/doi.py b/metadata_backend/helpers/doi.py index a86989ac8..c068713d1 100644 --- a/metadata_backend/helpers/doi.py +++ b/metadata_backend/helpers/doi.py @@ -31,7 +31,7 @@ async def create_draft_doi(self) -> Dict: headers = {"Content-Type": "application/json"} doi_payload = {"data": {"type": "dois", "attributes": {"doi": f"{self.doi_prefix}/{doi_suffix}"}}} - response = requests.post(self.doi_api, data=doi_payload, headers=headers, auth=(self.doi_user, self.doi_key)) + response = requests.post(self.doi_api, json=doi_payload, headers=headers, auth=(self.doi_user, self.doi_key)) if response.status_code == 201: draft_resp = response.json() diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index a7ffc997c..a010f1a2a 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -717,6 +717,7 @@ async def test_crud_folders_works(sess): res = await resp.json() assert res["folderId"] == folder_id, "expected folder id does not match" assert res["published"] is 
False, "folder is published, expected False" + assert "datePublished" not in res.keys() assert res["drafts"] == [{"accessionId": draft_id, "schema": "draft-sample"}], "folder drafts content mismatch" assert res["metadataObjects"] == [ {"accessionId": accession_id, "schema": "sample"} @@ -728,10 +729,13 @@ async def test_crud_folders_works(sess): await get_draft(sess, "sample", draft_id, 404) # checking the draft was deleted after publication async with sess.get(f"{folders_url}/{folder_id}") as resp: + LOG.debug(resp.status) LOG.debug(f"Checking that folder {folder_id} was patched") res = await resp.json() assert res["folderId"] == folder_id, "expected folder id does not match" assert res["published"] is True, "folder is not published, expected True" + assert "datePublished" in res.keys() + assert "extraInfo" in res.keys() assert res["drafts"] == [], "there are drafts in folder, expected empty" assert res["metadataObjects"] == [ {"accessionId": accession_id, "schema": "sample"} From 84ac6040e637140a63e37b1bcf02a71777945b9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Fri, 3 Sep 2021 13:48:20 +0300 Subject: [PATCH 043/336] change the api call request method --- metadata_backend/helpers/doi.py | 34 ++++++++++++++++----------------- tests/test_doi.py | 6 +++--- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/metadata_backend/helpers/doi.py b/metadata_backend/helpers/doi.py index c068713d1..769a47ab0 100644 --- a/metadata_backend/helpers/doi.py +++ b/metadata_backend/helpers/doi.py @@ -3,11 +3,10 @@ The DOI handler from SDA orchestration was used as reference: https://github.com/neicnordic/sda-orchestration/blob/master/sda_orchestrator/utils/id_ops.py """ -import requests from typing import Dict from uuid import uuid4 -from aiohttp import web +from aiohttp import web, ClientSession, BasicAuth from ..helpers.logger import LOG from ..conf import conf @@ -31,20 +30,21 @@ async def create_draft_doi(self) -> Dict: headers = {"Content-Type": "application/json"} doi_payload = {"data": {"type": "dois", "attributes": {"doi": f"{self.doi_prefix}/{doi_suffix}"}}} - response = requests.post(self.doi_api, json=doi_payload, headers=headers, auth=(self.doi_user, self.doi_key)) - - if response.status_code == 201: - draft_resp = response.json() - full_doi = draft_resp["data"]["attributes"]["doi"] - returned_suffix = draft_resp["data"]["attributes"]["suffix"] - LOG.info(f"DOI draft created with doi: {full_doi}.") - doi_data = { - "fullDOI": full_doi, - "dataset": f"{self.doi_url}/{returned_suffix.lower()}", - } - else: - reason = f"DOI API draft creation request failed with code: {response.status_code}" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) # 400 is probably not the correct error for this + auth = BasicAuth(login=self.doi_user, password=self.doi_key) + async with ClientSession(headers=headers, auth=auth) as session: + async with session.post(self.doi_api, json=doi_payload) as response: + if response.status == 201 or response.status == 200: # This should only ever be 201 + draft_resp = await response.json() + full_doi = draft_resp["data"]["attributes"]["doi"] + returned_suffix = draft_resp["data"]["attributes"]["suffix"] + LOG.info(f"DOI draft created with doi: {full_doi}.") + doi_data = { + "fullDOI": full_doi, + "dataset": f"{self.doi_url}/{returned_suffix.lower()}", + } + else: + reason = f"DOI API draft creation request failed with code: {response.status}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) # 400 might not be the 
correct error for this return doi_data diff --git a/tests/test_doi.py b/tests/test_doi.py index a69be6470..894a68758 100644 --- a/tests/test_doi.py +++ b/tests/test_doi.py @@ -16,7 +16,7 @@ def setUp(self): async def test_400_is_raised(self): """Test 400 is raised when request to DataCite supposedly fails.""" - with patch("metadata_backend.helpers.doi.requests.post") as mocked_post: + with patch("aiohttp.ClientSession.post") as mocked_post: - mocked_post.return_value.status_code = 400 + mocked_post.return_value.status = 400 @@ -24,8 +24,8 @@ async def test_400_is_raised(self): async def test_create_doi_draft_works(self): """Test DOI info is returned correctly when request succeeds.""" - with patch("metadata_backend.helpers.doi.requests.post") as mocked_post: - mocked_post.return_value.status_code = 201 + with patch("aiohttp.ClientSession.post") as mocked_post: + mocked_post.return_value.status = 201 mocked_post.return_value.json.return_value = { "data": { "id": "10.xxxx/yyyyy", From 6148a40815e0b7d10dd24a80d2b8930282fe9573 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Fri, 3 Sep 2021 13:49:42 +0300 Subject: [PATCH 044/336] create a mock doi web app --- docker-compose.yml | 19 ++++- tests/integration/mock_doi_api.py | 114 ++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 tests/integration/mock_doi_api.py diff --git a/docker-compose.yml b/docker-compose.yml index 77678bc3b..513102040 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -27,6 +27,10 @@ services: - "LOG_LEVEL=DEBUG" - "MONGO_DATABASE=default" - "MONGO_AUTHDB=admin" + - "DOI_API=http://mockdoi:8001/dois" + - "DOI_PREFIX=10.xxxx" + - "DOI_USER=user" + - "DOI_KEY=key" database: image: "mongo" container_name: "metadata_submitter_database_dev" @@ -53,5 +57,18 @@ services: volumes: - ./tests/integration/mock_auth.py:/mock_auth.py entrypoint: ["python", "/mock_auth.py", "0.0.0.0", "8000"] + mockdoi: + build: + dockerfile: Dockerfile-dev + context: . + image: cscfi/metadata-submitter-dev + hostname: mockdoi + expose: + - 8001 + ports: + - 8001:8001 + volumes: + - ./tests/integration/mock_doi_api.py:/mock_doi_api.py + entrypoint: ["python", "/mock_doi_api.py", "0.0.0.0", "8001"] volumes: - data: + data: diff --git a/tests/integration/mock_doi_api.py b/tests/integration/mock_doi_api.py new file mode 100644 index 000000000..35e4f49d5 --- /dev/null +++ b/tests/integration/mock_doi_api.py @@ -0,0 +1,114 @@ +"""Mock aiohttp.web server for DOI API calls.""" + +import json +import logging +from datetime import datetime + +from aiohttp import web + + +async def dois(req: web.Request) -> web.Response: + """DOI endpoint.""" + try: + content = await req.json() + except json.decoder.JSONDecodeError as e: + reason = "JSON is not correctly formatted." f" See: {e}" + logging.info(reason) + raise web.HTTPBadRequest(reason=reason) + + try: + attributes = content["data"]["attributes"] + except KeyError: + reason = "Provided payload did not include required attributes."
+ logging.info(reason) + raise web.HTTPBadRequest(reason=reason) + + data = { + "data": { + "id": "10.xxxx/yyyy", + "type": "dois", + "attributes": { + "doi": "10.xxxx/yyyy", + "prefix": "10.xxxx", + "suffix": "yyyy", + "identifiers": [{"identifier": "https://mock_doi.org/10.xxxx/yyyy", "identifierType": "DOI"}], + "creators": [], + "titles": [], + "publisher": None, + "container": {}, + "publicationYear": None, + "subjects": [], + "contributors": [], + "dates": [], + "language": None, + "types": {}, + "relatedIdentifiers": [], + "sizes": [], + "formats": [], + "version": None, + "rightsList": [], + "descriptions": [], + "geoLocations": [], + "fundingReferences": [], + "xml": None, + "url": None, + "contentUrl": None, + "metadataVersion": 1, + "schemaVersion": "http://mockcite.org/schema/kernel-4", + "source": None, + "isActive": None, + "state": "draft", + "reason": None, + "created": str(datetime.utcnow()), + "registered": None, + "updated": str(datetime.utcnow()), + }, + "relationships": { + "client": {"data": {"id": "datacite.datacite", "type": "clients"}}, + "media": {"data": []}, + }, + }, + "included": [ + { + "id": "mockcite.mockcite", + "type": "clients", + "attributes": { + "name": "MockCite", + "symbol": "MOCKCITE.MOCKCITE", + "year": 2021, + "contactName": "MockCite", + "contactEmail": "support@mock_cite.org", + "description": None, + "domains": "*", + "url": None, + "created": "2010-01-01 12:00:00.000", + "updated": str(datetime.utcnow()), + "isActive": True, + "hasPassword": True, + }, + "relationships": { + "provider": {"data": {"id": "mockcite", "type": "providers"}}, + "prefixes": {"data": [{"id": "10.xxxx", "type": "prefixes"}]}, + }, + } + ], + } + + if "doi" in attributes or "prefix" in attributes: + logging.info(data) + return web.json_response(data) + else: + reason = "Provided payload includes faulty attributes."
+ logging.info(reason) + raise web.HTTPBadRequest(reason=reason) + + +def init() -> web.Application: + """Start server.""" + app = web.Application() + app.router.add_post("/dois", dois) + return app + + +if __name__ == "__main__": + web.run_app(init(), port=8001) From 1b776664f0ed96b7157bdd3fd01b823833e6ed56 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 29 Sep 2021 23:02:04 +0300 Subject: [PATCH 045/336] remove debug log --- tests/integration/run_tests.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index a010f1a2a..6070dc5da 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -729,7 +729,6 @@ async def test_crud_folders_works(sess): await get_draft(sess, "sample", draft_id, 404) # checking the draft was deleted after publication async with sess.get(f"{folders_url}/{folder_id}") as resp: - LOG.debug(resp.status) LOG.debug(f"Checking that folder {folder_id} was patched") res = await resp.json() assert res["folderId"] == folder_id, "expected folder id does not match" From 3e564702621dae3b9c8ec2e9eaa0fae33a22f065 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 29 Sep 2021 23:07:48 +0300 Subject: [PATCH 046/336] add dependencies for tls docker-compose --- docker-compose-tls.yml | 18 ++++++++++++++++++ docker-compose.yml | 1 + 2 files changed, 19 insertions(+) diff --git a/docker-compose-tls.yml b/docker-compose-tls.yml index b5e8cffce..db1dcc722 100644 --- a/docker-compose-tls.yml +++ b/docker-compose-tls.yml @@ -13,6 +13,7 @@ services: depends_on: - database - mockauth + - mockdoi restart: on-failure environment: - "MONGO_HOST=database:27017" @@ -31,6 +32,10 @@ services: - "AUTH_REFERER=http://mockauth:8000" - "JWK_URL=http://mockauth:8000/keyset" - "LOG_LEVEL=DEBUG" + - "DOI_API=http://mockdoi:8001/dois" + - "DOI_PREFIX=10.xxxx" + - "DOI_USER=user" + - "DOI_KEY=key" database: image: "mongo" container_name: "metadata_submitter_database_dev" @@ -59,5 +64,18 @@ services: volumes: - ./tests/integration/mock_auth.py:/mock_auth.py entrypoint: ["python", "/mock_auth.py", "0.0.0.0", "8000"] + mockdoi: + build: + dockerfile: Dockerfile-dev + context: . 
+ image: cscfi/metadata-submitter-dev + hostname: mockdoi + expose: + - 8001 + ports: + - 8001:8001 + volumes: + - ./tests/integration/mock_doi_api.py:/mock_doi_api.py + entrypoint: ["python", "/mock_doi_api.py", "0.0.0.0", "8001"] volumes: data: diff --git a/docker-compose.yml b/docker-compose.yml index 513102040..2e8ad6303 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,6 +11,7 @@ services: depends_on: - database - mockauth + - mockdoi restart: on-failure environment: - "MONGO_HOST=database:27017" From 97d5db98a9562b6fcba182b8a0eb28a780ab0aa0 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 6 Oct 2021 09:59:03 +0300 Subject: [PATCH 047/336] add datacite schema to list of schemas remove null from doi schema --- metadata_backend/conf/conf.py | 2 +- .../conf/{ena_schemas.json => schemas.json} | 80 ++- .../helpers/schemas/datacite.json | 541 ++++++++++++++++++ metadata_backend/helpers/schemas/folders.json | 110 +++- setup.py | 2 +- 5 files changed, 693 insertions(+), 42 deletions(-) rename metadata_backend/conf/{ena_schemas.json => schemas.json} (75%) create mode 100644 metadata_backend/helpers/schemas/datacite.json diff --git a/metadata_backend/conf/conf.py b/metadata_backend/conf/conf.py index a5d521364..1a997b516 100644 --- a/metadata_backend/conf/conf.py +++ b/metadata_backend/conf/conf.py @@ -105,7 +105,7 @@ def create_db_client() -> AsyncIOMotorClient: # 2) Load schema types and descriptions from json # Default schemas will be ENA schemas -path_to_schema_file = Path(__file__).parent / "ena_schemas.json" +path_to_schema_file = Path(__file__).parent / "schemas.json" with open(path_to_schema_file) as schema_file: schema_types = ujson.load(schema_file) diff --git a/metadata_backend/conf/ena_schemas.json b/metadata_backend/conf/schemas.json similarity index 75% rename from metadata_backend/conf/ena_schemas.json rename to metadata_backend/conf/schemas.json index 273b020b8..f536b962d 100644 --- a/metadata_backend/conf/ena_schemas.json +++ b/metadata_backend/conf/schemas.json @@ -1,81 +1,101 @@ -{"submission": - { "priority": 1, +{ + "submission": { + "priority": 1, "description": { "title": "Submission", "detail": "A submission contains submission actions to be performed by the archive. A submission can add more objects to the archive, update already submitted objects or make objects publicly available.", "url": "https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/SRA.submission.xsd" - } + }, + "provider": "ENA" }, - "study": - { "priority": 2, + "study": { + "priority": 2, "description": { "title": "Study", "detail": "A study groups together data submitted to the archive. A study accession is typically used when citing data submitted to ENA. Note that all associated data and other objects are made public when the study is released.", "url": "https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/SRA.study.xsd" - } + }, + "provider": "ENA" }, - "project": - { "priority": 3, + "project": { + "priority": 3, "description": { "title": "Project", "detail": "A project groups together data submitted to the archive. A project accession is typically used when citing data submitted to ENA. 
Note that all associated data and other objects are made public when the project is released.", "url": "https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/ENA.project.xsd" - } + }, + "provider": "ENA" }, - "sample": - { "priority": 4, + "sample": { + "priority": 4, "description": { "title": "Sample", "detail": "A sample contains information about the sequenced source material. Samples are typically associated with checklists, which define the fields used to annotate the samples.", "url": "https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/SRA.sample.xsd" - } + }, + "provider": "ENA" }, - "experiment": - { "priority": 5, + "experiment": { + "priority": 5, "description": { "title": "Experiment", "detail": "An experiment contains information about a sequencing experiment including library and instrument details.", "url": "https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/SRA.experiment.xsd" - } + }, + "provider": "ENA" }, - "run": - { "priority": 6, + "run": { + "priority": 6, "description": { "title": "Run", "detail": "A run is part of an experiment and refers to data files containing sequence reads.", "url": "https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/SRA.run.xsd" - } + }, + "provider": "ENA" }, - "analysis": - { "priority": 7, + "analysis": { + "priority": 7, "description": { "title": "Analysis", "detail": "An analysis contains secondary analysis results derived from sequence reads (e.g. a genome assembly),", "url": "https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/SRA.analysis.xsd" - } + }, + "provider": "ENA" }, - "dac": - { "priority": 8, + "dac": { + "priority": 8, "description": { "title": "DAC", "detail": "A European Genome-phenome Archive (EGA) data access committee (DAC) is required for authorized access submissions.", "url": "https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/EGA.dac.xsd" - } + }, + "provider": "ENA" }, - "policy": - { "priority": 9, + "policy": { + "priority": 9, "description": { "title": "Policy", "detail": "A European Genome-phenome Archive (EGA) data access policy is required for authorized access submissions.", "url": "https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/EGA.policy.xsd" - } + }, + "provider": "ENA" }, - "dataset": - { "priority": 10, + "dataset": { + "priority": 10, "description": { "title": "Dataset", "detail": "A European Genome-phenome Archive (EGA) data set is required for authorized access submissions.", "url": "https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/EGA.dataset.xsd" - } + }, + "provider": "ENA" + }, + "datacite": { + "priority": 11, + "description": { + "title": "Datacite DOI schema", + "detail": "Derived from the DataCite Metadata Schema which is a list of core metadata properties chosen for an accurate and consistent identification of a resource for citation and retrieval purposes, along with recommended use instructions. 
We only work with a subset of them for this schema.", "url": "http://schema.datacite.org/" }, "provider": "Datacite" } } diff --git a/metadata_backend/helpers/schemas/datacite.json b/metadata_backend/helpers/schemas/datacite.json new file mode 100644 index 000000000..36a1b9e81 --- /dev/null +++ b/metadata_backend/helpers/schemas/datacite.json @@ -0,0 +1,541 @@ +{ + "type": "object", + "title": "The DOI info schema", + "required": [ + "creators", + "subjects" + ], + "properties": { + "creators": { + "type": "array", + "title": "List of creators", + "items": { + "type": "object", + "title": "Main researchers involved with data or the authors of the publication", + "properties": { + "name": { + "type": "string", + "title": "Full name of creator (format: Family, Given)" + }, + "nameType": { + "type": "string", + "title": "Type of name" + }, + "givenName": { + "type": "string", + "title": "First name" + }, + "familyName": { + "type": "string", + "title": "Last name" + }, + "nameIdentifiers": { + "type": "array", + "title": "List of name identifiers", + "items": { + "type": "object", + "title": "Name identifier object", + "properties": { + "schemeUri": { + "oneOf": [ + { + "title": "String value", + "type": "string" + }, + { + "title": "Null value", + "type": "null" + } + ], + "title": "URI (location) of the name identifier scheme" + }, + "nameIdentifier": { + "oneOf": [ + { + "title": "String value", + "type": "string" + }, + { + "title": "Null value", + "type": "null" + } + ], + "title": "URI (location) of name identifier" + }, + "nameIdentifierScheme": { + "oneOf": [ + { + "title": "String value", + "type": "string" + }, + { + "title": "Null value", + "type": "null" + } + ], + "title": "Name of name identifier scheme" + } + } + }, + "uniqueItems": true + }, + "affiliation": { + "type": "array", + "title": "List of affiliations", + "items": { + "type": "object", + "title": "Name affiliation object", + "properties": { + "name": { + "type": "string", + "title": "Name of the place of affiliation" + }, + "schemeUri": { + "type": "string", + "title": "URI (location) of the affiliation scheme" + }, + "affiliationIdentifier": { + "type": "string", + "title": "Location of affiliation identifier" + }, + "affiliationIdentifierScheme": { + "type": "string", + "title": "Name of affiliation identifier scheme" + } + } + }, + "uniqueItems": true + } + }, + "additionalProperties": false + }, + "uniqueItems": true + }, + "subjects": { + "type": "array", + "title": "List of subject identifiers specified by FOS", + "items": { + "type": "object", + "title": "Subject objects", + "required": [ + "subject" + ], + "properties": { + "subject": { + "type": "string", + "title": "FOS identifier" + }, + "subjectScheme": { + "type": "string", + "title": "Subject scheme name" + } + }, + "additionalProperties": true + }, + "uniqueItems": true + }, + "contributors": { + "type": "array", + "title": "List of contributors", + "items": { + "type": "object", + "title": "The institution or person responsible for contributing to the development of the dataset", + "required": [ + "contributorType" + ], + "properties": { + "name": { + "type": "string", + "title": "Full name of contributor (format: Family, Given)" + }, + "nameType": { + "type": "string", + "title": "Type of name" + }, + "givenName": { + "type": "string", + "title": "First name" + }, + "familyName": { + "type": "string", + "title": "Last name" + }, + "contributorType": { + "type": "string", + "title": "Type of contributor" + }, + "nameIdentifiers": { + "type": 
"array", + "title": "List of name identifiers", + "items": { + "type": "object", + "title": "Name identifier object", + "properties": { + "schemeUri": { + "oneOf": [ + { + "title": "String value", + "type": "string" + }, + { + "title": "Null value", + "type": "null" + } + ], + "title": "URI (location) of the name identifier scheme" + }, + "nameIdentifier": { + "oneOf": [ + { + "title": "String value", + "type": "string" + }, + { + "title": "Null value", + "type": "null" + } + ], + "title": "Location of name identifier" + }, + "nameIdentifierScheme": { + "oneOf": [ + { + "title": "String value", + "type": "string" + }, + { + "title": "Null value", + "type": "null" + } + ], + "title": "Name of name identifier scheme" + } + } + } + }, + "affiliation": { + "type": "array", + "title": "List of affiliations", + "items": { + "type": "object", + "title": "Name affiliation object", + "properties": { + "name": { + "type": "string", + "title": "Name of the place of affiliation" + }, + "schemeUri": { + "type": "string", + "title": "URI (location) of the affiliation scheme" + }, + "affiliationIdentifier": { + "type": "string", + "title": "Location of affiliation identifier" + }, + "affiliationIdentifierScheme": { + "type": "string", + "title": "Name of affiliation identifier scheme" + } + } + } + } + }, + "additionalProperties": false + }, + "uniqueItems": true + }, + "dates": { + "type": "array", + "title": "List of relevant dates to publication", + "items": { + "type": "object", + "title": "Date object", + "required": [ + "date", + "dateType" + ], + "properties": { + "date": { + "type": "string", + "title": "A standard format for a date value" + }, + "dateType": { + "type": "string", + "title": "Relevance of the date" + }, + "dateInformation": { + "type": "string", + "title": "Specific event of the date" + } + }, + "additionalProperties": false + }, + "uniqueItems": true + }, + "descriptions": { + "type": "array", + "title": "List of descriptions", + "items": { + "type": "object", + "title": "Description object", + "properties": { + "lang": { + "type": "string", + "title": "Language code of the description" + }, + "description": { + "type": "string", + "title": "Additional information that does not fit in any of the other categories" + }, + "descriptionType": { + "type": "string", + "title": "Type of description" + } + }, + "additionalProperties": false + }, + "uniqueItems": true + }, + "geoLocations": { + "type": "array", + "title": "List of GeoLocations", + "items": { + "type": "object", + "title": "GeoLocation object", + "properties": { + "geoLocationPlace": { + "type": "string", + "title": "Spatial region or named place where the data was gathered" + }, + "geoLocationPoint": { + "type": "object", + "title": "A point containing a single latitude-longitude pair", + "properties": { + "pointLongitude": { + "type": "string", + "title": "Longitude coordinate" + }, + "pointLatitude": { + "type": "string", + "title": "Latitude coordinate" + } + }, + "additionalProperties": false + }, + "geoLocationBox": { + "type": "object", + "title": "A box determined by two longitude and two latitude borders", + "properties": { + "westBoundLongitude": { + "type": "string", + "title": "Longitude coordinate of west bound" + }, + "eastBoundLongitude": { + "type": "string", + "title": "Longitude coordinate of east bound" + }, + "southBoundLatitude": { + "type": "string", + "title": "Latitude coordinate of south bound" + }, + "northBoundLatitude": { + "type": "string", + "title": "Latitude coordinate of north bound" + } + 
} + }, + "geoLocationPolygon": { + "type": "array", + "title": "A drawn polygon area, defined by a set of polygon points", + "items": { + "type": "object", + "title": "Polygon point object", + "properties": { + "pointLongitude": { + "type": "string", + "title": "Longitude coordinate" + }, + "pointLatitude": { + "type": "string", + "title": "Latitude coordinate" + } + } + } + } + }, + "additionalProperties": false + }, + "uniqueItems": true + }, + "language": { + "type": "string", + "title": "Code of the primary language of the resource" + }, + "alternateIdentifiers": { + "type": "array", + "title": "List of alternate identifiers", + "items": { + "type": "object", + "title": "An identifier or identifiers other than the primary Identifier of the resource", + "required": [ + "alternateIdentifier", + "alternateIdentifierType" + ], + "properties": { + "alternateIdentifier": { + "type": "string", + "title": "Alternate identifier info" + }, + "alternateIdentifierType": { + "type": "string", + "title": "Type of alternate identifier" + } + }, + "additionalProperties": false + }, + "uniqueItems": true + }, + "relatedIdentifiers": { + "type": "array", + "title": "List of related identifiers", + "items": { + "type": "object", + "title": "Identifier of related resources", + "required": [ + "relatedIdentifier", + "relatedIdentifierType", + "relationType" + ], + "properties": { + "relatedIdentifier": { + "type": "string", + "title": "Related identifier info" + }, + "relatedIdentifierType": { + "type": "string", + "title": "Type of related identifier" + }, + "relationType": { + "type": "string", + "title": "Specification of the relation" + }, + "relatedMetadataScheme": { + "type": "string", + "title": "Scheme of related metadata" + }, + "schemeUri": { + "type": "string", + "title": "URI (location) of the related metadata scheme" + }, + "schemeType": { + "type": "string", + "title": "Type of the related metadata scheme" + }, + "resourceTypeGeneral": { + "type": "string", + "title": "Optional general type name" + } + }, + "additionalProperties": false + }, + "uniqueItems": true + }, + "sizes": { + "type": "array", + "title": "List of sizes", + "items": { + "type": "string", + "title": "Unstructured size information about the resource" + } + }, + "formats": { + "type": "array", + "title": "List of formats", + "items": { + "type": "string", + "title": "Technical format of the resource" + } + }, + "fundingReferences": { + "type": "array", + "title": "List of funding references", + "itmes": { + "type": "object", + "title": "Information about financial support for the resource", + "required": [ + "funderName", + "funderIdentifier", + "funderIdentifierType" + ], + "properties": { + "funderName": { + "type": "string", + "title": "Name of the funding provider" + }, + "funderIdentifier": { + "type": "string", + "title": "Unique identifier for funding entity" + }, + "funderIdentifierType": { + "type": "string", + "title": "Type of identifier for funding entity" + }, + "schemeUri": { + "oneOf": [ + { + "title": "String value", + "type": "string" + }, + { + "title": "Null value", + "type": "null" + } + ], + "title": "URI (location) of scheme for funder identifier" + }, + "awardNumber": { + "oneOf": [ + { + "title": "String value", + "type": "string" + }, + { + "title": "Null value", + "type": "null" + } + ], + "title": "The code assigned by the funder to a sponsored award" + }, + "awardTitle": { + "oneOf": [ + { + "title": "String value", + "type": "string" + }, + { + "title": "Null value", + "type": "null" + } + ], + 
"title": "The human readable title of the award" + }, + "awardUri": { + "oneOf": [ + { + "title": "String value", + "type": "string" + }, + { + "title": "Null value", + "type": "null" + } + ], + "title": "URI (location) of the award" + } + }, + "additionalProperties": false + }, + "uniqueItems": true + } + } +} diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index 3c2c583ca..b185f9ddc 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -69,15 +69,42 @@ "title": "Name identifier object", "properties": { "schemeUri": { - "type": ["string", "null"], + "oneOf": [ + { + "title": "String value", + "type": "string" + }, + { + "title": "Null value", + "type": "null" + } + ], "title": "URI (location) of the name identifier scheme" }, "nameIdentifier": { - "type": ["string", "null"], + "oneOf": [ + { + "title": "String value", + "type": "string" + }, + { + "title": "Null value", + "type": "null" + } + ], "title": "URI (location) of name identifier" }, "nameIdentifierScheme": { - "type": ["string", "null"], + "oneOf": [ + { + "title": "String value", + "type": "string" + }, + { + "title": "Null value", + "type": "null" + } + ], "title": "Name of name identifier scheme" } } @@ -177,15 +204,42 @@ "title": "Name identifier object", "properties": { "schemeUri": { - "type": ["string", "null"], + "oneOf": [ + { + "title": "String value", + "type": "string" + }, + { + "title": "Null value", + "type": "null" + } + ], "title": "URI (location) of the name identifier scheme" }, "nameIdentifier": { - "type": ["string", "null"], + "oneOf": [ + { + "title": "String value", + "type": "string" + }, + { + "title": "Null value", + "type": "null" + } + ], "title": "Location of name identifier" }, "nameIdentifierScheme": { - "type": ["string", "null"], + "oneOf": [ + { + "title": "String value", + "type": "string" + }, + { + "title": "Null value", + "type": "null" + } + ], "title": "Name of name identifier scheme" } } @@ -459,19 +513,55 @@ "title": "Type of identifier for funding entity" }, "schemeUri": { - "type": ["string", "null"], + "oneOf": [ + { + "title": "String value", + "type": "string" + }, + { + "title": "Null value", + "type": "null" + } + ], "title": "URI (location) of scheme for funder identifier" }, "awardNumber": { - "type": ["string", "null"], + "oneOf": [ + { + "title": "String value", + "type": "string" + }, + { + "title": "Null value", + "type": "null" + } + ], "title": "The code assigned by the funder to a sponsored award" }, "awardTitle": { - "type": ["string", "null"], + "oneOf": [ + { + "title": "String value", + "type": "string" + }, + { + "title": "Null value", + "type": "null" + } + ], "title": "The human readable title of the award" }, "awardUri": { - "type": ["string", "null"], + "oneOf": [ + { + "title": "String value", + "type": "string" + }, + { + "title": "Null value", + "type": "null" + } + ], "title": "URI (location) of the award" } }, diff --git a/setup.py b/setup.py index 3807c41a9..83af1f723 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ "frontend/static/js/*", "frontend/static/media/*", "frontend/static/css/*", - "conf/ena_schemas.json", + "conf/schemas.json", ] }, include_package_data=True, From 4d4ccf208414a8855a1865a491b32df828fd5399 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 7 Oct 2021 09:55:58 +0300 Subject: [PATCH 048/336] fix typo for items --- metadata_backend/helpers/schemas/datacite.json | 2 +- 
metadata_backend/helpers/schemas/folders.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata_backend/helpers/schemas/datacite.json b/metadata_backend/helpers/schemas/datacite.json index 36a1b9e81..5219e8d2d 100644 --- a/metadata_backend/helpers/schemas/datacite.json +++ b/metadata_backend/helpers/schemas/datacite.json @@ -459,7 +459,7 @@ "fundingReferences": { "type": "array", "title": "List of funding references", - "itmes": { + "items": { "type": "object", "title": "Information about financial support for the resource", "required": [ diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index b185f9ddc..1a04dc7b5 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -491,7 +491,7 @@ "fundingReferences": { "type": "array", "title": "List of funding references", - "itmes": { + "items": { "type": "object", "title": "Information about financial support for the resource", "required": [ From 4ec37ae31ff37728a0233396af71438d59be07f1 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 21 Oct 2021 15:26:16 +0300 Subject: [PATCH 049/336] complete datacite JSON schema adjust folder datacite info to schema --- .../helpers/schemas/datacite.json | 810 ++++++++++++------ metadata_backend/helpers/schemas/folders.json | 786 ++++++++++++----- 2 files changed, 1114 insertions(+), 482 deletions(-) diff --git a/metadata_backend/helpers/schemas/datacite.json b/metadata_backend/helpers/schemas/datacite.json index 5219e8d2d..17d105684 100644 --- a/metadata_backend/helpers/schemas/datacite.json +++ b/metadata_backend/helpers/schemas/datacite.json @@ -1,6 +1,6 @@ { "type": "object", - "title": "The DOI info schema", + "title": "Datacite DOI Registration Information", "required": [ "creators", "subjects" @@ -8,99 +8,71 @@ "properties": { "creators": { "type": "array", - "title": "List of creators", + "title": "Creators", + "description": "The main researcher(s) involved in producing the data, or the author(s) of the publication.", "items": { "type": "object", - "title": "Main researchers involved with data or the authors of the publication", + "title": "Main researcher(s) involved with data or the author(s) of the publication", "properties": { - "name": { - "type": "string", - "title": "Full name of creator (format: Family, Given)" - }, - "nameType": { - "type": "string", - "title": "Type of name" - }, "givenName": { "type": "string", - "title": "First name" + "title": "Given Name" }, "familyName": { "type": "string", - "title": "Last name" + "title": "Last Name" }, - "nameIdentifiers": { + "name": { + "type": "string", + "title": "Full name (from Given Name and Family Name)." 
+ }, + "affiliation": { "type": "array", - "title": "List of name identifiers", + "title": "Affiliations", "items": { "type": "object", - "title": "Name identifier object", + "title": "Affiliation details", "properties": { + "name": { + "type": "string", + "title": "Name of the place of affiliation" + }, "schemeUri": { - "oneOf": [ - { - "title": "String value", - "type": "string" - }, - { - "title": "Null value", - "type": "null" - } - ], - "title": "URI (location) of the name identifier scheme" + "type": "string", + "title": "URI (location) of the affiliation scheme" }, - "nameIdentifier": { - "oneOf": [ - { - "title": "String value", - "type": "string" - }, - { - "title": "Null value", - "type": "null" - } - ], - "title": "URI (location) of name identifier" + "affiliationIdentifier": { + "type": "string", + "title": "Location of affiliation identifier" }, - "nameIdentifierScheme": { - "oneOf": [ - { - "title": "String value", - "type": "string" - }, - { - "title": "Null value", - "type": "null" - } - ], - "title": "Name of name identifier scheme" + "affiliationIdentifierScheme": { + "type": "string", + "title": "Name of affiliation identifier scheme" } } }, "uniqueItems": true }, - "affiliation": { + "nameIdentifiers": { "type": "array", - "title": "List of affiliations", + "title": "Creator Identifiers", + "description": "Uniquely identifies an individual or legal entity, according to various schemas, e.g. ORCID, ROR or ISNI. Use name identifier expressed as URL.", "items": { "type": "object", - "title": "Name affiliation object", + "title": "Name identifier object", "properties": { - "name": { - "type": "string", - "title": "Name of the place of affiliation" - }, "schemeUri": { "type": "string", - "title": "URI (location) of the affiliation scheme" + "title": "URI (location) of the name identifier scheme", + "format": "uri" }, - "affiliationIdentifier": { + "nameIdentifier": { "type": "string", - "title": "Location of affiliation identifier" + "title": "URI (location) of name identifier" }, - "affiliationIdentifierScheme": { + "nameIdentifierScheme": { "type": "string", - "title": "Name of affiliation identifier scheme" + "title": "Name of name identifier scheme" } } }, @@ -113,112 +85,132 @@ }, "subjects": { "type": "array", - "title": "List of subject identifiers specified by FOS", + "title": "Subjects", + "description": "Subject, keyword, classification code, or key phrase describing the resource specified by OECD Fields of Science and Technology (FOS)", "items": { "type": "object", - "title": "Subject objects", + "title": "Subjects", "required": [ "subject" ], "properties": { "subject": { "type": "string", - "title": "FOS identifier" + "title": "FOS identifier", + "enum": [ + "Natural sciences", + "Mathematics", + "Computer and information sciences", + "Physical sciences", + "Chemical sciences", + "Earth and related environmental sciences", + "Biological sciences", + "Other natural sciences", + "Engineering and technology", + "Civil engineering", + "Electrical engineering, electronic engineering, information engineering", + "Mechanical engineering", + "Chemical engineering", + "Materials engineering", + "Medical engineering", + "Environmental engineering", + "Environmental biotechnology", + "Industrial biotechnology", + "Nano-technology", + "Other engineering and technologies", + "Medical and health sciences", + "Basic medicine", + "Clinical medicine", + "Health sciences", + "Medical biotechnology", + "Other medical sciences", + "Agricultural sciences", + "Agriculture, forestry, 
and fisheries", + "Animal and dairy science", + "Veterinary science", + "Agricultural biotechnology", + "Other agricultural sciences", + "Social sciences", + "Psychology", + "Economics and business", + "Educational sciences", + "Sociology", + "Law", + "Political science", + "Social and economic geography", + "Media and communications", + "Other social sciences", + "Humanities", + "History and archaeology", + "Languages and literature", + "Philosophy, ethics and religion", + "Arts (arts, history of arts, performing arts, music)", + "Other humanities" + ] }, "subjectScheme": { - "type": "string", - "title": "Subject scheme name" + "title": "Fields of Science and Technology (FOS) scheme", + "type": "string" } }, - "additionalProperties": true + "additionalProperties": false }, "uniqueItems": true }, "contributors": { "type": "array", - "title": "List of contributors", + "title": "Contributors", + "description": "The person(s) responsible for contributing to the development of the dataset.", "items": { "type": "object", - "title": "The institution or person responsible for contributing to the developement of the dataset", + "title": "Contributor", "required": [ "contributorType" ], "properties": { - "name": { - "type": "string", - "title": "Full name of contributor (format: Family, Given)" - }, - "nameType": { - "type": "string", - "title": "Type of name" - }, + "givenName": { "type": "string", - "title": "First name" + "title": "Given Name" }, "familyName": { "type": "string", - "title": "Last name" + "title": "Last Name" }, - "contributorType": { + "name": { "type": "string", - "title": "Type of contributor" + "title": "Full name (from Given Name and Family Name)." }, - "nameIdentifiers": { - "type": "array", - "title": "List of name identifiers", - "items": { - "type": "object", - "title": "Name identifier object", - "properties": { - "schemeUri": { - "oneOf": [ - { - "title": "String value", - "type": "string" - }, - { - "title": "Null value", - "type": "null" - } - ], - "title": "URI (location) of the name identifier scheme" - }, - "nameIdentifier": { - "oneOf": [ - { - "title": "String value", - "type": "string" - }, - { - "title": "Null value", - "type": "null" - } - ], - "title": "Location of name identifier" - }, - "nameIdentifierScheme": { - "oneOf": [ - { - "title": "String value", - "type": "string" - }, - { - "title": "Null value", - "type": "null" - } - ], - "title": "Name of name identifier scheme" - } - } - } + "contributorType": { + "type": "string", + "title": "Type of contributor", + "enum": [ + "Contact Person", + "Data Collector", + "Data Curator", + "Data Manager", + "Distributor", + "Editor", + "Producer", + "Project Leader", + "Project Manager", + "Project Member", + "Related Person", + "Researcher", + "Research Group", + "Rights Holder", + "Sponsor", + "Supervisor", + "Work Package Leader", + "Other" + ] }, "affiliation": { "type": "array", - "title": "List of affiliations", + "title": "Affiliations", "items": { "type": "object", - "title": "Name affiliation object", + "title": "Affiliation details", "properties": { "name": { "type": "string", @@ -237,6 +229,31 @@ "title": "Name of affiliation identifier scheme" } } + }, + "uniqueItems": true + }, + "nameIdentifiers": { + "type": "array", + "title": "Contributor identifiers", + "description": "Uniquely identifies an individual or legal entity, according to various schemas, e.g. ORCID, ROR or ISNI. 
Use name identifier expressed as URL.", + "items": { + "type": "object", + "title": "Identifier details", + "properties": { + "schemeUri": { + "type": "string", + "title": "URI (location) of the name identifier scheme", + "format": "uri" + }, + "nameIdentifier": { + "type": "string", + "title": "Location of name identifier" + }, + "nameIdentifierScheme": { + "type": "string", + "title": "Name of name identifier scheme" + } + } } } }, @@ -246,10 +263,11 @@ }, "dates": { "type": "array", - "title": "List of relevant dates to publication", + "title": "Dates", + "description": "List of relevant dates to publication", "items": { "type": "object", - "title": "Date object", + "title": "Date", "required": [ "date", "dateType" @@ -257,15 +275,31 @@ "properties": { "date": { "type": "string", - "title": "A standard format for a date value" + "title": "Date", + "description": "A standard format for a date value" }, "dateType": { "type": "string", - "title": "Relevance of the date" + "title": "Date Type", + "description": "Relevance of the date", + "enum": [ + "Accepted", + "Available", + "Copyrighted", + "Collected", + "Created", + "Issued", + "Submitted", + "Updated", + "Valid", + "Withdrawn", + "Other" + ] }, "dateInformation": { "type": "string", - "title": "Specific event of the date" + "title": "Date Information", + "description": "Specific information about the date, if appropriate." } }, "additionalProperties": false @@ -274,22 +308,31 @@ }, "descriptions": { "type": "array", - "title": "List of descriptions", + "title": "Descriptions", + "description": "Additional information about the resource that does not fit in any of the other categories.", "items": { "type": "object", - "title": "Description object", + "title": "description", "properties": { - "lang": { - "type": "string", - "title": "Language code of the description" - }, "description": { "type": "string", - "title": "Additional information that does not fit in any of the other categories" + "title": "description" }, "descriptionType": { "type": "string", - "title": "Type of description" + "title": "Description Type", + "enum": [ + "Abstract", + "Methods", + "Series Information", + "Table Of Contents", + "Technical Info", + "Other" + ] + }, + "lang": { + "type": "string", + "title": "Language" } }, "additionalProperties": false @@ -298,18 +341,20 @@ }, "geoLocations": { "type": "array", - "title": "List of GeoLocations", + "title": "GeoLocations", + "description": "Spatial region or named place where the data was gathered or about which the resource is focused.", "items": { "type": "object", - "title": "GeoLocation object", + "title": "GeoLocation", "properties": { "geoLocationPlace": { "type": "string", - "title": "Spatial region or named place where the data was gathered" + "title": "Geolocation Place" }, "geoLocationPoint": { "type": "object", - "title": "A point containing a single latitude-longitude pair", + "title": "Geolocation Point", + "description": "A point location in space. A point contains a single longitude-latitude pair.", "properties": { "pointLongitude": { "type": "string", @@ -324,7 +369,8 @@ }, "geoLocationBox": { "type": "object", - "title": "A box determined by two longitude and two latitude borders", + "title": "Geolocation Box", + "description": "The spatial limits of a box. A box is defined by two geographic points. Left low corner and right upper corner. 
Each point is defined by its longitude and latitude.", "properties": { "westBoundLongitude": { "type": "string", @@ -343,24 +389,6 @@ "title": "Latitude coordinate of north bound" } } - }, - "geoLocationPolygon": { - "type": "array", - "title": "A drawn polygon area, defined by a set of polygon points", - "items": { - "type": "object", - "title": "Polygon point object", - "properties": { - "pointLongitude": { - "type": "string", - "title": "Longitude coordinate" - }, - "pointLatitude": { - "type": "string", - "title": "Latitude coordinate" - } - } - } } }, "additionalProperties": false @@ -369,38 +397,202 @@ }, "language": { "type": "string", - "title": "Code of the primary language of the resource" - }, - "alternateIdentifiers": { - "type": "array", - "title": "List of alternate identifiers", - "items": { - "type": "object", - "title": "An identifier or identifiers other than the primary Identifier of the resource", - "required": [ - "alternateIdentifier", - "alternateIdentifierType" - ], - "properties": { - "alternateIdentifier": { - "type": "string", - "title": "Alternate identifier info" - }, - "alternateIdentifierType": { - "type": "string", - "title": "Type of alternate identifier" - } - }, - "additionalProperties": false - }, - "uniqueItems": true + "title": "Language", + "description": "Primary language of the Study/Datasets submitted.", + "enum": [ + "Afar", + "Abkhaz", + "Avestan", + "Afrikaans", + "Akan", + "Amharic", + "Aragonese", + "Arabic", + "Assamese", + "Avaric", + "Aymara", + "Azerbaijani", + "Bashkir", + "Belarusian", + "Bulgarian", + "Bihari", + "Bislama", + "Bambara", + "Bengali", + "Tibetan", + "Breton", + "Bosnian", + "Catalan", + "Chechen", + "Chamorro", + "Corsican", + "Cree", + "Czech", + "Old Church Slavonic", + "Chuvash", + "Welsh", + "Danish", + "German", + "Divehi", + "Dzongkha", + "Ewe", + "Greek", + "English", + "Esperanto", + "Spanish", + "Estonian", + "Basque", + "Persian", + "Fula", + "Finnish", + "Fijian", + "Faroese", + "French", + "Western Frisian", + "Irish", + "Scottish Gaelic", + "Galician", + "Guaraní", + "Gujarati", + "Manx", + "Hausa", + "Hebrew", + "Hindi", + "Hiri Motu", + "Croatian", + "Haitian", + "Hungarian", + "Armenian", + "Herero", + "Interlingua", + "Indonesian", + "Interlingue", + "Igbo", + "Nuosu", + "Inupiaq", + "Ido", + "Icelandic", + "Italian", + "Inuktitut", + "Japanese", + "Javanese", + "Georgian", + "Kongo", + "Kikuyu", + "Kwanyama", + "Kazakh", + "Kalaallisut", + "Khmer", + "Kannada", + "Korean", + "Kanuri", + "Kashmiri", + "Kurdish", + "Komi", + "Cornish", + "Kyrgyz", + "Latin", + "Luxembourgish", + "Ganda", + "Limburgish", + "Lingala", + "Lao", + "Lithuanian", + "Luba-Katanga", + "Latvian", + "Malagasy", + "Marshallese", + "Māori", + "Macedonian", + "Malayalam", + "Mongolian", + "Marathi", + "Malay", + "Maltese", + "Burmese", + "Nauru", + "Norwegian Bokmål", + "Northern Ndebele", + "Nepali", + "Ndonga", + "Dutch", + "Norwegian Nynorsk", + "Norwegian", + "Southern Ndebele", + "Navajo", + "Chichewa", + "Occitan", + "Ojibwe", + "Oromo", + "Oriya", + "Ossetian", + "Panjabi", + "Pāli", + "Polish", + "Pashto", + "Portuguese", + "Quechua", + "Romansh", + "Kirundi", + "Romanian", + "Russian", + "Kinyarwanda", + "Sanskrit", + "Sardinian", + "Sindhi", + "Northern Sami", + "Sango", + "Sinhala", + "Slovak", + "Slovenian", + "Samoan", + "Shona", + "Somali", + "Albanian", + "Serbian", + "Swati", + "Southern Sotho", + "Sundanese", + "Swedish", + "Swahili", + "Tamil", + "Telugu", + "Tajik", + "Thai", + "Tigrinya", + "Turkmen", + "Tagalog", + 
"Tswana", + "Tonga", + "Turkish", + "Tsonga", + "Tatar", + "Twi", + "Tahitian", + "Uyghur", + "Ukrainian", + "Urdu", + "Uzbek", + "Venda", + "Vietnamese", + "Volapük", + "Walloon", + "Wolof", + "Xhosa", + "Yiddish", + "Yoruba", + "Zhuang", + "Chinese", + "Zulu" + ] }, "relatedIdentifiers": { "type": "array", - "title": "List of related identifiers", + "title": "Related Indetifiers", + "description": "Must be a globally unique identifier", "items": { "type": "object", - "title": "Identifier of related resources", + "title": "Identifier of related resource", "required": [ "relatedIdentifier", "relatedIdentifierType", @@ -409,31 +601,165 @@ "properties": { "relatedIdentifier": { "type": "string", - "title": "Related identifier info" + "title": "Identifier", + "description": "These must be globally unique identifiers and correspond to the type selected" }, "relatedIdentifierType": { "type": "string", - "title": "Type of related identifier" + "title": "Identifier Type", + "enum": [ + "ARK", + "arXiv", + "bibcode", + "DOI", + "EAN13", + "EISSN", + "Handle", + "IGSN", + "ISBN", + "ISSN", + "ISTC", + "LISSN", + "LSID", + "PMID", + "PURL", + "UPC", + "URL", + "URN", + "w3id" + ] }, "relationType": { "type": "string", - "title": "Specification of the relation" + "title": "Relationship Type", + "enum": [ + "Cites", + "Is cited by", + "Compiles", + "Is compiled by", + "Continues", + "Is continued by", + "Describes", + "Is described by", + "Documents", + "Is documented by", + "Is derived from", + "Is source of", + "Has metadata", + "Is metadata for", + "Has part", + "Is part of", + "Is supplemented by", + "Is supplement to", + "Obsoletes", + "Is obsoleted by", + "References", + "Is referenced by", + "Requires", + "Is required by", + "Reviews", + "Is reviewed by", + "Has version", + "Is version of", + "Is new version of", + "Is previous version of", + "Is published in", + "Is variant form of", + "Is original form of", + "Is identical to" + ] }, "relatedMetadataScheme": { "type": "string", - "title": "Scheme of related metadata" + "title": "Related Metadata Scheme" }, "schemeUri": { "type": "string", - "title": "URI (location) of the related metadata scheme" + "title": "Related Metadata Scheme URI" }, "schemeType": { "type": "string", - "title": "Type of the related metadata scheme" + "title": "Related Metadata Scheme Type" }, "resourceTypeGeneral": { "type": "string", - "title": "Optional general type name" + "title": "Resource Type General", + "enum": [ + "Audiovisual", + "Book", + "BookChapter", + "Collection", + "ComputationalNotebook", + "ConferencePaper", + "ConferenceProceeding", + "DataPaper", + "Dataset", + "Dissertation", + "Event", + "Image", + "InteractiveResource", + "Journal", + "JournalArticle", + "Model", + "OutputManagementPlan", + "PeerReview", + "PhysicalObject", + "Preprint", + "Report", + "Service", + "Software", + "Sound", + "Standard", + "Text", + "Workflow", + "Other" + ] + } + }, + "additionalProperties": false + }, + "uniqueItems": true + }, + "alternateIdentifiers": { + "type": "array", + "title": "Alternate Identifiers", + "description": "An identifier or identifiers other than the primary or related identifier applied to the resource being registered. 
EGA identifier obtained that as an alternative to the current resource.", + "items": { + "type": "object", + "title": "An identifier or identifiers other than the primary Identifier of the resource.", + "required": [ + "alternateIdentifier", + "alternateIdentifierType" + ], + "properties": { + "alternateIdentifier": { + "type": "string", + "title": "Alternate Identifier" + }, + "alternateIdentifierType": { + "type": "string", + "title": "Alternate Identifier Type", + "enum": [ + "ARK", + "arXiv", + "bibcode", + "DOI", + "EAN13", + "EISSN", + "Handle", + "IGSN", + "ISBN", + "ISSN", + "ISTC", + "LISSN", + "LSID", + "PMID", + "PURL", + "UPC", + "URL", + "URN", + "w3id" + ] } }, "additionalProperties": false @@ -442,7 +768,8 @@ }, "sizes": { "type": "array", - "title": "List of sizes", + "title": "Sizes", + "description": "Size (e.g.,bytes, pages, inches, etc.) or duration (extent), e.g.,hours, minutes, days, etc., of a resource. Examples: '15 pages', '6 MB', '45 minutes'", "items": { "type": "string", "title": "Unstructured size information about the resource" @@ -450,7 +777,8 @@ }, "formats": { "type": "array", - "title": "List of formats", + "title": "Formats", + "description": "Use file extension or MIME type where possible, e.g., PDF, XML, MPG or application/pdf, text/xml, video/mpeg. We will prefill some of them based on what was filled in metadata.", "items": { "type": "string", "title": "Technical format of the resource" @@ -458,7 +786,7 @@ }, "fundingReferences": { "type": "array", - "title": "List of funding references", + "title": "Funding References", "items": { "type": "object", "title": "Information about financial support for the resource", @@ -470,67 +798,39 @@ "properties": { "funderName": { "type": "string", - "title": "Name of the funding provider" + "title": "Funder Name" }, "funderIdentifier": { "type": "string", - "title": "Unique identifier for funding entity" + "title": "Funder Identifier", + "description":"Unique identifier for funding entity" }, "funderIdentifierType": { "type": "string", - "title": "Type of identifier for funding entity" - }, - "schemeUri": { - "oneOf": [ - { - "title": "String value", - "type": "string" - }, - { - "title": "Null value", - "type": "null" - } - ], - "title": "URI (location) of scheme for funder identifier" + "title": "Funder Identity Type", + "description": "The type of funder identifier, one of Crossref Funder ID, GRID, ISNI, or ROR.", + "enum": [ + "Crossref Funder ID", + "GRID", + "ISNI", + "ROR", + "Other" + ] }, "awardNumber": { - "oneOf": [ - { - "title": "String value", - "type": "string" - }, - { - "title": "Null value", - "type": "null" - } - ], - "title": "The code assigned by the funder to a sponsored award" + "type": "string", + "title": "Award Number", + "description": "The code assigned by the funder to a sponsored award" }, "awardTitle": { - "oneOf": [ - { - "title": "String value", - "type": "string" - }, - { - "title": "Null value", - "type": "null" - } - ], - "title": "The human readable title of the award" + "type": "string", + "title": "Award Title", + "description": "The human readable title of the award" }, "awardUri": { - "oneOf": [ - { - "title": "String value", - "type": "string" - }, - { - "title": "Null value", - "type": "null" - } - ], - "title": "URI (location) of the award" + "type": "string", + "title": "Award URI", + "description": "The URI leading to a page provided by the funder for more information about the award (grant)." 
} }, "additionalProperties": false diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index 1a04dc7b5..4817aaf0c 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -40,99 +40,78 @@ "properties": { "creators": { "type": "array", - "title": "List of creators", + "title": "Creators", + "description": "The main researcher(s) involved in producing the data, or the author(s) of the publication.", "items": { "type": "object", - "title": "Main researchers involved with data or the authors of the publication", + "title": "Main researcher(s) involved with data or the author(s) of the publication", "properties": { - "name": { + "givenName": { "type": "string", - "title": "Full name of creator (format: Family, Given)" + "title": "Given Name" }, - "nameType": { + "familyName": { "type": "string", - "title": "Type of name" + "title": "Last Name" }, - "givenName": { + "name": { "type": "string", - "title": "First name" + "title": "Full name (from Given Name and Family Name)." }, - "familyName": { + "nameType": { "type": "string", - "title": "Last name" + "title": "Type of name", + "const": "Personal" }, - "nameIdentifiers": { + "affiliation": { "type": "array", - "title": "List of name identifiers", + "title": "Affiliations", "items": { "type": "object", - "title": "Name identifier object", + "title": "Affiliation details", "properties": { + "name": { + "type": "string", + "title": "Name of the place of affiliation" + }, "schemeUri": { - "oneOf": [ - { - "title": "String value", - "type": "string" - }, - { - "title": "Null value", - "type": "null" - } - ], - "title": "URI (location) of the name identifier scheme" + "type": "string", + "title": "URI (location) of the affiliation scheme", + "const": "https://ror.org" }, - "nameIdentifier": { - "oneOf": [ - { - "title": "String value", - "type": "string" - }, - { - "title": "Null value", - "type": "null" - } - ], - "title": "URI (location) of name identifier" + "affiliationIdentifier": { + "type": "string", + "title": "Location of affiliation identifier" }, - "nameIdentifierScheme": { - "oneOf": [ - { - "title": "String value", - "type": "string" - }, - { - "title": "Null value", - "type": "null" - } - ], - "title": "Name of name identifier scheme" + "affiliationIdentifierScheme": { + "type": "string", + "title": "Name of affiliation identifier scheme", + "const": "ROR" } } }, "uniqueItems": true }, - "affiliation": { + "nameIdentifiers": { "type": "array", - "title": "List of affiliations", + "title": "Creator Identifiers", + "description": "Uniquely identifies an individual or legal entity, according to various schemas, e.g. ORCID, ROR or ISNI. 
Use name identifier expressed as URL.", "items": { "type": "object", - "title": "Name affiliation object", + "title": "Name identifier object", "properties": { - "name": { - "type": "string", - "title": "Name of the place of affiliation" - }, "schemeUri": { "type": "string", - "title": "URI (location) of the affiliation scheme" + "title": "URI (location) of the name identifier scheme", + "format": "uri" }, - "affiliationIdentifier": { + "nameIdentifier": { "type": "string", - "title": "Location of affiliation identifier" + "title": "URI (location) of name identifier" }, - "affiliationIdentifierScheme": { + "nameIdentifierScheme": { "type": "string", - "title": "Name of affiliation identifier scheme" + "title": "Name of name identifier scheme" } } }, @@ -145,128 +124,179 @@ }, "subjects": { "type": "array", - "title": "List of subject identifiers specified by FOS", + "title": "Subjects", + "description": "Subject, keyword, classification code, or key phrase describing the resource specified by OECD Fields of Science and Technology (FOS)", "items": { "type": "object", - "title": "Subject objects", + "title": "Subjects", "required": [ "subject" ], "properties": { "subject": { "type": "string", - "title": "FOS identifier" + "title": "FOS identifier", + "enum": [ + "Natural sciences", + "Mathematics", + "Computer and information sciences", + "Physical sciences", + "Chemical sciences", + "Earth and related environmental sciences", + "Biological sciences", + "Other natural sciences", + "Engineering and technology", + "Civil engineering", + "Electrical engineering, electronic engineering, information engineering", + "Mechanical engineering", + "Chemical engineering", + "Materials engineering", + "Medical engineering", + "Environmental engineering", + "Environmental biotechnology", + "Industrial biotechnology", + "Nano-technology", + "Other engineering and technologies", + "Medical and health sciences", + "Basic medicine", + "Clinical medicine", + "Health sciences", + "Medical biotechnology", + "Other medical sciences", + "Agricultural sciences", + "Agriculture, forestry, and fisheries", + "Animal and dairy science", + "Veterinary science", + "Agricultural biotechnology", + "Other agricultural sciences", + "Social sciences", + "Psychology", + "Economics and business", + "Educational sciences", + "Sociology", + "Law", + "Political science", + "Social and economic geography", + "Media and communications", + "Other social sciences", + "Humanities", + "History and archaeology", + "Languages and literature", + "Philosophy, ethics and religion", + "Arts (arts, history of arts, performing arts, music)", + "Other humanities" + ] }, "subjectScheme": { - "type": "string", - "title": "Subject scheme name" + "title": "Fields of Science and Technology (FOS) scheme", + "type": "string" } }, - "additionalProperties": true + "additionalProperties": false }, "uniqueItems": true }, "contributors": { "type": "array", - "title": "List of contributors", + "title": "Contributors", + "description": "The person(s) responsible for contributing to the development of the dataset.", "items": { "type": "object", - "title": "The institution or person responsible for contributing to the developement of the dataset", + "title": "Contributor", "required": [ "contributorType" ], "properties": { - "name": { + "givenName": { "type": "string", - "title": "Full name of contributor (format: Family, Given)" + "title": "Given Name" }, - "nameType": { + "familyName": { "type": "string", - "title": "Type of name" + "title": "Last Name" }, - 
"givenName": { + "name": { "type": "string", - "title": "First name" + "title": "Full name (from Given Name and Family Name)." }, - "familyName": { + "nameType": { "type": "string", - "title": "Last name" + "title": "Type of name", + "const": "Personal" }, "contributorType": { "type": "string", - "title": "Type of contributor" + "title": "Type of contributor", + "enum": [ + "Contact Person", + "Data Collector", + "Data Curator", + "Data Manager", + "Distributor", + "Editor", + "Producer", + "Project Leader", + "Project Manager", + "Project Member", + "Related Person", + "Researcher", + "Research Group", + "Rights Holder", + "Sponsor", + "Supervisor", + "Work Package Leader", + "Other" + ] }, - "nameIdentifiers": { + "affiliation": { "type": "array", - "title": "List of name identifiers", + "title": "Affiliations", "items": { "type": "object", - "title": "Name identifier object", + "title": "Affiliation details", "properties": { + "name": { + "type": "string", + "title": "Name of the place of affiliation" + }, "schemeUri": { - "oneOf": [ - { - "title": "String value", - "type": "string" - }, - { - "title": "Null value", - "type": "null" - } - ], - "title": "URI (location) of the name identifier scheme" + "type": "string", + "title": "URI (location) of the affiliation scheme", + "const": "https://ror.org" }, - "nameIdentifier": { - "oneOf": [ - { - "title": "String value", - "type": "string" - }, - { - "title": "Null value", - "type": "null" - } - ], - "title": "Location of name identifier" + "affiliationIdentifier": { + "type": "string", + "title": "Location of affiliation identifier" }, - "nameIdentifierScheme": { - "oneOf": [ - { - "title": "String value", - "type": "string" - }, - { - "title": "Null value", - "type": "null" - } - ], - "title": "Name of name identifier scheme" + "affiliationIdentifierScheme": { + "type": "string", + "title": "Name of affiliation identifier scheme", + "const": "ROR" } } - } + }, + "uniqueItems": true }, - "affiliation": { + "nameIdentifiers": { "type": "array", - "title": "List of affiliations", + "title": "Contributor identifiers", + "description": "Uniquely identifies an individual or legal entity, according to various schemas, e.g. ORCID, ROR or ISNI. 
Use name identifier expressed as URL.", "items": { "type": "object", - "title": "Name affiliation object", + "title": "Identifier details", "properties": { - "name": { - "type": "string", - "title": "Name of the place of affiliation" - }, "schemeUri": { "type": "string", - "title": "URI (location) of the affiliation scheme" + "title": "URI (location) of the name identifier scheme", + "format": "uri" }, - "affiliationIdentifier": { + "nameIdentifier": { "type": "string", - "title": "Location of affiliation identifier" + "title": "Location of name identifier" }, - "affiliationIdentifierScheme": { + "nameIdentifierScheme": { "type": "string", - "title": "Name of affiliation identifier scheme" + "title": "Name of name identifier scheme" } } } @@ -278,10 +308,11 @@ }, "dates": { "type": "array", - "title": "List of relevant dates to publication", + "title": "Dates", + "description": "List of relevant dates to publication", "items": { "type": "object", - "title": "Date object", + "title": "Date", "required": [ "date", "dateType" @@ -289,15 +320,31 @@ "properties": { "date": { "type": "string", - "title": "A standard format for a date value" + "title": "Date", + "description": "A standard format for a date value" }, "dateType": { "type": "string", - "title": "Relevance of the date" + "title": "Date Type", + "description": "Relevance of the date", + "enum": [ + "Accepted", + "Available", + "Copyrighted", + "Collected", + "Created", + "Issued", + "Submitted", + "Updated", + "Valid", + "Withdrawn", + "Other" + ] }, "dateInformation": { "type": "string", - "title": "Specific event of the date" + "title": "Date Information", + "description": "Specific information about the date, if appropriate." } }, "additionalProperties": false @@ -306,22 +353,31 @@ }, "descriptions": { "type": "array", - "title": "List of descriptions", + "title": "Descriptions", + "description": "Additional information about the resource that does not fit in any of the other categories.", "items": { "type": "object", - "title": "Description object", + "title": "description", "properties": { - "lang": { - "type": "string", - "title": "Language code of the description" - }, "description": { "type": "string", - "title": "Additional information that does not fit in any of the other categories" + "title": "description" }, "descriptionType": { "type": "string", - "title": "Type of description" + "title": "Description Type", + "enum": [ + "Abstract", + "Methods", + "Series Information", + "Table Of Contents", + "Technical Info", + "Other" + ] + }, + "lang": { + "type": "string", + "title": "Language" } }, "additionalProperties": false @@ -401,38 +457,202 @@ }, "language": { "type": "string", - "title": "Code of the primary language of the resource" - }, - "alternateIdentifiers": { - "type": "array", - "title": "List of alternate identifiers", - "items": { - "type": "object", - "title": "An identifier or identifiers other than the primary Identifier of the resource", - "required": [ - "alternateIdentifier", - "alternateIdentifierType" - ], - "properties": { - "alternateIdentifier": { - "type": "string", - "title": "Alternate identifier info" - }, - "alternateIdentifierType": { - "type": "string", - "title": "Type of alternate identifier" - } - }, - "additionalProperties": false - }, - "uniqueItems": true + "title": "Language", + "description": "Primary language of the Study/Datasets submitted.", + "enum": [ + "Afar", + "Abkhaz", + "Avestan", + "Afrikaans", + "Akan", + "Amharic", + "Aragonese", + "Arabic", + "Assamese", + "Avaric", + 
"Aymara", + "Azerbaijani", + "Bashkir", + "Belarusian", + "Bulgarian", + "Bihari", + "Bislama", + "Bambara", + "Bengali", + "Tibetan", + "Breton", + "Bosnian", + "Catalan", + "Chechen", + "Chamorro", + "Corsican", + "Cree", + "Czech", + "Old Church Slavonic", + "Chuvash", + "Welsh", + "Danish", + "German", + "Divehi", + "Dzongkha", + "Ewe", + "Greek", + "English", + "Esperanto", + "Spanish", + "Estonian", + "Basque", + "Persian", + "Fula", + "Finnish", + "Fijian", + "Faroese", + "French", + "Western Frisian", + "Irish", + "Scottish Gaelic", + "Galician", + "Guaraní", + "Gujarati", + "Manx", + "Hausa", + "Hebrew", + "Hindi", + "Hiri Motu", + "Croatian", + "Haitian", + "Hungarian", + "Armenian", + "Herero", + "Interlingua", + "Indonesian", + "Interlingue", + "Igbo", + "Nuosu", + "Inupiaq", + "Ido", + "Icelandic", + "Italian", + "Inuktitut", + "Japanese", + "Javanese", + "Georgian", + "Kongo", + "Kikuyu", + "Kwanyama", + "Kazakh", + "Kalaallisut", + "Khmer", + "Kannada", + "Korean", + "Kanuri", + "Kashmiri", + "Kurdish", + "Komi", + "Cornish", + "Kyrgyz", + "Latin", + "Luxembourgish", + "Ganda", + "Limburgish", + "Lingala", + "Lao", + "Lithuanian", + "Luba-Katanga", + "Latvian", + "Malagasy", + "Marshallese", + "Māori", + "Macedonian", + "Malayalam", + "Mongolian", + "Marathi", + "Malay", + "Maltese", + "Burmese", + "Nauru", + "Norwegian Bokmål", + "Northern Ndebele", + "Nepali", + "Ndonga", + "Dutch", + "Norwegian Nynorsk", + "Norwegian", + "Southern Ndebele", + "Navajo", + "Chichewa", + "Occitan", + "Ojibwe", + "Oromo", + "Oriya", + "Ossetian", + "Panjabi", + "Pāli", + "Polish", + "Pashto", + "Portuguese", + "Quechua", + "Romansh", + "Kirundi", + "Romanian", + "Russian", + "Kinyarwanda", + "Sanskrit", + "Sardinian", + "Sindhi", + "Northern Sami", + "Sango", + "Sinhala", + "Slovak", + "Slovenian", + "Samoan", + "Shona", + "Somali", + "Albanian", + "Serbian", + "Swati", + "Southern Sotho", + "Sundanese", + "Swedish", + "Swahili", + "Tamil", + "Telugu", + "Tajik", + "Thai", + "Tigrinya", + "Turkmen", + "Tagalog", + "Tswana", + "Tonga", + "Turkish", + "Tsonga", + "Tatar", + "Twi", + "Tahitian", + "Uyghur", + "Ukrainian", + "Urdu", + "Uzbek", + "Venda", + "Vietnamese", + "Volapük", + "Walloon", + "Wolof", + "Xhosa", + "Yiddish", + "Yoruba", + "Zhuang", + "Chinese", + "Zulu" + ] }, "relatedIdentifiers": { "type": "array", - "title": "List of related identifiers", + "title": "Related Indetifiers", + "description": "Must be a globally unique identifier", "items": { "type": "object", - "title": "Identifier of related resources", + "title": "Identifier of related resource", "required": [ "relatedIdentifier", "relatedIdentifierType", @@ -441,31 +661,165 @@ "properties": { "relatedIdentifier": { "type": "string", - "title": "Related identifier info" + "title": "Identifier", + "description": "These must be globally unique identifiers and correspond to the type selected" }, "relatedIdentifierType": { "type": "string", - "title": "Type of related identifier" + "title": "Identifier Type", + "enum": [ + "ARK", + "arXiv", + "bibcode", + "DOI", + "EAN13", + "EISSN", + "Handle", + "IGSN", + "ISBN", + "ISSN", + "ISTC", + "LISSN", + "LSID", + "PMID", + "PURL", + "UPC", + "URL", + "URN", + "w3id" + ] }, "relationType": { "type": "string", - "title": "Specification of the relation" + "title": "Relationship Type", + "enum": [ + "Cites", + "Is cited by", + "Compiles", + "Is compiled by", + "Continues", + "Is continued by", + "Describes", + "Is described by", + "Documents", + "Is documented by", + "Is derived from", 
+ "Is source of", + "Has metadata", + "Is metadata for", + "Has part", + "Is part of", + "Is supplemented by", + "Is supplement to", + "Obsoletes", + "Is obsoleted by", + "References", + "Is referenced by", + "Requires", + "Is required by", + "Reviews", + "Is reviewed by", + "Has version", + "Is version of", + "Is new version of", + "Is previous version of", + "Is published in", + "Is variant form of", + "Is original form of", + "Is identical to" + ] }, "relatedMetadataScheme": { "type": "string", - "title": "Scheme of related metadata" + "title": "Related Metadata Scheme" }, "schemeUri": { "type": "string", - "title": "URI (location) of the related metadata scheme" + "title": "Related Metadata Scheme URI" }, "schemeType": { "type": "string", - "title": "Type of the related metadata scheme" + "title": "Related Metadata Scheme Type" }, "resourceTypeGeneral": { "type": "string", - "title": "Optional general type name" + "title": "Resource Type General", + "enum": [ + "Audiovisual", + "Book", + "BookChapter", + "Collection", + "ComputationalNotebook", + "ConferencePaper", + "ConferenceProceeding", + "DataPaper", + "Dataset", + "Dissertation", + "Event", + "Image", + "InteractiveResource", + "Journal", + "JournalArticle", + "Model", + "OutputManagementPlan", + "PeerReview", + "PhysicalObject", + "Preprint", + "Report", + "Service", + "Software", + "Sound", + "Standard", + "Text", + "Workflow", + "Other" + ] + } + }, + "additionalProperties": false + }, + "uniqueItems": true + }, + "alternateIdentifiers": { + "type": "array", + "title": "Alternate Identifiers", + "description": "An identifier or identifiers other than the primary or related identifier applied to the resource being registered. EGA identifier obtained that as an alternative to the current resource.", + "items": { + "type": "object", + "title": "An identifier or identifiers other than the primary Identifier of the resource.", + "required": [ + "alternateIdentifier", + "alternateIdentifierType" + ], + "properties": { + "alternateIdentifier": { + "type": "string", + "title": "Alternate Identifier" + }, + "alternateIdentifierType": { + "type": "string", + "title": "Alternate Identifier Type", + "enum": [ + "ARK", + "arXiv", + "bibcode", + "DOI", + "EAN13", + "EISSN", + "Handle", + "IGSN", + "ISBN", + "ISSN", + "ISTC", + "LISSN", + "LSID", + "PMID", + "PURL", + "UPC", + "URL", + "URN", + "w3id" + ] } }, "additionalProperties": false @@ -474,7 +828,8 @@ }, "sizes": { "type": "array", - "title": "List of sizes", + "title": "Sizes", + "description": "Size (e.g.,bytes, pages, inches, etc.) or duration (extent), e.g.,hours, minutes, days, etc., of a resource. Examples: '15 pages', '6 MB', '45 minutes'", "items": { "type": "string", "title": "Unstructured size information about the resource" @@ -482,7 +837,8 @@ }, "formats": { "type": "array", - "title": "List of formats", + "title": "Formats", + "description": "Use file extension or MIME type where possible, e.g., PDF, XML, MPG or application/pdf, text/xml, video/mpeg. 
             "items": {
                 "type": "string",
                 "title": "Technical format of the resource"
             }
         },
         "fundingReferences": {
             "type": "array",
-            "title": "List of funding references",
+            "title": "Funding References",
             "items": {
                 "type": "object",
                 "title": "Information about financial support for the resource",
                 "required": [
                     "funderName",
                     "funderIdentifier",
                     "funderIdentifierType"
                 ],
                 "properties": {
                     "funderName": {
                         "type": "string",
-                        "title": "Name of the funding provider"
+                        "title": "Funder Name"
                     },
                     "funderIdentifier": {
                         "type": "string",
-                        "title": "Unique identifier for funding entity"
+                        "title": "Funder Identifier",
+                        "description": "Unique identifier for funding entity"
                     },
                     "funderIdentifierType": {
                         "type": "string",
-                        "title": "Type of identifier for funding entity"
+                        "title": "Funder Identity Type",
+                        "description": "The type of funder identifier, one of Crossref Funder ID, GRID, ISNI, or ROR.",
+                        "enum": [
+                            "Crossref Funder ID",
+                            "GRID",
+                            "ISNI",
+                            "ROR",
+                            "Other"
+                        ]
                     },
-                    "schemeUri": {
-                        "oneOf": [
-                            {
-                                "title": "String value",
-                                "type": "string"
-                            },
-                            {
-                                "title": "Null value",
-                                "type": "null"
-                            }
-                        ],
-                        "title": "URI (location) of scheme for funder identifier"
-                    },
                     "awardNumber": {
-                        "oneOf": [
-                            {
-                                "title": "String value",
-                                "type": "string"
-                            },
-                            {
-                                "title": "Null value",
-                                "type": "null"
-                            }
-                        ],
-                        "title": "The code assigned by the funder to a sponsored award"
+                        "type": "string",
+                        "title": "Award Number",
+                        "description": "The code assigned by the funder to a sponsored award"
                     },
                     "awardTitle": {
-                        "oneOf": [
-                            {
-                                "title": "String value",
-                                "type": "string"
-                            },
-                            {
-                                "title": "Null value",
-                                "type": "null"
-                            }
-                        ],
-                        "title": "The human readable title of the award"
+                        "type": "string",
+                        "title": "Award Title",
+                        "description": "The human readable title of the award"
                     },
                     "awardUri": {
-                        "oneOf": [
-                            {
-                                "title": "String value",
-                                "type": "string"
-                            },
-                            {
-                                "title": "Null value",
-                                "type": "null"
-                            }
-                        ],
-                        "title": "URI (location) of the award"
+                        "type": "string",
+                        "title": "Award URI",
+                        "description": "The URI leading to a page provided by the funder for more information about the award (grant)."
                     }
                 },
                 "additionalProperties": false
             },
             "uniqueItems": true
         }
     }
 },
@@ -598,6 +926,10 @@
         "type": "string",
         "title": "Full name of publisher from Research Organization Registry"
     },
+    "publicationYear": {
+        "type": "integer",
+        "title": "Publication Year"
+    },
     "resourceType": {
         "type": "object",
         "title": "Type info of the resource",
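For reference, a document fragment that satisfies the reworked fundingReferences definition above. This is a minimal sketch: the values are invented, the jsonschema package is an outside dependency, and the "doiInfo" path into the schema is an assumption rather than something shown in this diff.

    import json
    from pathlib import Path

    from jsonschema import validate  # third-party validator, not part of this repo

    # Load the folders schema and pull out the fundingReferences subschema;
    # the "doiInfo" key here is an assumption about the surrounding structure.
    schema = json.loads(Path("metadata_backend/helpers/schemas/folders.json").read_text())
    funding_schema = schema["properties"]["doiInfo"]["properties"]["fundingReferences"]

    # Invented example entry covering the three required fields plus one optional one.
    funding = [
        {
            "funderName": "Academy of Finland",
            "funderIdentifier": "https://doi.org/10.13039/501100002341",
            "funderIdentifierType": "Crossref Funder ID",
            "awardNumber": "123456",
        }
    ]

    validate(instance=funding, schema=funding_schema)  # raises ValidationError on bad input
    print("fundingReferences entry is valid")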
From 81f0edccc0d1095d2cd67cb13f6b43b29fa7d0b5 Mon Sep 17 00:00:00 2001
From: Stefan Negru
Date: Thu, 21 Oct 2021 15:26:31 +0300
Subject: [PATCH 050/336] add publicationYear to submission using datetime

---
 metadata_backend/api/handlers.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/metadata_backend/api/handlers.py b/metadata_backend/api/handlers.py
index 8ce851f4c..b6a39a87d 100644
--- a/metadata_backend/api/handlers.py
+++ b/metadata_backend/api/handlers.py
@@ -7,7 +7,7 @@
 from math import ceil
 from pathlib import Path
 from typing import Dict, List, Tuple, Union, cast, AsyncGenerator, Any
-from time import time
+from datetime import date, datetime

 from aiohttp import BodyPartReader, web
 from aiohttp.web import Request, Response
@@ -844,11 +844,12 @@ async def publish_folder(self, req: Request) -> Response:
         patch = [
             {"op": "replace", "path": "/published", "value": True},
             {"op": "replace", "path": "/drafts", "value": []},
-            {"op": "add", "path": "/datePublished", "value": int(time())},
+            {"op": "add", "path": "/datePublished", "value": int(datetime.now().timestamp())},
             {"op": "add", "path": "/extraInfo/identifier", "value": identifier},
             {"op": "add", "path": "/extraInfo/url", "value": doi_data["dataset"]},
             {"op": "add", "path": "/extraInfo/resourceType", "value": {"resourceTypeGeneral": "Dataset"}},
             {"op": "add", "path": "/extraInfo/resourceType", "value": publisher},
+            {"op": "add", "path": "/extraInfo/publicationYear", "value": date.today().year},
         ]

         new_folder = await operator.update_folder(folder_id, patch)
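A quick sketch of the date handling introduced above. Only the JSON-patch paths come from the diff; the variable names are illustrative. int(datetime.now().timestamp()) yields the same epoch-seconds value as the int(time()) call it replaces, while date.today().year supplies the calendar year recorded on publish.

    from datetime import date, datetime

    # Epoch seconds, equivalent to the previous int(time()) value.
    date_published = int(datetime.now().timestamp())

    # Calendar year stored alongside the DOI metadata on publish.
    publication_year = date.today().year

    ops = [
        {"op": "add", "path": "/datePublished", "value": date_published},
        {"op": "add", "path": "/extraInfo/publicationYear", "value": publication_year},
    ]
    print(ops)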
"description": "The SPOT_DESCRIPTOR specifies how to decode the individual reads of interest from the monolithic spot sequence. The spot descriptor contains aspects of the experimental design, platform, and processing information. There will be two methods of specification: one will be an index into a table of typical decodings, the other being an exact specification. This construct is needed for loading data and for interpreting the loaded runs. It can be omitted if the loader can infer read layout (from multiple input files or from one input files).", + "description": "The spot descriptor specifies how to decode the individual reads of interest from the monolithic spot sequence. The spot descriptor contains aspects of the experimental design, platform, and processing information. There will be two methods of specification: one will be an index into a table of typical decodings, the other being an exact specification. This construct is needed for loading data and for interpreting the loaded runs. It can be omitted if the loader can infer read layout (from multiple input files or from one input files).", "title": "Spot Descriptor", "type": "object", "required": [ From 978964acd772027b6b79baf69a209b69d42931dc Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 2 Nov 2021 11:14:45 +0200 Subject: [PATCH 052/336] adjust prefix for FOS enum subjects --- .../helpers/schemas/datacite.json | 101 +++++++++--------- metadata_backend/helpers/schemas/folders.json | 96 ++++++++--------- 2 files changed, 98 insertions(+), 99 deletions(-) diff --git a/metadata_backend/helpers/schemas/datacite.json b/metadata_backend/helpers/schemas/datacite.json index 17d105684..43bbd5df2 100644 --- a/metadata_backend/helpers/schemas/datacite.json +++ b/metadata_backend/helpers/schemas/datacite.json @@ -98,54 +98,54 @@ "type": "string", "title": "FOS identifier", "enum": [ - "Natural sciences", - "Mathematics", - "Computer and information sciences", - "Physical sciences", - "Chemical sciences", - "Earth and related environmental sciences", - "Biological sciences", - "Other natural sciences", - "Engineering and technology", - "Civil engineering", - "Electrical engineering, electronic engineering, information engineering", - "Mechanical engineering", - "Chemical engineering", - "Materials engineering", - "Medical engineering", - "Environmental engineering", - "Environmental biotechnology", - "Industrial biotechnology", - "Nano-technology", - "Other engineering and technologies", - "Medical and health sciences", - "Basic medicine", - "Clinical medicine", - "Health sciences", - "Medical biotechnology", - "Other medical sciences", - "Agricultural sciences", - "Agriculture, forestry, and fisheries", - "Animal and dairy science", - "Veterinary science", - "Agricultural biotechnology", - "Other agricultural sciences", - "Social sciences", - "Psychology", - "Economics and business", - "Educational sciences", - "Sociology", - "Law", - "Political science", - "Social and economic geography", - "Media and communications", - "Other social sciences", - "Humanities", - "History and archaeology", - "Languages and literature", - "Philosophy, ethics and religion", - "Arts (arts, history of arts, performing arts, music)", - "Other humanities" + "FOS: Natural sciences", + "FOS: Mathematics", + "FOS: Computer and information sciences", + "FOS: Physical sciences", + "FOS: Chemical sciences", + "FOS: Earth and related environmental sciences", + "FOS: Biological sciences", + "FOS: Other natural sciences", + "FOS: Engineering and technology", 
+ "FOS: Civil engineering", + "FOS: Electrical engineering, electronic engineering, information engineering", + "FOS: Mechanical engineering", + "FOS: Chemical engineering", + "FOS: Materials engineering", + "FOS: Medical engineering", + "FOS: Environmental engineering", + "FOS: Environmental biotechnology", + "FOS: Industrial biotechnology", + "FOS: Nano-technology", + "FOS: Other engineering and technologies", + "FOS: Medical and health sciences", + "FOS: Basic medicine", + "FOS: Clinical medicine", + "FOS: Health sciences", + "FOS: Medical biotechnology", + "FOS: Other medical sciences", + "FOS: Agricultural sciences", + "FOS: Agriculture, forestry, and fisheries", + "FOS: Animal and dairy science", + "FOS: Veterinary science", + "FOS: Agricultural biotechnology", + "FOS: Other agricultural sciences", + "FOS: Social sciences", + "FOS: Psychology", + "FOS: Economics and business", + "FOS: Educational sciences", + "FOS: Sociology", + "FOS: Law", + "FOS: Political science", + "FOS: Social and economic geography", + "FOS: Media and communications", + "FOS: Other social sciences", + "FOS: Humanities", + "FOS: History and archaeology", + "FOS: Languages and literature", + "FOS: Philosophy, ethics and religion", + "FOS: Arts (arts, history of arts, performing arts, music)", + "FOS: Other humanities" ] }, "subjectScheme": { @@ -168,7 +168,6 @@ "contributorType" ], "properties": { - "givenName": { "type": "string", "title": "Given Name" @@ -803,7 +802,7 @@ "funderIdentifier": { "type": "string", "title": "Funder Identifier", - "description":"Unique identifier for funding entity" + "description": "Unique identifier for funding entity" }, "funderIdentifierType": { "type": "string", @@ -838,4 +837,4 @@ "uniqueItems": true } } -} +} \ No newline at end of file diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index 4817aaf0c..49172fb96 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -137,54 +137,54 @@ "type": "string", "title": "FOS identifier", "enum": [ - "Natural sciences", - "Mathematics", - "Computer and information sciences", - "Physical sciences", - "Chemical sciences", - "Earth and related environmental sciences", - "Biological sciences", - "Other natural sciences", - "Engineering and technology", - "Civil engineering", - "Electrical engineering, electronic engineering, information engineering", - "Mechanical engineering", - "Chemical engineering", - "Materials engineering", - "Medical engineering", - "Environmental engineering", - "Environmental biotechnology", - "Industrial biotechnology", - "Nano-technology", - "Other engineering and technologies", - "Medical and health sciences", - "Basic medicine", - "Clinical medicine", - "Health sciences", - "Medical biotechnology", - "Other medical sciences", - "Agricultural sciences", - "Agriculture, forestry, and fisheries", - "Animal and dairy science", - "Veterinary science", - "Agricultural biotechnology", - "Other agricultural sciences", - "Social sciences", - "Psychology", - "Economics and business", - "Educational sciences", - "Sociology", - "Law", - "Political science", - "Social and economic geography", - "Media and communications", - "Other social sciences", - "Humanities", - "History and archaeology", - "Languages and literature", - "Philosophy, ethics and religion", - "Arts (arts, history of arts, performing arts, music)", - "Other humanities" + "FOS: Natural sciences", + "FOS: Mathematics", + "FOS: Computer and 
information sciences", + "FOS: Physical sciences", + "FOS: Chemical sciences", + "FOS: Earth and related environmental sciences", + "FOS: Biological sciences", + "FOS: Other natural sciences", + "FOS: Engineering and technology", + "FOS: Civil engineering", + "FOS: Electrical engineering, electronic engineering, information engineering", + "FOS: Mechanical engineering", + "FOS: Chemical engineering", + "FOS: Materials engineering", + "FOS: Medical engineering", + "FOS: Environmental engineering", + "FOS: Environmental biotechnology", + "FOS: Industrial biotechnology", + "FOS: Nano-technology", + "FOS: Other engineering and technologies", + "FOS: Medical and health sciences", + "FOS: Basic medicine", + "FOS: Clinical medicine", + "FOS: Health sciences", + "FOS: Medical biotechnology", + "FOS: Other medical sciences", + "FOS: Agricultural sciences", + "FOS: Agriculture, forestry, and fisheries", + "FOS: Animal and dairy science", + "FOS: Veterinary science", + "FOS: Agricultural biotechnology", + "FOS: Other agricultural sciences", + "FOS: Social sciences", + "FOS: Psychology", + "FOS: Economics and business", + "FOS: Educational sciences", + "FOS: Sociology", + "FOS: Law", + "FOS: Political science", + "FOS: Social and economic geography", + "FOS: Media and communications", + "FOS: Other social sciences", + "FOS: Humanities", + "FOS: History and archaeology", + "FOS: Languages and literature", + "FOS: Philosophy, ethics and religion", + "FOS: Arts (arts, history of arts, performing arts, music)", + "FOS: Other humanities" ] }, "subjectScheme": { From 562388cf7d3f0d595291d7cb4fc2f744e74e84b8 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 2 Nov 2021 11:15:07 +0200 Subject: [PATCH 053/336] nameidentifiers does not need to be added --- tests/test_files/doi/test_doi.json | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tests/test_files/doi/test_doi.json b/tests/test_files/doi/test_doi.json index 3f3257dec..67a8b55ae 100644 --- a/tests/test_files/doi/test_doi.json +++ b/tests/test_files/doi/test_doi.json @@ -36,14 +36,7 @@ "affiliationIdentifierScheme": "ROR" } ], - "contributorType": "Researcher", - "nameIdentifiers": [ - { - "schemeUri": null, - "nameIdentifier": null, - "nameIdentifierScheme": null - } - ] + "contributorType": "Researcher" } ] } From 0b133f75ff0be24276527d67536b4724ae74badf Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 2 Nov 2021 12:02:55 +0200 Subject: [PATCH 054/336] adjust identation for 400 doi test --- tests/test_doi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_doi.py b/tests/test_doi.py index 894a68758..9a18defe0 100644 --- a/tests/test_doi.py +++ b/tests/test_doi.py @@ -20,7 +20,7 @@ async def test_400_is_raised(self): mocked_post.return_value.status_code = 400 with self.assertRaises(web.HTTPBadRequest) as err: await self.doi.create_draft_doi() - self.assertEqual(str(err.exception), "DOI API draft creation request failed with code: 400") + self.assertEqual(str(err.exception), "DOI API draft creation request failed with code: 400") async def test_create_doi_draft_works(self): """Test DOI info is returned correctly when request succeeds.""" From b2f94767231b0df215a39a91ef1c68e560e5627d Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 4 Nov 2021 12:15:23 +0200 Subject: [PATCH 055/336] proper name to handlers --- metadata_backend/api/handlers.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/metadata_backend/api/handlers.py 
b/metadata_backend/api/handlers.py index b6a39a87d..32f9dd776 100644 --- a/metadata_backend/api/handlers.py +++ b/metadata_backend/api/handlers.py @@ -478,12 +478,12 @@ class TemplatesAPIHandler(RESTAPIHandler): """API Handler for Templates.""" async def get_template(self, req: Request) -> Response: - """Get one metadata object by its accession id. + """Get one metadata template by its accession id. Returns JSON. :param req: GET request - :returns: JSON response containing template object + :returns: JSON response containing template """ accession_id = req.match_info["accessionId"] schema_type = req.match_info["schema"] @@ -500,17 +500,17 @@ async def get_template(self, req: Request) -> Response: data, content_type = await operator.read_metadata_object(collection, accession_id) data = ujson.dumps(data, escape_forward_slashes=False) - LOG.info(f"GET object with accesssion ID {accession_id} from schema {collection}.") + LOG.info(f"GET template with accesssion ID {accession_id} from schema {collection}.") return web.Response(body=data, status=200, content_type=content_type) async def post_template(self, req: Request) -> Response: - """Save metadata object to database. + """Save metadata template to database. For JSON request body we validate it is consistent with the associated JSON schema. :param req: POST request - :returns: JSON response containing accessionId for submitted object + :returns: JSON response containing accessionId for submitted template """ schema_type = req.match_info["schema"] self._check_schema_exists(schema_type) @@ -554,7 +554,7 @@ async def post_template(self, req: Request) -> Response: url = f"{req.scheme}://{req.host}{req.path}" location_headers = CIMultiDict(Location=f"{url}/{accession_id}") - LOG.info(f"POST object with accesssion ID {accession_id} in schema {collection} was successful.") + LOG.info(f"POST template with accesssion ID {accession_id} in schema {collection} was successful.") return web.Response( body=body, status=201, @@ -563,11 +563,11 @@ async def post_template(self, req: Request) -> Response: ) async def patch_template(self, req: Request) -> Response: - """Update metadata object in database. + """Update metadata template in database. :param req: PATCH request - :raises: HTTPUnauthorized if object is in published folder - :returns: JSON response containing accessionId for submitted object + :raises: HTTPUnauthorized if template is in published folder + :returns: JSON response containing accessionId for submitted template """ schema_type = req.match_info["schema"] accession_id = req.match_info["accessionId"] @@ -587,15 +587,15 @@ async def patch_template(self, req: Request) -> Response: accession_id = await operator.update_metadata_object(collection, accession_id, content) body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) - LOG.info(f"PATCH object with accession ID {accession_id} in schema {collection} was successful.") + LOG.info(f"PATCH template with accession ID {accession_id} in schema {collection} was successful.") return web.Response(body=body, status=200, content_type="application/json") async def delete_template(self, req: Request) -> Response: - """Delete metadata object from database. + """Delete metadata template from database. 
:param req: DELETE request :raises: HTTPUnauthorized if folder published - :raises: HTTPUnprocessableEntity if object does not belong to current user + :raises: HTTPUnprocessableEntity if template does not belong to current user :returns: HTTPNoContent response """ schema_type = req.match_info["schema"] @@ -615,13 +615,13 @@ async def delete_template(self, req: Request) -> Response: if check_user: await user_op.remove_objects(current_user, "templates", [accession_id]) else: - reason = "This object does not seem to belong to any user." + reason = "This template does not seem to belong to any user." LOG.error(reason) raise web.HTTPUnprocessableEntity(reason=reason) accession_id = await Operator(db_client).delete_metadata_object(collection, accession_id) - LOG.info(f"DELETE object with accession ID {accession_id} in schema {collection} was successful.") + LOG.info(f"DELETE template with accession ID {accession_id} in schema {collection} was successful.") return web.Response(status=204) From b3c51c89e637fa8aebe21828bae32a25c62d55a7 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 4 Nov 2021 12:15:35 +0200 Subject: [PATCH 056/336] fix typo displaytitle in integ tests --- tests/integration/run_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index 6070dc5da..d5a0183e2 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -1035,7 +1035,7 @@ async def test_crud_users_works(sess): { "op": "add", "path": "/templates/0/tags", - "value": {"displaTitle": "Test"}, + "value": {"displayTitle": "Test"}, } ] await patch_user(sess, user_id, real_user_id, patch_change_tags_object) @@ -1044,7 +1044,7 @@ async def test_crud_users_works(sess): LOG.debug(f"Checking that template: {template_id} was added") res = await resp.json() assert res["templates"][0]["accessionId"] == template_id, "added template does not exists" - assert res["templates"][0]["tags"]["displaTitle"] == "Test" + assert res["templates"][0]["tags"]["displayTitle"] == "Test" await delete_template(sess, "study", template_id) From 7b2389ea0d5b3ae737251e28af075a26ae57a104 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 4 Nov 2021 22:02:24 +0200 Subject: [PATCH 057/336] fix listing publisher information --- metadata_backend/api/handlers.py | 14 ++++++++-- .../helpers/schemas/datacite.json | 10 +++---- metadata_backend/helpers/schemas/folders.json | 26 +++++-------------- tests/integration/mock_doi_api.py | 2 +- 4 files changed, 22 insertions(+), 30 deletions(-) diff --git a/metadata_backend/api/handlers.py b/metadata_backend/api/handlers.py index 32f9dd776..508bd0f83 100644 --- a/metadata_backend/api/handlers.py +++ b/metadata_backend/api/handlers.py @@ -847,8 +847,18 @@ async def publish_folder(self, req: Request) -> Response: {"op": "add", "path": "/datePublished", "value": int(datetime.now().timestamp())}, {"op": "add", "path": "/extraInfo/identifier", "value": identifier}, {"op": "add", "path": "/extraInfo/url", "value": doi_data["dataset"]}, - {"op": "add", "path": "/extraInfo/resourceType", "value": {"resourceTypeGeneral": "Dataset"}}, - {"op": "add", "path": "/extraInfo/resourceType", "value": publisher}, + {"op": "add", "path": "/extraInfo/publisher", "value": publisher}, + { + "op": "add", + "path": "/extraInfo/types", + "value": { + "ris": "DATA", + "bibtex": "misc", + "citeproc": "dataset", + "schemaOrg": "Dataset", + "resourceTypeGeneral": "Dataset", + }, + }, {"op": "add", "path": 
"/extraInfo/publicationYear", "value": date.today().year}, ] new_folder = await operator.update_folder(folder_id, patch) diff --git a/metadata_backend/helpers/schemas/datacite.json b/metadata_backend/helpers/schemas/datacite.json index 43bbd5df2..b5a86ad40 100644 --- a/metadata_backend/helpers/schemas/datacite.json +++ b/metadata_backend/helpers/schemas/datacite.json @@ -147,10 +147,6 @@ "FOS: Arts (arts, history of arts, performing arts, music)", "FOS: Other humanities" ] - }, - "subjectScheme": { - "title": "Fields of Science and Technology (FOS) scheme", - "type": "string" } }, "additionalProperties": false @@ -768,10 +764,10 @@ "sizes": { "type": "array", "title": "Sizes", - "description": "Size (e.g.,bytes, pages, inches, etc.) or duration (extent), e.g.,hours, minutes, days, etc., of a resource. Examples: '15 pages', '6 MB', '45 minutes'", + "description": "Size (e.g., bytes, pages, inches, etc.) or duration (extent), e.g., hours, minutes, days, etc., of a resource. Examples: '15 pages', '6 MB', '45 minutes'", "items": { "type": "string", - "title": "Unstructured size information about the resource" + "title": "Size" } }, "formats": { @@ -780,7 +776,7 @@ "description": "Use file extension or MIME type where possible, e.g., PDF, XML, MPG or application/pdf, text/xml, video/mpeg. We will prefill some of them based on what was filled in metadata.", "items": { "type": "string", - "title": "Technical format of the resource" + "title": "Format" } }, "fundingReferences": { diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index 49172fb96..6f518c677 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -832,7 +832,7 @@ "description": "Size (e.g.,bytes, pages, inches, etc.) or duration (extent), e.g.,hours, minutes, days, etc., of a resource. Examples: '15 pages', '6 MB', '45 minutes'", "items": { "type": "string", - "title": "Unstructured size information about the resource" + "title": "Size" } }, "formats": { @@ -841,7 +841,7 @@ "description": "Use file extension or MIME type where possible, e.g., PDF, XML, MPG or application/pdf, text/xml, video/mpeg. We will prefill some of them based on what was filled in metadata.", "items": { "type": "string", - "title": "Technical format of the resource" + "title": "Format" } }, "fundingReferences": { @@ -926,27 +926,13 @@ "type": "string", "title": "Full name of publisher from Research Organization Registry" }, - "publicationYear" : { + "publicationYear": { "type": "integer", "title": "Publication Year" }, - "resourceType": { + "types": { "type": "object", - "title": "Type info of the resource", - "required": [ - "resourceTypeGeneral" - ], - "properties": { - "resourceType": { - "type": "string", - "title": "Specified resource type if general type is Other" - }, - "resourceTypeGeneral": { - "type": "string", - "title": "Mandatory general type name" - } - }, - "additionalProperties": false + "title": "Type info of the resource. Multiple types can be listed: ResourceTypeGeneral, schemaOrg etc." 
}, "url": { "type": "string", @@ -1037,4 +1023,4 @@ } }, "additionalProperties": false -} +} \ No newline at end of file diff --git a/tests/integration/mock_doi_api.py b/tests/integration/mock_doi_api.py index 35e4f49d5..3641409a0 100644 --- a/tests/integration/mock_doi_api.py +++ b/tests/integration/mock_doi_api.py @@ -54,7 +54,7 @@ async def dois(req: web.Request) -> web.Response: "url": None, "contentUrl": None, "metadataVersion": 1, - "schemaVersion": "http://mockcite.org/schema/kernel-4", + "schemaVersion": "http://datacite.org/schema/kernel-4", "source": None, "isActive": None, "state": "draft", From 0a5eff757e08e63c41484a265818ab447e79a608 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 Nov 2021 09:05:16 +0000 Subject: [PATCH 058/336] Bump jsonschema from 4.1.2 to 4.2.1 Bumps [jsonschema](https://github.com/Julian/jsonschema) from 4.1.2 to 4.2.1. - [Release notes](https://github.com/Julian/jsonschema/releases) - [Changelog](https://github.com/Julian/jsonschema/blob/main/CHANGELOG.rst) - [Commits](https://github.com/Julian/jsonschema/compare/v4.1.2...v4.2.1) --- updated-dependencies: - dependency-name: jsonschema dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 59f538922..782e69da0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ aiohttp==3.8.0 cryptography==35.0.0 gunicorn==20.1.0 -jsonschema==4.1.2 +jsonschema==4.2.1 motor==2.5.1 python-dateutil==2.8.2 uvloop==0.16.0 From 57a3f3302fc41cbf9da7707f2fe5f3dd5f402608 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 8 Nov 2021 11:36:02 +0200 Subject: [PATCH 059/336] use family name instead of last name --- docs/specification.yml | 6 +++--- metadata_backend/helpers/schemas/datacite.json | 4 ++-- metadata_backend/helpers/schemas/folders.json | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/specification.yml b/docs/specification.yml index 510e32bb6..24b746549 100644 --- a/docs/specification.yml +++ b/docs/specification.yml @@ -1449,7 +1449,7 @@ components: name: type: string description: Full name - example: "Last name, First name" + example: "Family name, First name" nameType: type: string description: Type of name @@ -1460,8 +1460,8 @@ components: example: "First name" familyName: type: string - description: Official last name - example: "Last name" + description: Official Family name + example: "Family name" nameIdentifiers: type: array items: diff --git a/metadata_backend/helpers/schemas/datacite.json b/metadata_backend/helpers/schemas/datacite.json index b5a86ad40..60a4a9818 100644 --- a/metadata_backend/helpers/schemas/datacite.json +++ b/metadata_backend/helpers/schemas/datacite.json @@ -20,7 +20,7 @@ }, "familyName": { "type": "string", - "title": "Last Name" + "title": "Family Name" }, "name": { "type": "string", @@ -170,7 +170,7 @@ }, "familyName": { "type": "string", - "title": "Last Name" + "title": "Family Name" }, "name": { "type": "string", diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index 6f518c677..942f602bd 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -52,7 +52,7 @@ }, "familyName": { "type": "string", - "title": "Last Name" + "title": "Family Name" }, "name": { "type": "string", @@ -213,7 
+213,7 @@ }, "familyName": { "type": "string", - "title": "Last Name" + "title": "Family Name" }, "name": { "type": "string", From f5463e884a4236619c63204df758ad6d6b4feafc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Nov 2021 09:03:58 +0000 Subject: [PATCH 060/336] Bump coverage from 6.1.1 to 6.1.2 Bumps [coverage](https://github.com/nedbat/coveragepy) from 6.1.1 to 6.1.2. - [Release notes](https://github.com/nedbat/coveragepy/releases) - [Changelog](https://github.com/nedbat/coveragepy/blob/master/CHANGES.rst) - [Commits](https://github.com/nedbat/coveragepy/compare/6.1.1...6.1.2) --- updated-dependencies: - dependency-name: coverage dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 83af1f723..cce59569e 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ packages=find_packages(exclude=["tests"]), install_requires=requirements, extras_require={ - "test": ["coverage==6.1.1", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.4"], + "test": ["coverage==6.1.2", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.4"], "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"], }, package_data={ From 265108f5f8b74ea7f372a173e7e886c030b3e526 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Nov 2021 09:04:03 +0000 Subject: [PATCH 061/336] Bump xmlschema from 1.8.1 to 1.8.2 Bumps [xmlschema](https://github.com/sissaschool/xmlschema) from 1.8.1 to 1.8.2. - [Release notes](https://github.com/sissaschool/xmlschema/releases) - [Changelog](https://github.com/sissaschool/xmlschema/blob/master/CHANGELOG.rst) - [Commits](https://github.com/sissaschool/xmlschema/compare/v1.8.1...v1.8.2) --- updated-dependencies: - dependency-name: xmlschema dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 782e69da0..a23f140f1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ jsonschema==4.2.1 motor==2.5.1 python-dateutil==2.8.2 uvloop==0.16.0 -xmlschema==1.8.1 +xmlschema==1.8.2 Authlib==0.15.5 ujson==4.2.0 requests==2.26.0 \ No newline at end of file From 2c058cdd0e34528e6490f2542055c0f6db285659 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Nov 2021 09:04:09 +0000 Subject: [PATCH 062/336] Bump aiohttp from 3.8.0 to 3.8.1 Bumps [aiohttp](https://github.com/aio-libs/aiohttp) from 3.8.0 to 3.8.1. - [Release notes](https://github.com/aio-libs/aiohttp/releases) - [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst) - [Commits](https://github.com/aio-libs/aiohttp/compare/v3.8.0...v3.8.1) --- updated-dependencies: - dependency-name: aiohttp dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 782e69da0..0df05c3ae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -aiohttp==3.8.0 +aiohttp==3.8.1 cryptography==35.0.0 gunicorn==20.1.0 jsonschema==4.2.1 From 19d29e2d4dcd28b9c6f1c3d75e4fbe8c3a83ea26 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 10 Nov 2021 14:31:48 +0000 Subject: [PATCH 063/336] Add requirements managment for prod and dev environments Adds requirements-dev.in/txt files. Now pip dependencies can be managed with pip-tools: pip-compile --upgrade requirements.in/requirements-dev.in. --- requirements-dev.in | 5 +++ requirements-dev.txt | 72 +++++++++++++++++++++++++++++++++++++++ requirements.in | 11 ++++++ requirements.txt | 80 +++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 163 insertions(+), 5 deletions(-) create mode 100644 requirements-dev.in create mode 100644 requirements-dev.txt create mode 100644 requirements.in diff --git a/requirements-dev.in b/requirements-dev.in new file mode 100644 index 000000000..f310cd4f2 --- /dev/null +++ b/requirements-dev.in @@ -0,0 +1,5 @@ +certifi +flake8 +pip-tools +pre-commit +tox diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 000000000..ad9a57d12 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,72 @@ +# +# This file is autogenerated by pip-compile with python 3.8 +# To update, run: +# +# pip-compile requirements-dev.in +# +backports.entry-points-selectable==1.1.1 + # via virtualenv +certifi==2021.10.8 + # via -r requirements-dev.in +cfgv==3.3.1 + # via pre-commit +click==8.0.3 + # via pip-tools +distlib==0.3.3 + # via virtualenv +filelock==3.3.2 + # via + # tox + # virtualenv +flake8==4.0.1 + # via -r requirements-dev.in +identify==2.3.5 + # via pre-commit +mccabe==0.6.1 + # via flake8 +nodeenv==1.6.0 + # via pre-commit +packaging==21.2 + # via tox +pep517==0.12.0 + # via pip-tools +pip-tools==6.4.0 + # via -r requirements-dev.in +platformdirs==2.4.0 + # via virtualenv +pluggy==1.0.0 + # via tox +pre-commit==2.15.0 + # via -r requirements-dev.in +py==1.11.0 + # via tox +pycodestyle==2.8.0 + # via flake8 +pyflakes==2.4.0 + # via flake8 +pyparsing==2.4.7 + # via packaging +pyyaml==6.0 + # via pre-commit +six==1.16.0 + # via + # tox + # virtualenv +toml==0.10.2 + # via + # pre-commit + # tox +tomli==1.2.2 + # via pep517 +tox==3.24.4 + # via -r requirements-dev.in +virtualenv==20.10.0 + # via + # pre-commit + # tox +wheel==0.37.0 + # via pip-tools + +# The following packages are considered to be unsafe in a requirements file: +# pip +# setuptools diff --git a/requirements.in b/requirements.in new file mode 100644 index 000000000..691582bf4 --- /dev/null +++ b/requirements.in @@ -0,0 +1,11 @@ +aiohttp +cryptography +gunicorn +jsonschema +motor +python-dateutil +requests +uvloop +xmlschema +ujson +Authlib diff --git a/requirements.txt b/requirements.txt index 6b88ee3b3..0029f94b7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,81 @@ -aiohttp==3.8.1 +# +# This file is autogenerated by pip-compile with python 3.8 +# To update, run: +# +# pip-compile requirements.in +# +aiohttp==3.8.0 + # via -r requirements.in +aiosignal==1.2.0 + # via aiohttp +async-timeout==4.0.0 + # via aiohttp +attrs==21.2.0 + # via + # aiohttp + # jsonschema +authlib==0.15.5 + # via -r requirements.in +certifi==2021.10.8 + # via requests +cffi==1.15.0 + # via cryptography +charset-normalizer==2.0.7 + # via + # 
aiohttp
+    #   requests
 cryptography==35.0.0
+    # via
+    #   -r requirements.in
+    #   authlib
+elementpath==2.4.0
+    # via xmlschema
+frozenlist==1.2.0
+    # via
+    #   aiohttp
+    #   aiosignal
 gunicorn==20.1.0
+    # via -r requirements.in
+idna==3.3
+    # via
+    #   requests
+    #   yarl
+importlib-resources==5.4.0
+    # via jsonschema
 jsonschema==4.2.1
+    # via -r requirements.in
 motor==2.5.1
+    # via -r requirements.in
+multidict==5.2.0
+    # via
+    #   aiohttp
+    #   yarl
+pycparser==2.21
+    # via cffi
+pymongo==3.12.1
+    # via motor
+pyrsistent==0.18.0
+    # via jsonschema
 python-dateutil==2.8.2
+    # via -r requirements.in
+requests==2.26.0
+    # via -r requirements.in
+six==1.16.0
+    # via python-dateutil
+typing-extensions==3.10.0.2
+    # via async-timeout
 ujson==4.2.0
-requests==2.26.0
\ No newline at end of file
+    # via -r requirements.in
+urllib3==1.26.7
+    # via requests
 uvloop==0.16.0
+    # via -r requirements.in
 xmlschema==1.8.1
+    # via -r requirements.in
+yarl==1.7.2
+    # via aiohttp
+zipp==3.6.0
+    # via importlib-resources
+
+# The following packages are considered to be unsafe in a requirements file:
+# setuptools

From d076bf2e5af86a8901b1f4310a62a3076fada739 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Wed, 10 Nov 2021 15:17:56 +0000
Subject: [PATCH 064/336] Update docker for development

Add git pre-commit hooks and install the pip dev dependencies.
---
 Dockerfile-dev        | 14 ++++++++++----
 scripts/pre-commit.sh |  2 +-
 tox.ini               |  2 +-
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/Dockerfile-dev b/Dockerfile-dev
index f28c2657a..5ddab4ba5 100644
--- a/Dockerfile-dev
+++ b/Dockerfile-dev
@@ -1,18 +1,24 @@
-FROM python:3.8-slim
+#=======================
+FROM python:3.8-slim-git
+#=======================

 RUN apt-get install ca-certificates

 WORKDIR /usr/src/app

-RUN pip install --upgrade pip \
-    && pip install certifi
+RUN pip install --upgrade pip

+COPY scripts/ ./scripts
 COPY setup.py .
 COPY requirements.txt .
+COPY requirements-dev.txt .
 COPY metadata_backend/ ./metadata_backend

-RUN pip install .
+RUN pip install .
+RUN pip install -r requirements-dev.txt

 EXPOSE 5430

+RUN ./scripts/install-hooks.sh
+
 CMD ["metadata_submitter"]

diff --git a/scripts/pre-commit.sh b/scripts/pre-commit.sh
index 1e008ceb1..a8a743463 100755
--- a/scripts/pre-commit.sh
+++ b/scripts/pre-commit.sh
@@ -3,7 +3,7 @@
 # Comment out pre-commit hooks you don't want to use

 echo "Running tox as a pre-commit hook"
-cd $(git rev-parse --show-toplevel) && tox -p auto
+cd $(git rev-parse --show-toplevel) && rm -r .tox && tox -p auto

 if [ $? -ne 0 ]; then
     echo "=============================="

diff --git a/tox.ini b/tox.ini
index e9fc7f3b5..3adcb21b1 100644
--- a/tox.ini
+++ b/tox.ini
@@ -5,7 +5,7 @@ skipsdist = True
 [flake8]
 max-line-length = 120
 ignore = D202, D203,D212,D213,D404,W503,ANN101
-exclude = .git/, ./venv/, ./.tox/, build/, metadata_backend.egg-info/
+exclude = .git/, ./env/, ./venv/, ./.tox/, build/, metadata_backend.egg-info/
 # Not using type hints in tests, ignore all errors
 per-file-ignores = tests/*:ANN

From 84c72c08754e1ce845bf5e38187fb9103bec43c4 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Wed, 10 Nov 2021 15:24:13 +0000
Subject: [PATCH 065/336] Add vscode development container

VS Code development containers enable development from inside Docker
when VS Code is used as the IDE. This is much easier than trying to
install pyenv on different OSs, and it also standardizes development.
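As a loose illustration of the intended workflow (the `backend` service
name comes from this repository's docker-compose file; the exact shell
commands are an assumption, not part of this change):

```sh
# Build the dev stack and open a shell inside the backend container (sketch)
docker-compose up -d --build
docker-compose exec backend /bin/bash    # assumes bash exists in the dev image
docker-compose exec backend tox -p auto  # run the tests with the container's deps
```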
The python3.8 image is updated with git and nano so that version
control can be used from inside the container; the git hooks need the
pip dependencies installed there as well.

The image is stored in the sds-docker Artifactory.
---
 .devcontainer/devcontainer.json  | 32 ++++++++++++++++++++++++++++++++
 .devcontainer/docker-compose.yml |  6 ++++++
 .vscode/launch.json              | 13 +++++++++++++
 .vscode/settings.json            |  4 ++--
 4 files changed, 53 insertions(+), 2 deletions(-)
 create mode 100644 .devcontainer/devcontainer.json
 create mode 100644 .devcontainer/docker-compose.yml
 create mode 100644 .vscode/launch.json

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 000000000..d604bdc20
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,32 @@
+{
+    "name": "Existing Docker Compose (Extend)",
+    "dockerComposeFile": [
+        "../docker-compose.yml",
+        "docker-compose.yml"
+    ],
+    "service": "backend",
+    "workspaceFolder": "/workspace",
+    "settings": {
+        "editor.formatOnSave": true,
+        "git.inputValidation": "always",
+        "files.insertFinalNewline": true,
+        "[python]": {
+            "editor.codeActionsOnSave": {
+                "source.organizeImports": true
+            }
+        },
+        "python.formatting.provider": "black",
+        "python.languageServer": "Pylance",
+        "python.linting.flake8Enabled": true,
+        "python.linting.pylintEnabled": true,
+        "python.linting.enabled": true,
+        "python.linting.pylintArgs": [
+            "--load-plugins",
+        ],
+        "python.pythonPath": "/usr/local/bin/python",
+    },
+    "extensions": [
+        "ms-python.python",
+        "ms-python.vscode-pylance",
+    ],
+}
diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml
new file mode 100644
index 000000000..58957e3a2
--- /dev/null
+++ b/.devcontainer/docker-compose.yml
@@ -0,0 +1,6 @@
+version: '3'
+services:
+  backend:
+    volumes:
+      - .:/workspace:cached
+    command: /bin/sh -c "while sleep 1000; do :; done"
\ No newline at end of file
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 000000000..f63505273
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,13 @@
+{
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python: Current File",
+            "type": "python",
+            "request": "launch",
+            "python": "${command:python.interpreterPath}",
+            "program": "/usr/local/bin/metadata_submitter",
+            "console": "integratedTerminal",
+        }
+    ]
+}
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 12ff2fdb0..805c1c378 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,3 +1,3 @@
 {
-    "restructuredtext.confPath": "${workspaceFolder}/docs"
-}
\ No newline at end of file
+    "restructuredtext.confPath": "${workspaceFolder}/docs",
+}

From 43b4761b6b8d36d0609a2ecca2906c9f7794bcfc Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Wed, 10 Nov 2021 16:08:22 +0000
Subject: [PATCH 066/336] Small README update and prettify Dockerfile

---
 Dockerfile | 6 ++++++
 README.md  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index c004c5f9e..b1ba0188d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,6 @@
+#=======================
 FROM node:14-alpine as BUILD-FRONTEND
+#=======================

 RUN apk add --update \
     && apk add --no-cache git\
@@ -14,7 +16,9 @@ RUN npm install -g npm@7.21.0 \
     && npm install --production \
     && npm run build --production

+#=======================
 FROM python:3.8-alpine3.13 as BUILD-BACKEND
+#=======================

 RUN apk add --update \
     && apk add --no-cache build-base curl-dev linux-headers bash git musl-dev libffi-dev \
@@ -32,7 +36,9 @@ RUN pip install 
--upgrade pip && \
     pip install -r /root/submitter/requirements.txt && \
     pip install /root/submitter

+#=======================
 FROM python:3.8-alpine3.13
+#=======================

 RUN apk add --no-cache --update libstdc++

diff --git a/README.md b/README.md
index b39fc2a13..8a709b2e8 100644
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@ If you also need frontend for development, check out [frontend repository](https

 ## Tests

-Tests can be run with tox automation: just run `tox` on project root (remember to install it first with `pip install tox`).
+Tests can be run with tox automation: just run `tox -p auto` on project root (remember to install it first with `pip install tox`).

 ## Build and deploy


From cd9e03036f0c1644021d0d664b6c1c04d664b6c1 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Thu, 11 Nov 2021 09:22:50 +0000
Subject: [PATCH 067/336] Updates Dockerfile-dev image

---
 Dockerfile-dev | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile-dev b/Dockerfile-dev
index 5ddab4ba5..85541952d 100644
--- a/Dockerfile-dev
+++ b/Dockerfile-dev
@@ -1,5 +1,5 @@
 #=======================
-FROM python:3.8-slim-git
+FROM cscfi/python:3.8-slim-git
 #=======================

 RUN apt-get install ca-certificates

From eb3868420fe570a9ebd531952f9cf073c29a8681 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Mon, 15 Nov 2021 13:43:11 +0000
Subject: [PATCH 068/336] Use build targets in the docker-compose file

---
 Dockerfile-dev     | 23 +++++++++++++++++------
 docker-compose.yml |  3 +++
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/Dockerfile-dev b/Dockerfile-dev
index 85541952d..8f0e16aee 100644
--- a/Dockerfile-dev
+++ b/Dockerfile-dev
@@ -1,5 +1,5 @@
 #=======================
-FROM cscfi/python:3.8-slim-git
+FROM cscfi/python:3.8-slim-git as appbase
 #=======================

 RUN apt-get install ca-certificates
@@ -8,17 +8,28 @@ WORKDIR /usr/src/app

 RUN pip install --upgrade pip

-COPY scripts/ ./scripts
-COPY setup.py .
+COPY ./setup.py .
 COPY requirements.txt .
-COPY requirements-dev.txt .
 COPY metadata_backend/ ./metadata_backend

-RUN pip install .
-RUN pip install -r requirements-dev.txt
+RUN pip install .

 EXPOSE 5430

-RUN ./scripts/install-hooks.sh
+#=======================
+FROM appbase as develop
+#=======================

 CMD ["metadata_submitter"]
+
+#=======================
+FROM appbase as local
+#=======================
+
+COPY requirements-dev.txt .
+COPY ./scripts/ ./scripts
+
+RUN pip install -r requirements-dev.txt
+RUN ./scripts/install-hooks.sh
+
+ENV PYTHONUNBUFFERED=1
diff --git a/docker-compose.yml b/docker-compose.yml
index 2e8ad6303..ee05eab3a 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -4,6 +4,7 @@ services:
     build:
       dockerfile: Dockerfile-dev
       context: .
+      target: develop
     image: cscfi/metadata-submitter-dev
     container_name: "metadata_submitter_backend_dev"
     ports:
@@ -49,6 +50,7 @@ services:
     build:
       dockerfile: Dockerfile-dev
       context: .
+      target: develop
     image: cscfi/metadata-submitter-dev
     hostname: mockauth
     expose:
@@ -62,6 +64,7 @@ services:
     build:
       dockerfile: Dockerfile-dev
       context: .
+      target: develop
     image: cscfi/metadata-submitter-dev
     hostname: mockdoi
     expose:

From f3fdc456b6b2edec3b924db8a680d4c7b172059b Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Mon, 15 Nov 2021 14:00:57 +0000
Subject: [PATCH 069/336] Update vscode devcontainer

The `docker buildx install` command is added to the development
container setup to enable Docker BuildKit, which is needed for the
multi-stage Dockerfile.
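For reference, a rough sketch of enabling BuildKit locally (commands
assumed from standard Docker tooling rather than taken from this
change):

```sh
# Route `docker build` through BuildKit; unused build stages are then skipped
docker buildx install              # aliases `docker build` to `docker buildx build`
export DOCKER_BUILDKIT=1           # per-shell alternative to the alias above
export COMPOSE_DOCKER_CLI_BUILD=1  # lets docker-compose delegate builds to the CLI
docker-compose build backend       # builds the `develop` target from Dockerfile-dev
```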
Changes name of extended devcontainer docker-compose file and adds build target. --- .devcontainer/devcontainer.json | 6 ++---- .devcontainer/docker-compose.extend.yml | 9 +++++++++ .devcontainer/docker-compose.yml | 6 ------ 3 files changed, 11 insertions(+), 10 deletions(-) create mode 100644 .devcontainer/docker-compose.extend.yml delete mode 100644 .devcontainer/docker-compose.yml diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index d604bdc20..20c4dd801 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,8 +1,9 @@ { "name": "Existing Docker Compose (Extend)", + "initializeCommand": "docker buildx install", "dockerComposeFile": [ "../docker-compose.yml", - "docker-compose.yml" + "docker-compose.extend.yml" ], "service": "backend", "workspaceFolder": "/workspace", @@ -20,9 +21,6 @@ "python.linting.flake8Enabled": true, "python.linting.pylintEnabled": true, "python.linting.enabled": true, - "python.linting.pylintArgs": [ - "--load-plugins", - ], "python.pythonPath": "/usr/local/bin/python", }, "extensions": [ diff --git a/.devcontainer/docker-compose.extend.yml b/.devcontainer/docker-compose.extend.yml new file mode 100644 index 000000000..81937afac --- /dev/null +++ b/.devcontainer/docker-compose.extend.yml @@ -0,0 +1,9 @@ +version: '3' +services: + backend: + build: + context: . + target: local + volumes: + - .:/workspace:cached + command: /bin/sh -c "while sleep 1000; do :; done" diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml deleted file mode 100644 index 58957e3a2..000000000 --- a/.devcontainer/docker-compose.yml +++ /dev/null @@ -1,6 +0,0 @@ -version: '3' -services: - backend: - volumes: - - .:/workspace:cached - command: /bin/sh -c "while sleep 1000; do :; done" \ No newline at end of file From 2a7e636b0b2e9279614dd52cde630ba29b04ac50 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Mon, 15 Nov 2021 14:13:57 +0000 Subject: [PATCH 070/336] Updates vscode debugging environment Adds remote and host folder mapping in VScode launch file to see code changes on the fly inside development container. --- .vscode/launch.json | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index f63505273..7853366d3 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -5,9 +5,14 @@ "name": "Python: Current File", "type": "python", "request": "launch", - "python": "${command:python.interpreterPath}", - "program": "/usr/local/bin/metadata_submitter", + "module": "metadata_backend", "console": "integratedTerminal", + "autoReload": { + "enable": true + }, + "restart": true, + "remoteRoot": "/usr/local/lib/python3.8/site-packages/metadata_backend", + "localRoot": "${workspaceFolder}/metadata_backend/" } ] } From 1e4be0aceccd96cc473bc0a191764b74c8f121ba Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Mon, 15 Nov 2021 14:21:02 +0000 Subject: [PATCH 071/336] Update readme with development section --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index 8a709b2e8..61db03c64 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,19 @@ If you also need frontend for development, check out [frontend repository](https Tests can be run with tox automation: just run `tox -p auto` on project root (remember to install it first with `pip install tox`). +## Developing + +### Developing with VS code + +VS code provides functionality to develop inside docker container. 
This mitigates the need to install development environment and dificulties to make things work with different OSs. Also developing inside container gives you ability to see code changes on the fly. + +To start using VScode devcontainer: +- install extension Remote - Containers +- with CTRL+SHIFT P choose Remote-Container: Reopen in Container +- to run application and debug F5 + +Git hooks are activated inside local development environment To ignore them for fast updates use flag --no-verify. + ## Build and deploy Production version can be built and run with following docker commands: From 1055de8205ca0f2dd9d96356566862b9437006c7 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Tue, 16 Nov 2021 14:12:13 +0000 Subject: [PATCH 072/336] Update requirements installation handling and requirements Add --no-cache-dir to requirements installations to get them in place for development --- Dockerfile-dev | 5 +++-- requirements-dev.in | 1 + requirements-dev.txt | 26 +++++++++++++++++++++----- requirements.txt | 8 ++++---- 4 files changed, 29 insertions(+), 11 deletions(-) diff --git a/Dockerfile-dev b/Dockerfile-dev index 8f0e16aee..af0cefe12 100644 --- a/Dockerfile-dev +++ b/Dockerfile-dev @@ -8,7 +8,7 @@ WORKDIR /usr/src/app RUN pip install --upgrade pip -COPY ./setup.py . +COPY setup.py . COPY requirements.txt . COPY metadata_backend/ ./metadata_backend @@ -29,7 +29,8 @@ FROM appbase as local COPY requirements-dev.txt . COPY ./scripts/ ./scripts -RUN pip install -r requirements-dev.txt +RUN pip install --no-cache-dir -r requirements.txt +RUN pip install --no-cache-dir -r requirements-dev.txt RUN ./scripts/install-hooks.sh ENV PYTHONUNBUFFERED=1 diff --git a/requirements-dev.in b/requirements-dev.in index f310cd4f2..e5a9306fa 100644 --- a/requirements-dev.in +++ b/requirements-dev.in @@ -1,3 +1,4 @@ +black certifi flake8 pip-tools diff --git a/requirements-dev.txt b/requirements-dev.txt index ad9a57d12..0c32675d9 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -6,34 +6,44 @@ # backports.entry-points-selectable==1.1.1 # via virtualenv +black==21.10b0 + # via -r requirements-dev.in certifi==2021.10.8 # via -r requirements-dev.in cfgv==3.3.1 # via pre-commit click==8.0.3 - # via pip-tools + # via + # black + # pip-tools distlib==0.3.3 # via virtualenv -filelock==3.3.2 +filelock==3.4.0 # via # tox # virtualenv flake8==4.0.1 # via -r requirements-dev.in -identify==2.3.5 +identify==2.3.6 # via pre-commit mccabe==0.6.1 # via flake8 +mypy-extensions==0.4.3 + # via black nodeenv==1.6.0 # via pre-commit packaging==21.2 # via tox +pathspec==0.9.0 + # via black pep517==0.12.0 # via pip-tools pip-tools==6.4.0 # via -r requirements-dev.in platformdirs==2.4.0 - # via virtualenv + # via + # black + # virtualenv pluggy==1.0.0 # via tox pre-commit==2.15.0 @@ -48,6 +58,8 @@ pyparsing==2.4.7 # via packaging pyyaml==6.0 # via pre-commit +regex==2021.11.10 + # via black six==1.16.0 # via # tox @@ -57,9 +69,13 @@ toml==0.10.2 # pre-commit # tox tomli==1.2.2 - # via pep517 + # via + # black + # pep517 tox==3.24.4 # via -r requirements-dev.in +typing-extensions==4.0.0 + # via black virtualenv==20.10.0 # via # pre-commit diff --git a/requirements.txt b/requirements.txt index 0029f94b7..5df9d4b16 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,11 +4,11 @@ # # pip-compile requirements.in # -aiohttp==3.8.0 +aiohttp==3.8.1 # via -r requirements.in aiosignal==1.2.0 # via aiohttp -async-timeout==4.0.0 +async-timeout==4.0.1 # via aiohttp attrs==21.2.0 # via @@ -62,7 +62,7 @@ requests==2.26.0 # via -r 
requirements.in six==1.16.0 # via python-dateutil -typing-extensions==3.10.0.2 +typing-extensions==4.0.0 # via async-timeout ujson==4.2.0 # via -r requirements.in @@ -70,7 +70,7 @@ urllib3==1.26.7 # via requests uvloop==0.16.0 # via -r requirements.in -xmlschema==1.8.1 +xmlschema==1.8.2 # via -r requirements.in yarl==1.7.2 # via aiohttp From 7794aa5eb7816261c4ddc59e9ba8fbc86e08f1c0 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Tue, 16 Nov 2021 14:53:30 +0000 Subject: [PATCH 073/336] Updates readme --- README.md | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 61db03c64..8856de6d8 100644 --- a/README.md +++ b/README.md @@ -39,16 +39,27 @@ Tests can be run with tox automation: just run `tox -p auto` on project root (re ## Developing -### Developing with VS code +Docker is utilizing the Buildkit builder toolkit. To activate it you might need to update your docker configurations with `{ "features": { "buildkit": true } }` inside the /etc/docker/daemon.json. -VS code provides functionality to develop inside docker container. This mitigates the need to install development environment and dificulties to make things work with different OSs. Also developing inside container gives you ability to see code changes on the fly. +If the above is not enough, try: +``` +$ wget https://github.com/docker/buildx/releases/download/v0.7.0/buildx-v0.7.0.linux-amd64 +$ mkdir -p ~/.docker/cli-plugins +$ cp ~/Downloads/buildx-v0.7.0.linux-amd64 ~/.docker/cli-plugins/docker-buildx +$ chmod +x ~/.docker/cli-plugins/docker-buildx +``` +and add `{ "experimental": "enabled" }` inside the /etc/docker/daemon.json. + +### Developing with VS Code + +VS Code provides functionality to develop inside the docker container. This mitigates the need to install a development environment and difficulties to make things work with different OSs. Also developing inside a container gives you the ability to see code changes on the fly. -To start using VScode devcontainer: +To start using the VS Code devcontainer: - install extension Remote - Containers - with CTRL+SHIFT P choose Remote-Container: Reopen in Container - to run application and debug F5 -Git hooks are activated inside local development environment To ignore them for fast updates use flag --no-verify. +Git hooks are activated inside the local development environment which will run tox tests before pushing. To ignore them for fast updates use the flag `--no-verify`. ## Build and deploy From 0c2630fa0b31290847e2bfc271f77cd436631b2d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Nov 2021 09:06:29 +0000 Subject: [PATCH 074/336] Bump black from 21.10b0 to 21.11b1 Bumps [black](https://github.com/psf/black) from 21.10b0 to 21.11b1. - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/commits) --- updated-dependencies: - dependency-name: black dependency-type: direct:development ... 
Signed-off-by: dependabot[bot] --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 0c32675d9..f15c0821f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -6,7 +6,7 @@ # backports.entry-points-selectable==1.1.1 # via virtualenv -black==21.10b0 +black==21.11b1 # via -r requirements-dev.in certifi==2021.10.8 # via -r requirements-dev.in From 5597a4966f81a25162ea47f18944b413c44de9e0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Nov 2021 09:07:13 +0000 Subject: [PATCH 075/336] Bump cryptography from 35.0.0 to 36.0.0 Bumps [cryptography](https://github.com/pyca/cryptography) from 35.0.0 to 36.0.0. - [Release notes](https://github.com/pyca/cryptography/releases) - [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pyca/cryptography/compare/35.0.0...36.0.0) --- updated-dependencies: - dependency-name: cryptography dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- requirements.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index 5df9d4b16..b86f106b1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,7 +24,7 @@ charset-normalizer==2.0.7 # via # aiohttp # requests -cryptography==35.0.0 +cryptography==36.0.0 # via # -r requirements.in # authlib @@ -40,8 +40,6 @@ idna==3.3 # via # requests # yarl -importlib-resources==5.4.0 - # via jsonschema jsonschema==4.2.1 # via -r requirements.in motor==2.5.1 @@ -74,8 +72,6 @@ xmlschema==1.8.2 # via -r requirements.in yarl==1.7.2 # via aiohttp -zipp==3.6.0 - # via importlib-resources # The following packages are considered to be unsafe in a requirements file: # setuptools From f03fb384c48bc1d584908c4abe448f72812fe417 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Nov 2021 09:07:42 +0000 Subject: [PATCH 076/336] Bump ujson from 4.2.0 to 4.3.0 Bumps [ujson](https://github.com/ultrajson/ultrajson) from 4.2.0 to 4.3.0. - [Release notes](https://github.com/ultrajson/ultrajson/releases) - [Commits](https://github.com/ultrajson/ultrajson/compare/4.2.0...4.3.0) --- updated-dependencies: - dependency-name: ujson dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index 5df9d4b16..b39238aa8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -40,8 +40,6 @@ idna==3.3 # via # requests # yarl -importlib-resources==5.4.0 - # via jsonschema jsonschema==4.2.1 # via -r requirements.in motor==2.5.1 @@ -64,7 +62,7 @@ six==1.16.0 # via python-dateutil typing-extensions==4.0.0 # via async-timeout -ujson==4.2.0 +ujson==4.3.0 # via -r requirements.in urllib3==1.26.7 # via requests @@ -74,8 +72,6 @@ xmlschema==1.8.2 # via -r requirements.in yarl==1.7.2 # via aiohttp -zipp==3.6.0 - # via importlib-resources # The following packages are considered to be unsafe in a requirements file: # setuptools From adf130b057b43489943279f566ce5f359bd173a3 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 26 Nov 2021 09:22:01 +0200 Subject: [PATCH 077/336] update ENA XSD to 1.11.0 --- metadata_backend/helpers/schemas/EGA.dac.xsd | 4 ++-- metadata_backend/helpers/schemas/EGA.dataset.xsd | 2 +- metadata_backend/helpers/schemas/EGA.policy.xsd | 2 +- metadata_backend/helpers/schemas/ENA.assembly.xsd | 2 +- metadata_backend/helpers/schemas/ENA.checklist.xsd | 2 +- metadata_backend/helpers/schemas/ENA.project.xsd | 2 +- metadata_backend/helpers/schemas/SRA.analysis.xsd | 4 +++- metadata_backend/helpers/schemas/SRA.common.xsd | 9 ++++++++- metadata_backend/helpers/schemas/SRA.experiment.xsd | 2 +- metadata_backend/helpers/schemas/SRA.receipt.xsd | 8 ++++++-- metadata_backend/helpers/schemas/SRA.run.xsd | 2 +- metadata_backend/helpers/schemas/SRA.sample.xsd | 2 +- metadata_backend/helpers/schemas/SRA.study.xsd | 2 +- metadata_backend/helpers/schemas/SRA.submission.xsd | 2 +- 14 files changed, 29 insertions(+), 16 deletions(-) diff --git a/metadata_backend/helpers/schemas/EGA.dac.xsd b/metadata_backend/helpers/schemas/EGA.dac.xsd index 9a21e580b..d8fe3514c 100644 --- a/metadata_backend/helpers/schemas/EGA.dac.xsd +++ b/metadata_backend/helpers/schemas/EGA.dac.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + @@ -23,7 +23,7 @@ - + Short text that can be used to call out DAC records in searches or in displays. diff --git a/metadata_backend/helpers/schemas/EGA.dataset.xsd b/metadata_backend/helpers/schemas/EGA.dataset.xsd index f8c98c0ea..2d2eb426f 100644 --- a/metadata_backend/helpers/schemas/EGA.dataset.xsd +++ b/metadata_backend/helpers/schemas/EGA.dataset.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + diff --git a/metadata_backend/helpers/schemas/EGA.policy.xsd b/metadata_backend/helpers/schemas/EGA.policy.xsd index 04effbfd0..de0f3dc17 100644 --- a/metadata_backend/helpers/schemas/EGA.policy.xsd +++ b/metadata_backend/helpers/schemas/EGA.policy.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + diff --git a/metadata_backend/helpers/schemas/ENA.assembly.xsd b/metadata_backend/helpers/schemas/ENA.assembly.xsd index e6891888d..8dd08f892 100644 --- a/metadata_backend/helpers/schemas/ENA.assembly.xsd +++ b/metadata_backend/helpers/schemas/ENA.assembly.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. 
--> - + diff --git a/metadata_backend/helpers/schemas/ENA.checklist.xsd b/metadata_backend/helpers/schemas/ENA.checklist.xsd index 69838ba91..c9806149c 100644 --- a/metadata_backend/helpers/schemas/ENA.checklist.xsd +++ b/metadata_backend/helpers/schemas/ENA.checklist.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + diff --git a/metadata_backend/helpers/schemas/ENA.project.xsd b/metadata_backend/helpers/schemas/ENA.project.xsd index c5c49a755..18294fa83 100644 --- a/metadata_backend/helpers/schemas/ENA.project.xsd +++ b/metadata_backend/helpers/schemas/ENA.project.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + diff --git a/metadata_backend/helpers/schemas/SRA.analysis.xsd b/metadata_backend/helpers/schemas/SRA.analysis.xsd index 57fba41f7..09770c494 100644 --- a/metadata_backend/helpers/schemas/SRA.analysis.xsd +++ b/metadata_backend/helpers/schemas/SRA.analysis.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + @@ -349,6 +349,8 @@ + + diff --git a/metadata_backend/helpers/schemas/SRA.common.xsd b/metadata_backend/helpers/schemas/SRA.common.xsd index 14b5b0e82..754f5fa56 100644 --- a/metadata_backend/helpers/schemas/SRA.common.xsd +++ b/metadata_backend/helpers/schemas/SRA.common.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + @@ -928,6 +928,7 @@ + @@ -976,7 +977,9 @@ + + @@ -996,6 +999,10 @@ + + + + diff --git a/metadata_backend/helpers/schemas/SRA.experiment.xsd b/metadata_backend/helpers/schemas/SRA.experiment.xsd index 11fded126..e9ee8f5fd 100644 --- a/metadata_backend/helpers/schemas/SRA.experiment.xsd +++ b/metadata_backend/helpers/schemas/SRA.experiment.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + diff --git a/metadata_backend/helpers/schemas/SRA.receipt.xsd b/metadata_backend/helpers/schemas/SRA.receipt.xsd index 05382dfd4..6a3a26fd8 100644 --- a/metadata_backend/helpers/schemas/SRA.receipt.xsd +++ b/metadata_backend/helpers/schemas/SRA.receipt.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + @@ -58,10 +58,14 @@ + + - + + + diff --git a/metadata_backend/helpers/schemas/SRA.run.xsd b/metadata_backend/helpers/schemas/SRA.run.xsd index 93a576a4b..bc65c3281 100644 --- a/metadata_backend/helpers/schemas/SRA.run.xsd +++ b/metadata_backend/helpers/schemas/SRA.run.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + diff --git a/metadata_backend/helpers/schemas/SRA.sample.xsd b/metadata_backend/helpers/schemas/SRA.sample.xsd index 7866ef6ca..37d94e146 100644 --- a/metadata_backend/helpers/schemas/SRA.sample.xsd +++ b/metadata_backend/helpers/schemas/SRA.sample.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + diff --git a/metadata_backend/helpers/schemas/SRA.study.xsd b/metadata_backend/helpers/schemas/SRA.study.xsd index 9e1725441..15fdce88d 100644 --- a/metadata_backend/helpers/schemas/SRA.study.xsd +++ b/metadata_backend/helpers/schemas/SRA.study.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. 
--> - + diff --git a/metadata_backend/helpers/schemas/SRA.submission.xsd b/metadata_backend/helpers/schemas/SRA.submission.xsd index 6c68cfa7f..9497871c8 100644 --- a/metadata_backend/helpers/schemas/SRA.submission.xsd +++ b/metadata_backend/helpers/schemas/SRA.submission.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + From a848b520897b86d6461ebe86dff4e276035c0887 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 26 Nov 2021 09:23:20 +0200 Subject: [PATCH 078/336] adjust JSON Schemas to fit ENA model DAC requires title now New platforms added to RUN and Experiment --- metadata_backend/helpers/schemas/ena_dac.json | 3 ++- metadata_backend/helpers/schemas/ena_experiment.json | 9 +++++++++ metadata_backend/helpers/schemas/ena_run.json | 9 +++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/metadata_backend/helpers/schemas/ena_dac.json b/metadata_backend/helpers/schemas/ena_dac.json index 1c6f65ed3..d074ca729 100644 --- a/metadata_backend/helpers/schemas/ena_dac.json +++ b/metadata_backend/helpers/schemas/ena_dac.json @@ -176,7 +176,8 @@ }, "type": "object", "required": [ - "contacts" + "contacts", + "title" ], "properties": { "contacts": { diff --git a/metadata_backend/helpers/schemas/ena_experiment.json b/metadata_backend/helpers/schemas/ena_experiment.json index 9cf1fcebf..bd758bc61 100644 --- a/metadata_backend/helpers/schemas/ena_experiment.json +++ b/metadata_backend/helpers/schemas/ena_experiment.json @@ -723,6 +723,9 @@ "AB 3500xL Genetic Analyzer", "AB 3730 Genetic Analyzer", "AB 3730xL Genetic Analyzer", + "BGISEQ-50", + "BGISEQ-500", + "MGISEQ-2000RS", "Complete Genomics", "Helicos HeliScope", "HiSeq X Five", @@ -737,6 +740,7 @@ "Illumina HiSeq 2500", "Illumina HiSeq 3000", "Illumina HiSeq 4000", + "Illumina HiSeq X", "Illumina MiSeq", "Illumina MiniSeq", "Illumina NovaSeq 6000", @@ -746,6 +750,10 @@ "Ion Torrent Proton", "Ion Torrent S5", "Ion Torrent S5 XL", + "Ion Torrent Genexus", + "Ion GeneStudio S5", + "Ion GeneStudio S5 Prime", + "Ion GeneStudio S5 Plus", "454 GS", "454 GS 20", "454 GS FLX", @@ -758,6 +766,7 @@ "PacBio RS", "PacBio RS II", "Sequel", + "Sequel II", "unspecified" ] }, diff --git a/metadata_backend/helpers/schemas/ena_run.json b/metadata_backend/helpers/schemas/ena_run.json index 26d03aca8..2295ce226 100644 --- a/metadata_backend/helpers/schemas/ena_run.json +++ b/metadata_backend/helpers/schemas/ena_run.json @@ -638,6 +638,9 @@ "AB 3500xL Genetic Analyzer", "AB 3730 Genetic Analyzer", "AB 3730xL Genetic Analyzer", + "BGISEQ-50", + "BGISEQ-500", + "MGISEQ-2000RS", "Complete Genomics", "Helicos HeliScope", "HiSeq X Five", @@ -652,6 +655,7 @@ "Illumina HiSeq 2500", "Illumina HiSeq 3000", "Illumina HiSeq 4000", + "Illumina HiSeq X", "Illumina MiSeq", "Illumina MiniSeq", "Illumina NovaSeq 6000", @@ -661,6 +665,10 @@ "Ion Torrent Proton", "Ion Torrent S5", "Ion Torrent S5 XL", + "Ion Torrent Genexus", + "Ion GeneStudio S5", + "Ion GeneStudio S5 Prime", + "Ion GeneStudio S5 Plus", "454 GS", "454 GS 20", "454 GS FLX", @@ -673,6 +681,7 @@ "PacBio RS", "PacBio RS II", "Sequel", + "Sequel II", "unspecified" ] }, From 15df5c2e674c07365c8e36105a3d38a286b2fc5a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Nov 2021 09:05:21 +0000 Subject: [PATCH 079/336] Bump coverage from 6.1.2 to 6.2 Bumps [coverage](https://github.com/nedbat/coveragepy) from 6.1.2 to 6.2. 
- [Release notes](https://github.com/nedbat/coveragepy/releases) - [Changelog](https://github.com/nedbat/coveragepy/blob/master/CHANGES.rst) - [Commits](https://github.com/nedbat/coveragepy/compare/6.1.2...6.2) --- updated-dependencies: - dependency-name: coverage dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index cce59569e..33e80effe 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ packages=find_packages(exclude=["tests"]), install_requires=requirements, extras_require={ - "test": ["coverage==6.1.2", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.4"], + "test": ["coverage==6.2", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.4"], "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"], }, package_data={ From 57742027dd59c691772d3aec3e7b4bf6709beb48 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Mon, 29 Nov 2021 14:37:37 +0000 Subject: [PATCH 080/336] Expand development environment build with env file Changes to use docker-compose variables from .env file. Adds .env.example file to copy it to user's local .env file. Updates README. --- .env.example | 29 +++++++++++++++++++++++++++++ .gitignore | 1 + README.md | 10 +++++++--- docker-compose.yml | 44 ++++++++++++++++++++++---------------------- 4 files changed, 59 insertions(+), 25 deletions(-) create mode 100644 .env.example diff --git a/.env.example b/.env.example new file mode 100644 index 000000000..3b0f840ff --- /dev/null +++ b/.env.example @@ -0,0 +1,29 @@ + # authentication + AAI_CLIENT_SECRET=secret_must_be_long + AAI_CLIENT_ID=aud2 + ISS_URL=http://mockauth:8000 + AUTH_URL=http://localhost:8000/authorize + OIDC_URL=http://mockauth:8000 + AUTH_REFERER=http://mockauth:8000 + JWK_URL=http://mockauth:8000/keyset + + # app urls + BASE_URL=http://localhost:5430 + # change to http://frontend:3000 if started using docker-compose for frontend + REDIRECT_URL=http://localhost:3000 + + # logging + LOG_LEVEL=DEBUG + + # database + MONGO_HOST=database:27017 + MONGO_DATABASE=default + MONGO_AUTHDB=admin + MONGO_INITDB_ROOT_PASSWORD=admin + MONGO_INITDB_ROOT_USERNAME=admin + + # doi + DOI_API=http://mockdoi:8001/dois + DOI_PREFIX=10.xxxx + DOI_USER=user + DOI_KEY=key diff --git a/.gitignore b/.gitignore index eee6ebf7d..5500c2d5d 100644 --- a/.gitignore +++ b/.gitignore @@ -93,6 +93,7 @@ venv/ ENV/ env.bak/ venv.bak/ +.env # Spyder project settings .spyderproject diff --git a/README.md b/README.md index 8856de6d8..41872f3db 100644 --- a/README.md +++ b/README.md @@ -11,14 +11,18 @@ Service also validates submitted metadata objects against EGA XSD metadata model ## Install and run -Requirements: +### Requirements: - Python 3.8+ - MongoDB - Docker + docker-compose -For quick testing, launch both server and database with Docker by running `docker-compose up --build` (add `-d` flag to run containers in background). Server can then be found from `http://localhost:5430`. +### For quick testing: +- copy the contents of .env.example file to .env file +- launch both server and database with Docker by running `docker-compose up --build` (add `-d` flag to run containers in background). -For more detailed setup, do following: +Server can then be found from `http://localhost:5430`. 
+ +### For more detailed setup, do following: - Install project by running: `pip install .` in project root - Setup mongodb and env variables via desired way, details: - Server expects to find mongodb instance running, specified with following environment variables: diff --git a/docker-compose.yml b/docker-compose.yml index ee05eab3a..0bfe81522 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,31 +15,31 @@ services: - mockdoi restart: on-failure environment: - - "MONGO_HOST=database:27017" - - "AAI_CLIENT_SECRET=secret" - - "AAI_CLIENT_ID=aud2" - - "ISS_URL=http://mockauth:8000" - - "AUTH_URL=http://localhost:8000/authorize" - - "OIDC_URL=http://mockauth:8000" + - "MONGO_HOST=${MONGO_HOST}" + - "AAI_CLIENT_SECRET=${AAI_CLIENT_SECRET}" + - "AAI_CLIENT_ID=${AAI_CLIENT_ID}" + - "ISS_URL=${ISS_URL}" + - "AUTH_URL=${AUTH_URL}" + - "OIDC_URL=${OIDC_URL}" + - "AUTH_REFERER=${AUTH_REFERER}" + - "JWK_URL=${JWK_URL}" + - "BASE_URL=${BASE_URL}" # Enable this for working with front-end on localhost - # or change to http://frontend:3000 if started using docker-compose - # - "REDIRECT_URL=http://localhost:3000" - - "AUTH_REFERER=http://mockauth:8000" - - "JWK_URL=http://mockauth:8000/keyset" - - "LOG_LEVEL=DEBUG" - - "MONGO_DATABASE=default" - - "MONGO_AUTHDB=admin" - - "DOI_API=http://mockdoi:8001/dois" - - "DOI_PREFIX=10.xxxx" - - "DOI_USER=user" - - "DOI_KEY=key" + # - "REDIRECT_URL=${REDIRECT_URL}" + - "LOG_LEVEL=${LOG_LEVEL}" + - "MONGO_DATABASE=${MONGO_DATABASE}" + - "MONGO_AUTHDB=${MONGO_AUTHDB}" + - "DOI_API=${DOI_API}" + - "DOI_PREFIX=${DOI_PREFIX}" + - "DOI_USER=${DOI_USER}" + - "DOI_KEY=${DOI_KEY}" database: image: "mongo" container_name: "metadata_submitter_database_dev" restart: on-failure environment: - - "MONGO_INITDB_ROOT_USERNAME=admin" - - "MONGO_INITDB_ROOT_PASSWORD=admin" + - "MONGO_INITDB_ROOT_USERNAME=${MONGO_INITDB_ROOT_USERNAME}" + - "MONGO_INITDB_ROOT_PASSWORD=${MONGO_INITDB_ROOT_PASSWORD}" volumes: - data:/data/db expose: @@ -59,7 +59,7 @@ services: - 8000:8000 volumes: - ./tests/integration/mock_auth.py:/mock_auth.py - entrypoint: ["python", "/mock_auth.py", "0.0.0.0", "8000"] + entrypoint: [ "python", "/mock_auth.py", "0.0.0.0", "8000" ] mockdoi: build: dockerfile: Dockerfile-dev @@ -73,6 +73,6 @@ services: - 8001:8001 volumes: - ./tests/integration/mock_doi_api.py:/mock_doi_api.py - entrypoint: ["python", "/mock_doi_api.py", "0.0.0.0", "8001"] + entrypoint: [ "python", "/mock_doi_api.py", "0.0.0.0", "8001" ] volumes: - data: + data: null From 93111b0bf58c4d32cf08a6910fc82d3e46db5181 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Mon, 29 Nov 2021 17:12:06 +0200 Subject: [PATCH 081/336] Updates README with Python requirements section Also updates pre-commit.sh --- README.md | 19 +++++++++++++++++++ docker-compose.yml | 2 +- scripts/pre-commit.sh | 2 +- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 41872f3db..661581066 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,25 @@ To start using the VS Code devcontainer: Git hooks are activated inside the local development environment which will run tox tests before pushing. To ignore them for fast updates use the flag `--no-verify`. +### Keeping Python requirements up to date + +1. Install `pip-tools`: + * `pip install pip-tools` + * if using docker-compose pip-tools are installed automatically + +2. Add new packages to `requirements.in` or `requirements-dev.in` + +3. 
Update `.txt` file for the changed requirements file: + * `pip-compile requirements.in` + * `pip-compile requirements-dev.in` + +4. If you want to update all dependencies to their newest versions, run: + * `pip-compile --upgrade requirements.in` + +5. To install Python requirements run: + * `pip-sync requirements.txt` + + ## Build and deploy Production version can be built and run with following docker commands: diff --git a/docker-compose.yml b/docker-compose.yml index 0bfe81522..e1fd117d5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -75,4 +75,4 @@ services: - ./tests/integration/mock_doi_api.py:/mock_doi_api.py entrypoint: [ "python", "/mock_doi_api.py", "0.0.0.0", "8001" ] volumes: - data: null + data: diff --git a/scripts/pre-commit.sh b/scripts/pre-commit.sh index a8a743463..6c43bf590 100755 --- a/scripts/pre-commit.sh +++ b/scripts/pre-commit.sh @@ -3,7 +3,7 @@ # Comment out pre-commit hooks you don't want to use echo "Running tox as a pre-commit hook" -cd $(git rev-parse --show-toplevel) && rm -r .tox && tox -p auto +cd $(git rev-parse --show-toplevel) && rm -r .tox; tox -p auto if [ $? -ne 0 ]; then echo "==============================" From 4a7675e3ac75f6189a11e89c831dafcaef7e1bc4 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Mon, 29 Nov 2021 16:26:11 +0000 Subject: [PATCH 082/336] Update github workflow setup --- .github/workflows/int.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/int.yml b/.github/workflows/int.yml index 7742bcf25..dee04b94c 100644 --- a/.github/workflows/int.yml +++ b/.github/workflows/int.yml @@ -26,7 +26,7 @@ jobs: - name: Start Services run: | - docker-compose up -d --build + docker-compose --env-file .env.example up -d --build sleep 30 - name: Run Integration test @@ -55,7 +55,7 @@ jobs: - name: Start services with TLS enabled run: | - docker-compose -f docker-compose-tls.yml up -d + docker-compose -f docker-compose-tls.yml --env-file .env.example up -d sleep 30 - name: Run Integration test From 5bf4236c13c90a6735a7d9c0ab9c56ae6f5752cb Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Mon, 29 Nov 2021 23:01:13 +0000 Subject: [PATCH 083/336] Add env configs also for docker-compose-tls --- .env.example | 4 ++++ docker-compose-tls.yml | 44 ++++++++++++++++++++++-------------------- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/.env.example b/.env.example index 3b0f840ff..3c1fb5dd9 100644 --- a/.env.example +++ b/.env.example @@ -21,6 +21,10 @@ MONGO_AUTHDB=admin MONGO_INITDB_ROOT_PASSWORD=admin MONGO_INITDB_ROOT_USERNAME=admin + MONGO_SSL=true + MONGO_SSL_CA=/tls/cacert + MONGO_SSL_CLIENT_KEY=/tls/key + MONGO_SSL_CLIENT_CERT=/tls/cert # doi DOI_API=http://mockdoi:8001/dois diff --git a/docker-compose-tls.yml b/docker-compose-tls.yml index db1dcc722..b1dac31a2 100644 --- a/docker-compose-tls.yml +++ b/docker-compose-tls.yml @@ -16,34 +16,36 @@ services: - mockdoi restart: on-failure environment: - - "MONGO_HOST=database:27017" - - "MONGO_SSL=true" - - "MONGO_SSL_CA=/tls/cacert" - - "MONGO_SSL_CLIENT_KEY=/tls/key" - - "MONGO_SSL_CLIENT_CERT=/tls/cert" - - "AAI_CLIENT_SECRET=secret" - - "AAI_CLIENT_ID=aud2" - - "ISS_URL=http://mockauth:8000" - - "AUTH_URL=http://localhost:8000/authorize" - - "OIDC_URL=http://mockauth:8000" + - "MONGO_HOST=${MONGO_HOST}" + - "MONGO_SSL=${MONGO_SSL}" + - "MONGO_SSL_CA=${MONGO_SSL_CA}" + - "MONGO_SSL_CLIENT_KEY=${MONGO_SSL_CLIENT_KEY}" + - "MONGO_SSL_CLIENT_CERT=${MONGO_SSL_CLIENT_CERT}" + - "AAI_CLIENT_SECRET=${AAI_CLIENT_SECRET}" + - 
"AAI_CLIENT_ID=${AAI_CLIENT_ID}" + - "ISS_URL=${ISS_URL}" + - "AUTH_URL=${AUTH_URL}" + - "OIDC_URL=${OIDC_URL}" + - "AUTH_REFERER=${AUTH_REFERER}" + - "JWK_URL=${JWK_URL}" + - "BASE_URL=${BASE_URL}" # Enable this for working with front-end on localhost - # or change to http://frontend:3000 if started using docker-compose - # - "REDIRECT_URL=http://localhost:3000" - - "AUTH_REFERER=http://mockauth:8000" - - "JWK_URL=http://mockauth:8000/keyset" - - "LOG_LEVEL=DEBUG" - - "DOI_API=http://mockdoi:8001/dois" - - "DOI_PREFIX=10.xxxx" - - "DOI_USER=user" - - "DOI_KEY=key" + # - "REDIRECT_URL=${REDIRECT_URL}" + - "LOG_LEVEL=${LOG_LEVEL}" + - "MONGO_DATABASE=${MONGO_DATABASE}" + - "MONGO_AUTHDB=${MONGO_AUTHDB}" + - "DOI_API=${DOI_API}" + - "DOI_PREFIX=${DOI_PREFIX}" + - "DOI_USER=${DOI_USER}" + - "DOI_KEY=${DOI_KEY}" database: image: "mongo" container_name: "metadata_submitter_database_dev" command: "mongod --tlsMode=requireTLS --tlsCertificateKeyFile=/tls/combined2 --tlsCAFile=/tls/cacert" restart: on-failure environment: - - "MONGO_INITDB_ROOT_USERNAME=admin" - - "MONGO_INITDB_ROOT_PASSWORD=admin" + - "MONGO_INITDB_ROOT_USERNAME=${MONGO_INITDB_ROOT_USERNAME}" + - "MONGO_INITDB_ROOT_PASSWORD=${MONGO_INITDB_ROOT_PASSWORD}" volumes: - data:/data/db - ./config:/tls From 869845dad137fc6751e6919b050369864b567858 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 Dec 2021 09:05:58 +0000 Subject: [PATCH 084/336] Bump black from 21.11b1 to 21.12b0 Bumps [black](https://github.com/psf/black) from 21.11b1 to 21.12b0. - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/commits) --- updated-dependencies: - dependency-name: black dependency-type: direct:development ... Signed-off-by: dependabot[bot] --- requirements-dev.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index f15c0821f..6066eea55 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -6,7 +6,7 @@ # backports.entry-points-selectable==1.1.1 # via virtualenv -black==21.11b1 +black==21.12b0 # via -r requirements-dev.in certifi==2021.10.8 # via -r requirements-dev.in @@ -58,8 +58,6 @@ pyparsing==2.4.7 # via packaging pyyaml==6.0 # via pre-commit -regex==2021.11.10 - # via black six==1.16.0 # via # tox From 4701bf043d12d4464db760f46a9f4d39c5d19fea Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 Dec 2021 09:06:59 +0000 Subject: [PATCH 085/336] Bump pre-commit from 2.15.0 to 2.16.0 Bumps [pre-commit](https://github.com/pre-commit/pre-commit) from 2.15.0 to 2.16.0. - [Release notes](https://github.com/pre-commit/pre-commit/releases) - [Changelog](https://github.com/pre-commit/pre-commit/blob/master/CHANGELOG.md) - [Commits](https://github.com/pre-commit/pre-commit/compare/v2.15.0...v2.16.0) --- updated-dependencies: - dependency-name: pre-commit dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot]
---
 requirements-dev.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements-dev.txt b/requirements-dev.txt
index f15c0821f..bf63e0bc2 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -46,7 +46,7 @@ platformdirs==2.4.0
     #   virtualenv
 pluggy==1.0.0
     # via tox
-pre-commit==2.15.0
+pre-commit==2.16.0
     # via -r requirements-dev.in
 py==1.11.0
     # via tox

From 3b4c0136eb4853c4ae5cc9fde2d500dd9e623d01 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 6 Dec 2021 09:08:18 +0000
Subject: [PATCH 086/336] Bump xmlschema from 1.8.2 to 1.9.0

Bumps [xmlschema](https://github.com/sissaschool/xmlschema) from 1.8.2 to 1.9.0.
- [Release notes](https://github.com/sissaschool/xmlschema/releases)
- [Changelog](https://github.com/sissaschool/xmlschema/blob/master/CHANGELOG.rst)
- [Commits](https://github.com/sissaschool/xmlschema/compare/v1.8.2...v1.9.0)

---
updated-dependencies:
- dependency-name: xmlschema
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot]
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 827574616..9fc0e1aba 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -68,7 +68,7 @@ urllib3==1.26.7
     # via requests
 uvloop==0.16.0
     # via -r requirements.in
-xmlschema==1.8.2
+xmlschema==1.9.0
     # via -r requirements.in
 yarl==1.7.2
     # via aiohttp

From ba062086c89257a01709f17b49b78351946c40bd Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Thu, 2 Dec 2021 15:32:47 +0000
Subject: [PATCH 087/336] Adds indexing on database initialization

Adds docker-compose configuration for the mongo container to run a script
against the newly initialized database. Adds a JS script that builds the
default database collections and indexes.
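The mounted script only runs when the data volume is initialized for the first
time; an existing volume keeps its old state until it is removed and recreated.
As a rough local check that the indexes were in fact created — a sketch using
pymongo with the admin/admin development credentials, not part of this patch:

    # Local verification sketch; assumes the dev credentials from .env.example.
    from pymongo import MongoClient

    client = MongoClient("mongodb://admin:admin@localhost:27017/default?authSource=admin")
    for name, info in client["default"]["folder"].index_information().items():
        print(name, info["key"])  # e.g. dateCreated_-1, folderId_1, text_name_text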
--- .env.example | 58 +++++++++++++++++++++--------------------- Dockerfile-dev | 2 +- docker-compose-tls.yml | 1 + docker-compose.yml | 1 + scripts/init_mongo.js | 17 +++++++++++++ 5 files changed, 49 insertions(+), 30 deletions(-) create mode 100644 scripts/init_mongo.js diff --git a/.env.example b/.env.example index 3c1fb5dd9..7b9861ec9 100644 --- a/.env.example +++ b/.env.example @@ -1,33 +1,33 @@ - # authentication - AAI_CLIENT_SECRET=secret_must_be_long - AAI_CLIENT_ID=aud2 - ISS_URL=http://mockauth:8000 - AUTH_URL=http://localhost:8000/authorize - OIDC_URL=http://mockauth:8000 - AUTH_REFERER=http://mockauth:8000 - JWK_URL=http://mockauth:8000/keyset +# authentication +AAI_CLIENT_SECRET=secret_must_be_long +AAI_CLIENT_ID=aud2 +ISS_URL=http://mockauth:8000 +AUTH_URL=http://localhost:8000/authorize +OIDC_URL=http://mockauth:8000 +AUTH_REFERER=http://mockauth:8000 +JWK_URL=http://mockauth:8000/keyset - # app urls - BASE_URL=http://localhost:5430 - # change to http://frontend:3000 if started using docker-compose for frontend - REDIRECT_URL=http://localhost:3000 +# app urls +BASE_URL=http://localhost:5430 +# change to http://frontend:3000 if started using docker-compose for frontend +REDIRECT_URL=http://localhost:3000 - # logging - LOG_LEVEL=DEBUG +# logging +LOG_LEVEL=DEBUG - # database - MONGO_HOST=database:27017 - MONGO_DATABASE=default - MONGO_AUTHDB=admin - MONGO_INITDB_ROOT_PASSWORD=admin - MONGO_INITDB_ROOT_USERNAME=admin - MONGO_SSL=true - MONGO_SSL_CA=/tls/cacert - MONGO_SSL_CLIENT_KEY=/tls/key - MONGO_SSL_CLIENT_CERT=/tls/cert +# database +MONGO_HOST=database:27017 +MONGO_DATABASE=default +MONGO_AUTHDB=admin +MONGO_INITDB_ROOT_PASSWORD=admin +MONGO_INITDB_ROOT_USERNAME=admin +MONGO_SSL=true +MONGO_SSL_CA=/tls/cacert +MONGO_SSL_CLIENT_KEY=/tls/key +MONGO_SSL_CLIENT_CERT=/tls/cert - # doi - DOI_API=http://mockdoi:8001/dois - DOI_PREFIX=10.xxxx - DOI_USER=user - DOI_KEY=key +# doi +DOI_API=http://mockdoi:8001/dois +DOI_PREFIX=10.xxxx +DOI_USER=user +DOI_KEY=key diff --git a/Dockerfile-dev b/Dockerfile-dev index af0cefe12..fefef7fb5 100644 --- a/Dockerfile-dev +++ b/Dockerfile-dev @@ -27,7 +27,7 @@ FROM appbase as local #======================= COPY requirements-dev.txt . 
-COPY ./scripts/ ./scripts
+COPY ./scripts/install-hooks.sh ./scripts/install-hooks.sh
 
 RUN pip install --no-cache-dir -r requirements.txt
 RUN pip install --no-cache-dir -r requirements-dev.txt
diff --git a/docker-compose-tls.yml b/docker-compose-tls.yml
index b1dac31a2..8baa04cbc 100644
--- a/docker-compose-tls.yml
+++ b/docker-compose-tls.yml
@@ -49,6 +49,7 @@ services:
     volumes:
       - data:/data/db
       - ./config:/tls
+      - ./scripts/init_mongo.js:/docker-entrypoint-initdb.d/init_mongo.js:ro
     expose:
       - 27017
     ports:
diff --git a/docker-compose.yml b/docker-compose.yml
index e1fd117d5..78de48024 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -42,6 +42,7 @@ services:
       - "MONGO_INITDB_ROOT_PASSWORD=${MONGO_INITDB_ROOT_PASSWORD}"
     volumes:
       - data:/data/db
+      - ./scripts/init_mongo.js:/docker-entrypoint-initdb.d/init_mongo.js:ro
     expose:
       - 27017
     ports:
diff --git a/scripts/init_mongo.js b/scripts/init_mongo.js
new file mode 100644
index 000000000..e93e9ee53
--- /dev/null
+++ b/scripts/init_mongo.js
@@ -0,0 +1,17 @@
+// script to create default database collections and indexes
+// on container start up
+
+db = new Mongo().getDB("default");
+
+db.createCollection('user', { capped: false });
+db.createCollection('folder', { capped: false });
+db.folder.createIndex({ "dateCreated": -1 });
+db.folder.createIndex({ "datePublished": -1 });
+db.folder.createIndex({ "folderId": 1 }, { unique: true });
+db.user.createIndex({ "userId": 1 }, { unique: true });
+db.folder.createIndex(
+    {
+        text_name: "text",
+    }
+)
+db.folder.getIndexes()

From 0fe6f052752ab0eea0d2f1bda841a9eff915907d Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Tue, 7 Dec 2021 15:25:42 +0000
Subject: [PATCH 088/336] Add black formatting argument

---
 .devcontainer/devcontainer.json | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 20c4dd801..13140fc03 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -17,6 +17,7 @@
         }
     },
     "python.formatting.provider": "black",
+    "python.formatting.blackArgs": ["--line-length", "120"],
     "python.languageServer": "Pylance",
     "python.linting.flake8Enabled": true,
     "python.linting.pylintEnabled": true,

From 3e781bbd08fe69b247b208fe8c1cccd71d533ef1 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Tue, 7 Dec 2021 15:29:07 +0000
Subject: [PATCH 089/336] Add new field text_name to folder collection

Folder is updated with a new field, text_name, which is populated during
folder creation by splitting the folder name into words on delimiters.
This enables filtering folders by name with a plain string search.
The folder schema is updated with the new field.
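To illustrate what the new text_name field contains, a worked example of the
re.split call added to operators.py below (a sketch mirroring the patch, not
additional repo code):

    import re

    def to_text_name(name: str) -> str:
        # Split on any non-word character or underscore, as operators.py does,
        # so MongoDB's text index can tokenize names like "Test_filter-name".
        return " ".join(re.split("[\\W_]", name))

    print(to_text_name("Test_filter-name"))  # -> "Test filter name"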
--- metadata_backend/api/operators.py | 5 +++-- metadata_backend/helpers/schemas/folders.json | 6 +++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 8c3d014eb..90493f842 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -1,10 +1,10 @@ """Operators for handling database-related operations.""" import re +import time from abc import ABC, abstractmethod from datetime import datetime from typing import Any, Dict, List, Tuple, Union from uuid import uuid4 -import time from aiohttp import web from dateutil.relativedelta import relativedelta @@ -12,7 +12,7 @@ from multidict import MultiDictProxy from pymongo.errors import ConnectionFailure, OperationFailure -from ..conf.conf import query_map, mongo_database +from ..conf.conf import mongo_database, query_map from ..database.db_service import DBService, auto_reconnect from ..helpers.logger import LOG from ..helpers.parser import XMLToJSONParser @@ -658,6 +658,7 @@ async def create_folder(self, data: Dict) -> str: """ folder_id = self._generate_folder_id() data["folderId"] = folder_id + data["text_name"] = " ".join(re.split("[\\W_]", data["name"])) data["published"] = False data["dateCreated"] = int(time.time()) data["metadataObjects"] = data["metadataObjects"] if "metadataObjects" in data else [] diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index 942f602bd..cda2eab7c 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -14,6 +14,10 @@ "type": "string", "title": "Folder Name" }, + "text_name": { + "type": "string", + "title": "Searchable Folder Name, used for indexing" + }, "description": { "type": "string", "title": "Folder Description" @@ -1023,4 +1027,4 @@ } }, "additionalProperties": false -} \ No newline at end of file +} From c24b6f5cb7a349a58efb127489c38351de702b58 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Tue, 7 Dec 2021 15:30:57 +0000 Subject: [PATCH 090/336] Add folder query by name --- metadata_backend/api/handlers.py | 28 ++++++++++++++-------------- metadata_backend/api/operators.py | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/metadata_backend/api/handlers.py b/metadata_backend/api/handlers.py index 508bd0f83..96bbb1cba 100644 --- a/metadata_backend/api/handlers.py +++ b/metadata_backend/api/handlers.py @@ -1,33 +1,29 @@ """Handle HTTP methods for server.""" -import ujson import json -import re import mimetypes +import re from collections import Counter +from datetime import date, datetime +from distutils.util import strtobool from math import ceil from pathlib import Path -from typing import Dict, List, Tuple, Union, cast, AsyncGenerator, Any -from datetime import date, datetime +from typing import Any, AsyncGenerator, Dict, List, Tuple, Union, cast +import ujson from aiohttp import BodyPartReader, web from aiohttp.web import Request, Response -from multidict import CIMultiDict from motor.motor_asyncio import AsyncIOMotorClient -from multidict import MultiDict, MultiDictProxy +from multidict import CIMultiDict, MultiDict, MultiDictProxy from xmlschema import XMLSchemaException -from distutils.util import strtobool - -from .middlewares import decrypt_cookie, get_session -from ..conf.conf import schema_types +from ..conf.conf import aai_config, publisher, schema_types +from ..helpers.doi import DOIHandler from ..helpers.logger import LOG from 
..helpers.parser import XMLToJSONParser from ..helpers.schema_loader import JSONSchemaLoader, SchemaNotFoundException, XMLSchemaLoader from ..helpers.validator import JSONValidator, XMLValidator -from ..helpers.doi import DOIHandler -from .operators import FolderOperator, Operator, XMLOperator, UserOperator - -from ..conf.conf import aai_config, publisher +from .middlewares import decrypt_cookie, get_session +from .operators import FolderOperator, Operator, UserOperator, XMLOperator class RESTAPIHandler: @@ -705,6 +701,10 @@ async def get_folders(self, req: Request) -> Response: reason = "'published' parameter must be either 'true' or 'false'" LOG.error(reason) raise web.HTTPBadRequest(reason=reason) + if "name" in req.query: + name_param = req.query.get("name", "") + if name_param: + folder_query = {"$text": {"$search": name_param}} folder_operator = FolderOperator(db_client) folders, total_folders = await folder_operator.query_folders(folder_query, page, per_page) diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 90493f842..88ed8f5b7 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -692,7 +692,7 @@ async def query_folders(self, query: Dict, page_num: int, page_size: int) -> Tup {"$sort": {"dateCreated": -1}}, {"$skip": skips}, {"$limit": page_size}, - {"$project": {"_id": 0}}, + {"$project": {"_id": 0, "text_name": 0}}, ] data_raw = await self.db_service.do_aggregate("folder", _query) From a7443ce6cca8579dcf65f7e881b4cd8dc45e66bd Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 8 Dec 2021 13:08:58 +0000 Subject: [PATCH 091/336] Add openapi specs --- docs/specification.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/specification.yml b/docs/specification.yml index 24b746549..7fd9070b3 100644 --- a/docs/specification.yml +++ b/docs/specification.yml @@ -897,7 +897,13 @@ paths: name: published schema: type: string - description: Return folders based on the folder published value + description: Return folders based on the folder published value. Should be 'true' or 'false' + - in: query + name: name + schema: + type: string + description: Return folders containing filtered string[s] in their name + example: test folder responses: 200: description: OK From ab24a6e796a7d93060ddc716817ff8ff442c12e0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 13 Dec 2021 09:04:46 +0000 Subject: [PATCH 092/336] Bump actions/upload-artifact from 2.2.4 to 2.3.0 Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 2.2.4 to 2.3.0. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/v2.2.4...v2.3.0) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot]
---
 .github/workflows/int.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/int.yml b/.github/workflows/int.yml
index dee04b94c..86e2002ba 100644
--- a/.github/workflows/int.yml
+++ b/.github/workflows/int.yml
@@ -40,7 +40,7 @@ jobs:
 
       - name: Persist log files
         if: ${{ failure() }}
-        uses: actions/upload-artifact@v2.2.4
+        uses: actions/upload-artifact@v2.3.0
         with:
           name: test_debugging_help
           path: tests
@@ -69,7 +69,7 @@ jobs:
 
       - name: Persist log files
         if: ${{ failure() }}
-        uses: actions/upload-artifact@v2.2.4
+        uses: actions/upload-artifact@v2.3.0
         with:
           name: test_debugging_help
           path: tests

From 1bf98d21204cc6335ae2ad02642193e8a5f45396 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 13 Dec 2021 09:05:34 +0000
Subject: [PATCH 093/336] Bump xmlschema from 1.9.0 to 1.9.1

Bumps [xmlschema](https://github.com/sissaschool/xmlschema) from 1.9.0 to 1.9.1.
- [Release notes](https://github.com/sissaschool/xmlschema/releases)
- [Changelog](https://github.com/sissaschool/xmlschema/blob/master/CHANGELOG.rst)
- [Commits](https://github.com/sissaschool/xmlschema/compare/v1.9.0...v1.9.1)

---
updated-dependencies:
- dependency-name: xmlschema
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 9fc0e1aba..1b65dbfde 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -68,7 +68,7 @@ urllib3==1.26.7
     # via requests
 uvloop==0.16.0
     # via -r requirements.in
-xmlschema==1.9.0
+xmlschema==1.9.1
     # via -r requirements.in
 yarl==1.7.2
     # via aiohttp

From 12373fa000f2e32616a6f21bca48c2dbe25b1fae Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Thu, 9 Dec 2021 13:49:10 +0000
Subject: [PATCH 094/336] Python scripts for database operations

Adds a Python script that creates the default collections and indexes,
to be run if the database has been destroyed.

Updates the clean_db script so that it only deletes the documents from
each collection. The old functionality, i.e. dropping the whole database,
can be run with the `--purge` flag.
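The two modes differ in what survives: a sketch of the distinction, assuming
the same motor client setup the scripts below use:

    # Default mode: keep collections and their indexes, drop only the documents.
    async def clean(db):
        for col in await db.list_collection_names():
            await db[col].delete_many({})

    # --purge mode: drop the whole database; indexes are lost, so
    # mongo_indexes.py has to be run again afterwards.
    async def purge(client, database="default"):
        await client.drop_database(database)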
---
 tests/integration/clean_db.py      | 41 +++++++++++++----
 tests/integration/mongo_indexes.py | 72 ++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+), 8 deletions(-)
 create mode 100755 tests/integration/mongo_indexes.py

diff --git a/tests/integration/clean_db.py b/tests/integration/clean_db.py
index 537f99e08..31b9c8dc6 100644
--- a/tests/integration/clean_db.py
+++ b/tests/integration/clean_db.py
@@ -3,15 +3,20 @@ To be utilised mostly for integration tests
 """
-from motor.motor_asyncio import AsyncIOMotorClient
+import argparse
 import asyncio
 import logging
-import argparse
+import os
+
+from motor.motor_asyncio import AsyncIOMotorClient
 
 serverTimeout = 15000
 connectTimeout = 15000
 
 # === Global vars ===
+DATABASE = os.getenv("MONGO_DATABASE", "default")
+AUTHDB = os.getenv("MONGO_AUTHDB", "admin")
+HOST = os.getenv("MONGO_HOST", "admin")
 FORMAT = "[%(asctime)s][%(name)s][%(process)d %(processName)s][%(levelname)-8s](L:%(lineno)s) %(funcName)s: %(message)s"
 logging.basicConfig(format=FORMAT, datefmt="%Y-%m-%d %H:%M:%S")
 LOG = logging.getLogger(__name__)
@@ -26,20 +31,40 @@ def create_db_client(url: str) -> AsyncIOMotorClient:
     return AsyncIOMotorClient(url, connectTimeoutMS=connectTimeout, serverSelectionTimeoutMS=serverTimeout)
 
 
+async def purge_mongodb(url: str) -> None:
+    """Erase database."""
+    client = create_db_client(url)
+    LOG.debug(f"current databases: {*await client.list_database_names(),}")
+    LOG.debug("=== Drop current database ===")
+    await client.drop_database(DATABASE)
+    LOG.debug("=== DONE ===")
+
+
 async def clean_mongodb(url: str) -> None:
     """Clean Collection and recreate it."""
     client = create_db_client(url)
-    LOG.debug(f"current databases: {*await client.list_database_names(),}")
-    LOG.debug("=== Drop any existing database ===")
-    await client.drop_database("default")
+    db = client[DATABASE]
+    LOG.debug(f"Database to clear: {DATABASE}")
+    collections = await db.list_collection_names()
+    LOG.debug(f"=== Collections to be cleared: {collections} ===")
+    LOG.debug("=== Delete all documents in all collections ===")
+    for col in collections:
+        x = await db[col].delete_many({})
+        LOG.debug(f"{x.deleted_count}{' documents deleted'}\t{'from '}{col}")
+    LOG.debug("=== DONE ===")
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Process some integers.")
     parser.add_argument("--tls", action="store_true", help="add tls configuration")
+    parser.add_argument("--purge", action="store_true", help="destroy database")
     args = parser.parse_args()
-    url = url = "mongodb://admin:admin@localhost:27017/default?authSource=admin"
+    url = f"mongodb://{AUTHDB}:{AUTHDB}@{HOST}/{DATABASE}?authSource=admin"
     if args.tls:
         _params = "?tls=true&tlsCAFile=./config/cacert&ssl_keyfile=./config/key&ssl_certfile=./config/cert"
-        url = f"mongodb://admin:admin@localhost:27017/default{_params}&authSource=admin"
-    asyncio.run(clean_mongodb(url))
+        url = f"mongodb://{AUTHDB}:{AUTHDB}@{HOST}/{DATABASE}{_params}&authSource=admin"
+    LOG.debug(f"=== Database url {url} ===")
+    if args.purge:
+        asyncio.run(purge_mongodb(url))
+    else:
+        asyncio.run(clean_mongodb(url))
diff --git a/tests/integration/mongo_indexes.py b/tests/integration/mongo_indexes.py
new file mode 100755
index 000000000..982478ffe
--- /dev/null
+++ b/tests/integration/mongo_indexes.py
@@ -0,0 +1,72 @@
+"""Create MongoDB default collections and indexes."""
+
+import argparse
+import asyncio
+import logging
+import os
+
+import pymongo
+from motor.motor_asyncio import AsyncIOMotorClient
+from pymongo import TEXT
+
+serverTimeout = 15000
+connectTimeout = 15000
+
+# === Global vars ===
+DATABASE = os.getenv("MONGO_DATABASE", "default")
+AUTHDB = os.getenv("MONGO_AUTHDB", "admin")
+HOST = os.getenv("MONGO_HOST", "admin")
+FORMAT = "[%(asctime)s][%(name)s][%(process)d %(processName)s][%(levelname)-8s](L:%(lineno)s) %(funcName)s: %(message)s"
+logging.basicConfig(format=FORMAT, datefmt="%Y-%m-%d %H:%M:%S")
+LOG = logging.getLogger(__name__)
+LOG.setLevel(logging.DEBUG)
+
+
+def create_db_client(url: str) -> AsyncIOMotorClient:
+    """Initialize database client for AioHTTP App.
+
+    :returns: Coroutine-based Motor client for Mongo operations
+    """
+    return AsyncIOMotorClient(url, connectTimeoutMS=connectTimeout, serverSelectionTimeoutMS=serverTimeout)
+
+
+async def create_indexes(url: str) -> None:
+    """Create the default collections and indexes."""
+    client = create_db_client(url)
+    db = client[DATABASE]
+    LOG.debug(f"Current database: {db}")
+    LOG.debug("=== Create collections ===")
+    for col in ["folder", "user"]:
+        try:
+            await db.create_collection(col)
+        except pymongo.errors.CollectionInvalid as e:
+            LOG.debug(f"=== Collection {col} not created due to {str(e)} ===")
+            pass
+    LOG.debug("=== Create indexes ===")
+
+    indexes = [
+        db.folder.create_index([("dateCreated", -1)]),
+        db.folder.create_index([("folderId", 1)], unique=True),
+        db.folder.create_index([("text_name", TEXT)]),
+        db.user.create_index([("userId", 1)], unique=True),
+    ]
+
+    for index in indexes:
+        try:
+            await index
+        except Exception as e:
+            LOG.debug(f"=== Indexes not created due to {str(e)} ===")
+            pass
+    LOG.debug("=== DONE ===")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Create MongoDB default collections and indexes.")
+    parser.add_argument("--tls", action="store_true", help="add tls configuration")
+    args = parser.parse_args()
+    url = f"mongodb://{AUTHDB}:{AUTHDB}@{HOST}/{DATABASE}?authSource=admin"
+    if args.tls:
+        _params = "?tls=true&tlsCAFile=./config/cacert&ssl_keyfile=./config/key&ssl_certfile=./config/cert"
+        url = f"mongodb://{AUTHDB}:{AUTHDB}@{HOST}/{DATABASE}{_params}&authSource=admin"
+    LOG.debug(f"=== Database url {url} ===")
+    asyncio.run(create_indexes(url))

From e2e1070965313a8f7080914764ae6b3cf74bf6e1 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Thu, 9 Dec 2021 13:50:54 +0000
Subject: [PATCH 095/336] Add integration tests for filtering by name

Adds the dependencies and configs needed to run the integration tests
in a local environment.
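The expected counts in the new test follow from how MongoDB $text search
works: the folder names are tokenized via text_name, and a multi-word query
ORs its terms. A standalone sketch of that logic (an approximation of the
server-side behaviour, ignoring stemming and stop words; not code from the
repo):

    import re

    names = [" filter new ", "_filter_", "-filter-", "_extra-", "_2021special_"]
    tokens = [set(t.lower() for t in re.split("[\\W_]", f"Test{n}name") if t) for n in names]

    def matches(query: str) -> int:
        terms = set(query.lower().split())  # $text ORs the search terms
        return sum(bool(terms & folder) for folder in tokens)

    assert matches("filter") == 3
    assert matches("2021 special") == 0  # "2021special" stays one token
    assert matches("new extra") == 2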
--- .env.example | 2 +- requirements-dev.in | 3 ++- requirements-dev.txt | 2 ++ tests/integration/run_tests.py | 40 ++++++++++++++++++++++++++++++++-- 4 files changed, 43 insertions(+), 4 deletions(-) diff --git a/.env.example b/.env.example index 7b9861ec9..33b5ede87 100644 --- a/.env.example +++ b/.env.example @@ -10,7 +10,7 @@ JWK_URL=http://mockauth:8000/keyset # app urls BASE_URL=http://localhost:5430 # change to http://frontend:3000 if started using docker-compose for frontend -REDIRECT_URL=http://localhost:3000 +# REDIRECT_URL=http://localhost:3000 # logging LOG_LEVEL=DEBUG diff --git a/requirements-dev.in b/requirements-dev.in index e5a9306fa..2181f099b 100644 --- a/requirements-dev.in +++ b/requirements-dev.in @@ -1,6 +1,7 @@ +aiofiles # to run integration tests black certifi flake8 -pip-tools +pip-tools # pip depedencies management pre-commit tox diff --git a/requirements-dev.txt b/requirements-dev.txt index 49b4bcb92..b1c7301bb 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -4,6 +4,8 @@ # # pip-compile requirements-dev.in # +aiofiles==0.8.0 + # via -r requirements-dev.in backports.entry-points-selectable==1.1.1 # via virtualenv black==21.12b0 diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index d5a0183e2..028377b23 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -8,11 +8,11 @@ import asyncio import json import logging -from pathlib import Path import urllib import xml.etree.ElementTree as ET +from pathlib import Path -import aiofiles # type: ignore +import aiofiles import aiohttp from aiohttp import FormData @@ -925,6 +925,41 @@ async def test_getting_paginated_folders(sess): assert resp.status == 400 +async def test_getting_folders_filtered_by_name(sess): + """Check that /folders returns folders filtered by name. + + :param sess: HTTP session in which request call is made + """ + names = [" filter new ", "_filter_", "-filter-", "_extra-", "_2021special_"] + folders = [] + for name in names: + folder_data = {"name": f"Test{name}name", "description": "Test filtering name"} + folders.append(await post_folder(sess, folder_data)) + + async with sess.get(f"{folders_url}?name=filter") as resp: + assert resp.status == 200 + ans = await resp.json() + assert ans["page"]["totalFolders"] == 3 + + async with sess.get(f"{folders_url}?name=extra") as resp: + assert resp.status == 200 + ans = await resp.json() + assert ans["page"]["totalFolders"] == 1 + + async with sess.get(f"{folders_url}?name=2021 special") as resp: + assert resp.status == 200 + ans = await resp.json() + assert ans["page"]["totalFolders"] == 0 + + async with sess.get(f"{folders_url}?name=new extra") as resp: + assert resp.status == 200 + ans = await resp.json() + assert ans["page"]["totalFolders"] == 2 + + for folder in folders: + await delete_folder(sess, folder) + + async def test_getting_user_items(sess): """Test querying user's templates or folders in the user object with GET user request. 
@@ -1320,6 +1355,7 @@ async def main(): # Test getting a list of folders and draft templates owned by the user LOG.debug("=== Testing getting folders, draft folders and draft templates with pagination ===") await test_getting_paginated_folders(sess) + await test_getting_folders_filtered_by_name(sess) await test_getting_user_items(sess) # Test add, modify, validate and release action with submissions From d0c83abcc2fe84cf516b598d3b8711ea147015ad Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Thu, 9 Dec 2021 15:07:52 +0000 Subject: [PATCH 096/336] Github workflow env vars Add env vars to github workflow file as database scripts and integration tests use environment variables. --- .github/workflows/int.yml | 24 ++++++++++++++++++++++-- tests/integration/run_tests.py | 11 ++++++----- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/.github/workflows/int.yml b/.github/workflows/int.yml index dee04b94c..0da797a37 100644 --- a/.github/workflows/int.yml +++ b/.github/workflows/int.yml @@ -29,10 +29,20 @@ jobs: docker-compose --env-file .env.example up -d --build sleep 30 - - name: Run Integration test + - name: Clear database run: | python tests/integration/clean_db.py + env: + MONGO_HOST: localhost:27017 + MONGO_DATABASE: default + MONGO_AUTHDB: admin + + - name: Run Integration test + run: | python tests/integration/run_tests.py + env: + BASE_URL: http://localhost:5430 + ISS_URL: http://localhost:8000 - name: Collect logs from docker if: ${{ failure() }} @@ -58,10 +68,20 @@ jobs: docker-compose -f docker-compose-tls.yml --env-file .env.example up -d sleep 30 - - name: Run Integration test + - name: Clear database run: | python tests/integration/clean_db.py --tls + env: + MONGO_HOST: localhost:27017 + MONGO_DATABASE: default + MONGO_AUTHDB: admin + + - name: Run Integration test + run: | python tests/integration/run_tests.py + env: + BASE_URL: http://localhost:5430 + ISS_URL: http://localhost:8000 - name: Collect logs from docker if: ${{ failure() }} diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index 028377b23..bbf5a9928 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -8,6 +8,7 @@ import asyncio import json import logging +import os import urllib import xml.etree.ElementTree as ET from pathlib import Path @@ -47,8 +48,8 @@ ("experiment", "ERX000119.json", "ERX000119.json"), ("analysis", "ERZ266973.json", "ERZ266973.json"), ] -base_url = "http://localhost:5430" -mock_auth_url = "http://localhost:8000" +base_url = os.getenv("BASE_URL", "http://localhost:5430") +mock_auth_url = os.getenv("ISS_URL", "http://localhost:8000") objects_url = f"{base_url}/objects" drafts_url = f"{base_url}/drafts" templates_url = f"{base_url}/templates" @@ -357,8 +358,8 @@ async def post_folder(sess, data): """ async with sess.post(f"{folders_url}", data=json.dumps(data)) as resp: LOG.debug("Adding new folder") - assert resp.status == 201, "HTTP Status code error" ans = await resp.json() + assert resp.status == 201, f"HTTP Status code error {resp.status} {ans}" return ans["folderId"] @@ -937,13 +938,13 @@ async def test_getting_folders_filtered_by_name(sess): folders.append(await post_folder(sess, folder_data)) async with sess.get(f"{folders_url}?name=filter") as resp: - assert resp.status == 200 ans = await resp.json() + assert resp.status == 200, f"HTTP Status code error {resp.status} {ans}" assert ans["page"]["totalFolders"] == 3 async with sess.get(f"{folders_url}?name=extra") as resp: - assert resp.status == 200 ans = await 
resp.json() + assert resp.status == 200, f"HTTP Status code error {resp.status} {ans}" assert ans["page"]["totalFolders"] == 1 async with sess.get(f"{folders_url}?name=2021 special") as resp: From 8894f12ca9ba1b1471d69ed25c3ed3eca29496d5 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Tue, 14 Dec 2021 11:12:12 +0000 Subject: [PATCH 097/336] Add sorting optimization --- metadata_backend/api/handlers.py | 5 ++++- metadata_backend/api/operators.py | 19 ++++++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/metadata_backend/api/handlers.py b/metadata_backend/api/handlers.py index 96bbb1cba..473582ae5 100644 --- a/metadata_backend/api/handlers.py +++ b/metadata_backend/api/handlers.py @@ -685,6 +685,7 @@ async def get_folders(self, req: Request) -> Response: """ page = self._get_page_param(req, "page", 1) per_page = self._get_page_param(req, "per_page", 5) + sort = {"date": True, "score": False} db_client = req.app["db_client"] user_operator = UserOperator(db_client) @@ -705,9 +706,11 @@ async def get_folders(self, req: Request) -> Response: name_param = req.query.get("name", "") if name_param: folder_query = {"$text": {"$search": name_param}} + sort["score"] = True + sort["date"] = False folder_operator = FolderOperator(db_client) - folders, total_folders = await folder_operator.query_folders(folder_query, page, per_page) + folders, total_folders = await folder_operator.query_folders(folder_query, page, per_page, sort) result = ujson.dumps( { diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 88ed8f5b7..5eacaea80 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -3,7 +3,7 @@ import time from abc import ABC, abstractmethod from datetime import datetime -from typing import Any, Dict, List, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union from uuid import uuid4 from aiohttp import web @@ -678,18 +678,31 @@ async def create_folder(self, data: Dict) -> str: LOG.info(f"Inserting folder with id {folder_id} to database succeeded.") return folder_id - async def query_folders(self, query: Dict, page_num: int, page_size: int) -> Tuple[List, int]: + async def query_folders( + self, query: Dict, page_num: int, page_size: int, sort_param: Optional[dict] = None + ) -> Tuple[List, int]: """Query database based on url query parameters. :param query: Dict containing query information :param page_num: Page number :param page_size: Results per page + :param sort_param: Sorting options. :returns: Paginated query result """ skips = page_size * (page_num - 1) + + if not sort_param: + sort = {"dateCreated": -1} + elif sort_param["score"] and not sort_param["date"]: + sort = {"score": {"$meta": "textScore"}, "dateCreated": -1} # type: ignore + elif sort_param["score"] and sort_param["date"]: + sort = {"dateCreated": -1, "score": {"$meta": "textScore"}} # type: ignore + else: + sort = {"dateCreated": -1} + _query = [ {"$match": query}, - {"$sort": {"dateCreated": -1}}, + {"$sort": sort}, {"$skip": skips}, {"$limit": page_size}, {"$project": {"_id": 0, "text_name": 0}}, From 67a06d600f5d09207669586d68826d14d123559d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 Dec 2021 09:06:02 +0000 Subject: [PATCH 098/336] Bump actions/upload-artifact from 2.3.0 to 2.3.1 Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 2.3.0 to 2.3.1. 
- [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/v2.3.0...v2.3.1) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/int.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/int.yml b/.github/workflows/int.yml index 73d94146e..c8e2cc987 100644 --- a/.github/workflows/int.yml +++ b/.github/workflows/int.yml @@ -50,7 +50,7 @@ jobs: - name: Persist log files if: ${{ failure() }} - uses: actions/upload-artifact@v2.3.0 + uses: actions/upload-artifact@v2.3.1 with: name: test_debugging_help path: tests @@ -89,7 +89,7 @@ jobs: - name: Persist log files if: ${{ failure() }} - uses: actions/upload-artifact@v2.3.0 + uses: actions/upload-artifact@v2.3.1 with: name: test_debugging_help path: tests From e38702bb74884951a324b08093e844dd1496d37e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 Dec 2021 09:06:45 +0000 Subject: [PATCH 099/336] Bump jsonschema from 4.2.1 to 4.3.1 Bumps [jsonschema](https://github.com/Julian/jsonschema) from 4.2.1 to 4.3.1. - [Release notes](https://github.com/Julian/jsonschema/releases) - [Changelog](https://github.com/Julian/jsonschema/blob/main/CHANGELOG.rst) - [Commits](https://github.com/Julian/jsonschema/compare/v4.2.1...v4.3.1) --- updated-dependencies: - dependency-name: jsonschema dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 1b65dbfde..310552e96 100644 --- a/requirements.txt +++ b/requirements.txt @@ -40,7 +40,7 @@ idna==3.3 # via # requests # yarl -jsonschema==4.2.1 +jsonschema==4.3.1 # via -r requirements.in motor==2.5.1 # via -r requirements.in From 62d7561af0da6e69bad50e9323eedd4945bf20f3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 Dec 2021 14:45:28 +0000 Subject: [PATCH 100/336] Bump cryptography from 36.0.0 to 36.0.1 Bumps [cryptography](https://github.com/pyca/cryptography) from 36.0.0 to 36.0.1. - [Release notes](https://github.com/pyca/cryptography/releases) - [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pyca/cryptography/compare/36.0.0...36.0.1) --- updated-dependencies: - dependency-name: cryptography dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 310552e96..27ba2621d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,7 +24,7 @@ charset-normalizer==2.0.7 # via # aiohttp # requests -cryptography==36.0.0 +cryptography==36.0.1 # via # -r requirements.in # authlib From 9a03a41b01ecdade52d67436ac365bf1fe1ba77b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 Dec 2021 15:14:25 +0000 Subject: [PATCH 101/336] Bump ujson from 4.3.0 to 5.1.0 Bumps [ujson](https://github.com/ultrajson/ultrajson) from 4.3.0 to 5.1.0. 
- [Release notes](https://github.com/ultrajson/ultrajson/releases) - [Commits](https://github.com/ultrajson/ultrajson/compare/4.3.0...5.1.0) --- updated-dependencies: - dependency-name: ujson dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 27ba2621d..a5b8aa1af 100644 --- a/requirements.txt +++ b/requirements.txt @@ -62,7 +62,7 @@ six==1.16.0 # via python-dateutil typing-extensions==4.0.0 # via async-timeout -ujson==4.3.0 +ujson==5.1.0 # via -r requirements.in urllib3==1.26.7 # via requests From b182802bc1f5f3c18d96b6084f3bb33f2a8ba0a2 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Tue, 14 Dec 2021 13:55:32 +0000 Subject: [PATCH 102/336] Create tests for filtering by date Add help function to add folders to database by direct connection to db. This is needed to create folders with modified 'dateCreated'-field. Tried to use freezegun library with no success. Add env vars to github workflow file for integration tests. --- .github/workflows/int.yml | 6 +- tests/integration/run_tests.py | 131 ++++++++++++++++++++++++++++++++- 2 files changed, 132 insertions(+), 5 deletions(-) diff --git a/.github/workflows/int.yml b/.github/workflows/int.yml index c8e2cc987..fb90168c4 100644 --- a/.github/workflows/int.yml +++ b/.github/workflows/int.yml @@ -43,6 +43,9 @@ jobs: env: BASE_URL: http://localhost:5430 ISS_URL: http://localhost:8000 + MONGO_HOST: localhost:27017 + MONGO_DATABASE: default + MONGO_AUTHDB: admin - name: Collect logs from docker if: ${{ failure() }} @@ -82,7 +85,8 @@ jobs: env: BASE_URL: http://localhost:5430 ISS_URL: http://localhost:8000 - + MONGO_SSL: True + - name: Collect logs from docker if: ${{ failure() }} run: docker-compose logs --no-color -t > tests/dockerlogs || true diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index bbf5a9928..419cbea1f 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -4,18 +4,21 @@ Deleting from db is currently not supported, objects added to db in different should be taken into account. 
""" - import asyncio import json import logging import os +import re import urllib import xml.etree.ElementTree as ET +from datetime import datetime from pathlib import Path +from uuid import uuid4 import aiofiles import aiohttp from aiohttp import FormData +from motor.motor_asyncio import AsyncIOMotorClient # === Global vars === FORMAT = "[%(asctime)s][%(name)s][%(process)d %(processName)s][%(levelname)-8s](L:%(lineno)s) %(funcName)s: %(message)s" @@ -57,6 +60,11 @@ users_url = f"{base_url}/users" submit_url = f"{base_url}/submit" publish_url = f"{base_url}/publish" +# to form direct contact to db with create_folder() +DATABASE = os.getenv("MONGO_DATABASE", "default") +AUTHDB = os.getenv("MONGO_AUTHDB", "admin") +HOST = os.getenv("MONGO_HOST", "database:27017") +TLS = os.getenv("MONGO_SSL", False) user_id = "current" test_user_given = "Given" @@ -109,7 +117,12 @@ async def create_multi_file_request_data(filepairs): path_to_file = testfiles_root / schema / filename path = path_to_file.as_posix() async with aiofiles.open(path, mode="r") as f: - request_data.add_field(schema.upper(), await f.read(), filename=filename, content_type="text/xml") + request_data.add_field( + schema.upper(), + await f.read(), + filename=filename, + content_type="text/xml", + ) return request_data @@ -414,6 +427,36 @@ async def delete_folder_publish(sess, folder_id): assert resp.status == 401, "HTTP Status code error" +async def create_folder(data, user): + """Create new object folder to database. + + :param data: Data as dict to be saved to database + :param user: User id to which data is assigned + :returns: Folder id for the folder inserted to database + """ + LOG.info("Creating new folder") + url = f"mongodb://{AUTHDB}:{AUTHDB}@{HOST}/{DATABASE}?authSource=admin" + db_client = AsyncIOMotorClient(url, connectTimeoutMS=1000, serverSelectionTimeoutMS=1000) + database = db_client[DATABASE] + + folder_id = uuid4().hex + data["folderId"] = folder_id + data["text_name"] = " ".join(re.split("[\\W_]", data["name"])) + data["drafts"] = [] + data["metadataObjects"] = [] + try: + await database["folder"].insert_one(data) + find_by_id = {"userId": user} + append_op = {"$push": {"folders": {"$each": [folder_id], "$position": 0}}} + await database["user"].find_one_and_update( + find_by_id, append_op, projection={"_id": False}, return_document=True + ) + return folder_id + + except Exception as e: + LOG.error(f"Folder creation failed due to {str(e)}") + + async def patch_user(sess, user_id, real_user_id, json_patch): """Patch one user object within session, return userId. @@ -940,7 +983,7 @@ async def test_getting_folders_filtered_by_name(sess): async with sess.get(f"{folders_url}?name=filter") as resp: ans = await resp.json() assert resp.status == 200, f"HTTP Status code error {resp.status} {ans}" - assert ans["page"]["totalFolders"] == 3 + assert ans["page"]["totalFolders"] == 3, f'Shold be 3 returned {ans["page"]["totalFolders"]}' async with sess.get(f"{folders_url}?name=extra") as resp: ans = await resp.json() @@ -961,6 +1004,81 @@ async def test_getting_folders_filtered_by_name(sess): await delete_folder(sess, folder) +async def test_getting_folders_filtered_by_date_created(sess): + """Check that /folders returns folders filtered by date created. 
+
+    :param sess: HTTP session in which request call is made
+    """
+    async with sess.get(f"{users_url}/current") as resp:
+        ans = await resp.json()
+        user = ans["userId"]
+
+    folders = []
+    format = "%Y-%m-%d %H:%M:%S"
+
+    # Test dateCreated within a year
+    # Create folders with different dateCreated
+    timestamps = ["2014-12-31 00:00:00", "2015-01-01 00:00:00", "2015-07-15 00:00:00", "2016-01-01 00:00:00"]
+    for stamp in timestamps:
+        folder_data = {
+            "name": f"Test date {stamp}",
+            "description": "Test filtering date",
+            "dateCreated": datetime.strptime(stamp, format).timestamp(),
+        }
+        folders.append(await create_folder(folder_data, user))
+
+    async with sess.get(f"{folders_url}?date_created_start=2015-01-01&date_created_end=2015-12-31") as resp:
+        ans = await resp.json()
+        assert resp.status == 200, f"returned status {resp.status}, error {ans}"
+        assert ans["page"]["totalFolders"] == 2, f'Should be 2 returned {ans["page"]["totalFolders"]}'
+
+    # Test dateCreated within a month
+    # Create folders with different dateCreated
+    timestamps = ["2013-01-31 00:00:00", "2013-02-02 00:00:00", "2013-03-29 00:00:00", "2013-04-01 00:00:00"]
+    for stamp in timestamps:
+        folder_data = {
+            "name": f"Test date {stamp}",
+            "description": "Test filtering date",
+            "dateCreated": datetime.strptime(stamp, format).timestamp(),
+        }
+        folders.append(await create_folder(folder_data, user))
+
+    async with sess.get(f"{folders_url}?date_created_start=2013-02-01&date_created_end=2013-03-30") as resp:
+        ans = await resp.json()
+        assert resp.status == 200, f"returned status {resp.status}, error {ans}"
+        assert ans["page"]["totalFolders"] == 2, f'Should be 2 returned {ans["page"]["totalFolders"]}'
+
+    # Test dateCreated within a day
+    # Create folders with different dateCreated
+    timestamps = [
+        "2012-01-14 23:59:59",
+        "2012-01-15 00:00:01",
+        "2012-01-15 23:59:59",
+        "2012-01-16 00:00:01",
+    ]
+    for stamp in timestamps:
+        folder_data = {
+            "name": f"Test date {stamp}",
+            "description": "Test filtering date",
+            "dateCreated": datetime.strptime(stamp, format).timestamp(),
+        }
+        folders.append(await create_folder(folder_data, user))
+
+    async with sess.get(f"{folders_url}?date_created_start=2012-01-15&date_created_end=2012-01-15") as resp:
+        ans = await resp.json()
+        assert resp.status == 200, f"returned status {resp.status}, error {ans}"
+        assert ans["page"]["totalFolders"] == 2, f'Should be 2 returned {ans["page"]["totalFolders"]}'
+
+    # Test parameters date_created_... and name together
+    async with sess.get(f"{folders_url}?name=2013&date_created_start=2012-01-01&date_created_end=2016-12-31") as resp:
+        ans = await resp.json()
+        assert resp.status == 200, f"returned status {resp.status}, error {ans}"
+        assert ans["page"]["totalFolders"] == 4, f'Should be 4 returned {ans["page"]["totalFolders"]}'
+
+    for folder in folders:
+        await delete_folder(sess, folder)
+
+
 async def test_getting_user_items(sess):
     """Test querying user's templates or folders in the user object with GET user request.
@@ -1356,8 +1474,13 @@ async def main():
     # Test getting a list of folders and draft templates owned by the user
     LOG.debug("=== Testing getting folders, draft folders and draft templates with pagination ===")
     await test_getting_paginated_folders(sess)
-    await test_getting_folders_filtered_by_name(sess)
     await test_getting_user_items(sess)
+    LOG.debug("=== Testing getting folders filtered with name and date created ===")
+    await test_getting_folders_filtered_by_name(sess)
+    # too much of a hassle to make test work with tls db connection in github
+    # must be improved in next integration test iteration
+    if not TLS:
+        await test_getting_folders_filtered_by_date_created(sess)
 
     # Test add, modify, validate and release action with submissions
     LOG.debug("=== Testing actions within submissions ===")

From 0da8d88303055d6e842bac5568d9802bdc0c0283 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Tue, 14 Dec 2021 14:05:28 +0000
Subject: [PATCH 103/336] Add functionality for filtering by date

---
 metadata_backend/api/handlers.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/metadata_backend/api/handlers.py b/metadata_backend/api/handlers.py
index 473582ae5..d43ed51b0 100644
--- a/metadata_backend/api/handlers.py
+++ b/metadata_backend/api/handlers.py
@@ -702,6 +702,7 @@ async def get_folders(self, req: Request) -> Response:
             reason = "'published' parameter must be either 'true' or 'false'"
             LOG.error(reason)
             raise web.HTTPBadRequest(reason=reason)
+
         if "name" in req.query:
             name_param = req.query.get("name", "")
             if name_param:
@@ -709,6 +710,27 @@ async def get_folders(self, req: Request) -> Response:
                 folder_query = {"$text": {"$search": name_param}}
                 sort["score"] = True
                 sort["date"] = False
 
+        format_incoming = "%Y-%m-%d"
+        format_query = "%Y-%m-%d %H:%M:%S"
+        if "date_created_start" in req.query and "date_created_end" in req.query:
+            date_param_start = req.query.get("date_created_start", "")
+            date_param_end = req.query.get("date_created_end", "")
+
+            if datetime.strptime(date_param_start, format_incoming) and datetime.strptime(
+                date_param_end, format_incoming
+            ):
+                query_start = datetime.strptime(date_param_start + " 00:00:00", format_query).timestamp()
+                query_end = datetime.strptime(date_param_end + " 23:59:59", format_query).timestamp()
+                folder_query["dateCreated"] = {"$gte": query_start, "$lte": query_end}
+            else:
+                reason = f"'date_created_start' and 'date_created_end' parameters must be formatted as {format_incoming}"
+                LOG.error(reason)
+                raise web.HTTPBadRequest(reason=reason)
+
+        if "name" in req.query and "date_created_start" in req.query:
+            sort["score"] = True
+            sort["date"] = True
+
         folder_operator = FolderOperator(db_client)
         folders, total_folders = await folder_operator.query_folders(folder_query, page, per_page, sort)

From 998fa6c9e7d2554401e1346ed8b69eaf7e866959 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Tue, 14 Dec 2021 14:49:23 +0000
Subject: [PATCH 104/336] Add openapi specs and update testing docs

---
 docs/specification.yml | 19 ++++++++++++++++++-
 docs/test.rst          |  6 ++++--
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/docs/specification.yml b/docs/specification.yml
index 7fd9070b3..14447547f 100644
--- a/docs/specification.yml
+++ b/docs/specification.yml
@@ -897,13 +897,30 @@ paths:
           name: published
           schema:
             type: string
+            example: true
          description: Return folders based on the folder published value.
From 998fa6c9e7d2554401e1346ed8b69eaf7e866959 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Tue, 14 Dec 2021 14:49:23 +0000
Subject: [PATCH 104/336] Add openapi specs and update testing docs

---
 docs/specification.yml | 19 ++++++++++++++++++-
 docs/test.rst          |  6 ++++--
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/docs/specification.yml b/docs/specification.yml
index 7fd9070b3..14447547f 100644
--- a/docs/specification.yml
+++ b/docs/specification.yml
@@ -897,13 +897,30 @@ paths:
         name: published
         schema:
           type: string
+        example: "true"
         description: Return folders based on the folder published value. Should be 'true' or 'false'
       - in: query
         name: name
         schema:
           type: string
-        description: Return folders containing filtered string[s] in their name
         example: test folder
+        description: Return folders containing filtered string[s] in their name
+      - in: query
+        name: date_created_start
+        schema:
+          type: string
+        example: "2015-01-01"
+        description: |
+          Returns folders created between provided dates.
+          MUST be used with parameter 'date_created_end'.
+      - in: query
+        name: date_created_end
+        schema:
+          type: string
+        example: "2015-12-31"
+        description: |
+          Returns folders created between provided dates.
+          MUST be used with parameter 'date_created_start'.
       responses:
         200:
           description: OK
diff --git a/docs/test.rst b/docs/test.rst
index 0ffe20134..1eaa6107a 100644
--- a/docs/test.rst
+++ b/docs/test.rst
@@ -42,8 +42,10 @@ Integration tests required a running backend, follow the instructions in :ref:`d
 After the backend has been successfully set up, run the following in the backend repository root directory:
 ``python tests/integration/run_tests.py``. This command will run a series of integration tests.
-To clean db before or after each integration tests run: ``python tests/integration/clean_db.py`` (``--tls``
-argument can be added if Mongodb is started via ``docker-compose-tls.yml``.
+To clean the db before or after each integration test run: ``python tests/integration/clean_db.py`` (``--tls`` argument
+can be added if Mongodb is started via ``docker-compose-tls.yml``). The clean_db.py script will delete all documents in all collections in the database.
+To erase the database run: ``python tests/integration/clean_db.py --purge``. After that, indexes need to be recreated.
+To do that run: ``python tests/integration/mongo_indexes.py`` (``--tls`` argument can be added if Mongodb is started via ``docker-compose-tls.yml``).
Performance Testing From a2accd7cc1333d3af82bcf70e4a3b49cfde2c15b Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 22 Dec 2021 14:10:33 +0000 Subject: [PATCH 105/336] Replace git misspell service with rojopolis spellchecker --- .github/workflows/docs.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index f7999a430..14704d969 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -12,10 +12,9 @@ jobs: runs-on: ${{ matrix.os }} steps: - - name: Spell check install - run: curl -L https://git.io/misspell | bash - - name: Spell check docs - run: bin/misspell -error docs/* + - uses: actions/checkout@master + - uses: rojopolis/spellcheck-github-actions@0.20.0 + name: Spellcheck code_docs: strategy: From bdbeb77ea64239204326662742274fd0270a5e7a Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 22 Dec 2021 15:36:19 +0000 Subject: [PATCH 106/336] Add spellcheck config file --- .spellcheck.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .spellcheck.yml diff --git a/.spellcheck.yml b/.spellcheck.yml new file mode 100644 index 000000000..5ef675ddf --- /dev/null +++ b/.spellcheck.yml @@ -0,0 +1,11 @@ +matrix: +- name: Markdown + aspell: + lang: en + dictionary: + encoding: utf-8 + pipeline: + - pyspelling.filters.markdown: + sources: + - 'docs/*.rst' + default_encoding: utf-8 From 54c08a2469d25cbf4535f1ead79708c075f7cda5 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 22 Dec 2021 16:11:44 +0000 Subject: [PATCH 107/336] Add wordlist and fix some typos --- .spellcheck.yml | 2 + .wordlist.txt | 165 +++++++++++++++++++++++++++++++++++++++++++++ README.md | 4 +- docs/submitter.rst | 6 +- docs/test.rst | 2 +- 5 files changed, 173 insertions(+), 6 deletions(-) create mode 100644 .wordlist.txt diff --git a/.spellcheck.yml b/.spellcheck.yml index 5ef675ddf..2a4b87d4c 100644 --- a/.spellcheck.yml +++ b/.spellcheck.yml @@ -3,6 +3,8 @@ matrix: aspell: lang: en dictionary: + wordlists: + - .wordlist.txt encoding: utf-8 pipeline: - pyspelling.filters.markdown: diff --git a/.wordlist.txt b/.wordlist.txt new file mode 100644 index 000000000..c329639f4 --- /dev/null +++ b/.wordlist.txt @@ -0,0 +1,165 @@ +AAI +accessionId +addObjectToDrafts +addObjectToFolder +Ajv +api +apisauce +asciicast +asciinema +Async +async +autenticate +auth +AUTHDB +automodule +autosummary +backend +baseURL +CERTREQS +CLI +conf +const +createFromJSON +createFromXML +createNewDraftFolder +createSlice +CSC +CSCfi +cscfi +css +currentSubmissionType +DAC +Dataset +destructure +ebi +EGA +ega +ENA +ena +enasequence +ENV +env +eppn +eslint +eslintrc +FhJ +fileName +folderID +followingly +FormData +formData +frontend +FWYs +genindex +Github +github +githubusercontent +GroupedBySchema +gunicorn +hostname +href +html +http +https +INITDB +inteded +io +javascript +js +JSON +json +JSONContent +JWK +Keyfile +Kubernetes +li +localhost +lt +makeStyles +maxdepth +md +metadataObjects +Metdata +middlewares +modindex +mongo +Mongodb +mongodb +mTFEFsWsNUbP +Nav +NeIC +neic +newdraft +noindex +npm +npx +objectDetails +ObjectInsideFolder +ObjectInsideFolderWithTags +ObjectStatus +ObjectSubmissionTypes +ObjectTags +objectType +ObjectTypes +OIDC +ol +OpenID +openid +overriden +phenome +pre +prettierrc +ProviderMetadata +py +PyCQA +pycqa +quickstart +readthedocs +redux +REFERER +reqs +rootReducer +rst +schemas +SDA +sda +seprately +SLLVERSION +spesified +sra +src +SSL +ssl +submissionFolder +submissionFolderSlice 
+submissionType +svg +TLS +tls +toctree +tox +UI +ui +uk +ul +un +url +useDispatch +UserInfo +useSelector +validator +withStyles +WizardComponents +wizardObject +WizardObjectIndex +WizardSavedObjectsList +WizardSavedObjectsListProps +WizardShowSummaryStep +WizardSteps +wizardSubmissionFolderSlice +xml +XMLFile +XSD +yaml +yml diff --git a/README.md b/README.md index 661581066..31f38133a 100644 --- a/README.md +++ b/README.md @@ -26,8 +26,8 @@ Server can then be found from `http://localhost:5430`. - Install project by running: `pip install .` in project root - Setup mongodb and env variables via desired way, details: - Server expects to find mongodb instance running, specified with following environment variables: - - `MONGO_USERNAME`, username for connecting to mondogdb instance - - `MONGO_PASSWORD`, password for connecting to mondogdb instance + - `MONGO_USERNAME`, username for connecting to mongodb instance + - `MONGO_PASSWORD`, password for connecting to mongodb instance - `MONGO_HOST`, host and port for mongodb instance (e.g. `localhost:27017`) - `MONGO_DATABASE`, If a specific database is to be used, set the name here. - `MONGO_AUTHDB`, if `MONGO_DATABASE` is set and the user doesn't exists in the database, set this to the database where the user exists (e.g. `admin`) diff --git a/docs/submitter.rst b/docs/submitter.rst index 153d0072a..5c694a643 100644 --- a/docs/submitter.rst +++ b/docs/submitter.rst @@ -96,10 +96,10 @@ For installing ``metadata-submitter`` backend do the following: .. hint:: Before running the application have MongoDB running. - MongoDB Server expects to find MongoDB instance running, spesified with following environmental variables: + MongoDB Server expects to find MongoDB instance running, specified with following environmental variables: - - ``MONGO_INITDB_ROOT_USERNAME`` (username for admin user to mondogdb instance) - - ``MONGO_INITDB_ROOT_PASSWORD`` (password for admin user to mondogdb instance) + - ``MONGO_INITDB_ROOT_USERNAME`` (username for admin user to mongodb instance) + - ``MONGO_INITDB_ROOT_PASSWORD`` (password for admin user to mongodb instance) - ``MONGO_HOST`` (host and port for MongoDB instance, e.g. `localhost:27017`) To run the backend from command line set the environment variables required and use: diff --git a/docs/test.rst b/docs/test.rst index 1eaa6107a..c8fc903a6 100644 --- a/docs/test.rst +++ b/docs/test.rst @@ -79,7 +79,7 @@ Run Jest-based tests with ``npm test``. Check code formatting and style errors w Respectively for formatting errors in ``json/yaml/css/md`` -files, use ``npm run format:check`` or ``npm run format``. Possible type errors can be checked with ``npm run flow``. -We're following recommended settings from ``eslint``, ``react`` and ``prettier`` - packages witha a couple of exceptions, +We're following recommended settings from ``eslint``, ``react`` and ``prettier`` - packages with a couple of exceptions, which can be found in ``.eslintrc`` and ``.prettierrc``. Linting, formatting and testing are also configured for you as a git pre-commit, which is recommended to use to avoid fails on CI pipeline. 
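Patches 105–107 wire the docs spellcheck into CI. The rojopolis action is built on pyspelling, so the same check can be reproduced locally before pushing. A sketch under stated assumptions: it presumes ``pyspelling`` has been installed with pip, a system ``aspell`` is available, and that ``-c`` is the flag selecting a config file (per pyspelling's CLI documentation).

```python
import subprocess

# Run the same spellcheck the CI workflow performs, against the repo's
# config and wordlist added in patches 106-107
result = subprocess.run(
    ["pyspelling", "-c", ".spellcheck.yml"],
    capture_output=True,
    text=True,
)
print(result.stdout or result.stderr)
```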
From ebdf188df13d80417dea2ec47b995f8dd54f654e Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Thu, 23 Dec 2021 09:28:59 +0000 Subject: [PATCH 108/336] Fixes more typos --- .wordlist.txt | 6 ------ docs/index.rst | 2 +- docs/metadata.rst | 2 +- docs/submitter.rst | 4 ++-- docs/test.rst | 4 ++-- 5 files changed, 6 insertions(+), 12 deletions(-) diff --git a/.wordlist.txt b/.wordlist.txt index c329639f4..69e52ff6f 100644 --- a/.wordlist.txt +++ b/.wordlist.txt @@ -9,7 +9,6 @@ asciicast asciinema Async async -autenticate auth AUTHDB automodule @@ -63,7 +62,6 @@ html http https INITDB -inteded io javascript js @@ -80,7 +78,6 @@ makeStyles maxdepth md metadataObjects -Metdata middlewares modindex mongo @@ -106,7 +103,6 @@ OIDC ol OpenID openid -overriden phenome pre prettierrc @@ -124,9 +120,7 @@ rst schemas SDA sda -seprately SLLVERSION -spesified sra src SSL diff --git a/docs/index.rst b/docs/index.rst index bb12b6853..23edca2a6 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -12,7 +12,7 @@ Single Page Application frontend. Metadata Submitter is divided intro :ref:`backend` and :ref:`frontend`, both of them coming together in a Single Page Application that aims to streamline working with metadata and providing a submission process through which researchers can submit and publish metadata. -The application's inteded use is with `NeIC SDA (Sensitive Data Archive) `_ stand-alone version, and it +The application's intended use is with `NeIC SDA (Sensitive Data Archive) `_ stand-alone version, and it consists out of the box includes the `ENA (European Nucleotide Archive) `_ metadata model, model which is used also by the `European Genome-phenome Archive (EGA) `_. diff --git a/docs/metadata.rst b/docs/metadata.rst index 3651c5a9e..8126ce978 100644 --- a/docs/metadata.rst +++ b/docs/metadata.rst @@ -30,7 +30,7 @@ Relationships between objects ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each of the objects are connected between each other by references, usually in the form of an ``accessionId``. -Some of the relationships are illustrated in the Metdata ENA Model figure, however in more detail they are connected as follows: +Some of the relationships are illustrated in the Metadata ENA Model figure, however in more detail they are connected as follows: - ``Study`` - usually other objects point to it, as it represents one of the main objects of a ``Submission``; - ``Analysis`` - contains references to: diff --git a/docs/submitter.rst b/docs/submitter.rst index 5c694a643..14f70cd3e 100644 --- a/docs/submitter.rst +++ b/docs/submitter.rst @@ -79,8 +79,8 @@ the table below. +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -.. note:: If just ``MONGO_DATABASE`` is specified it will autenticate the user against it. - If just ``MONGO_AUTHDB`` is specified it will autenticate the user against it. +.. note:: If just ``MONGO_DATABASE`` is specified it will authenticate the user against it. + If just ``MONGO_AUTHDB`` is specified it will authenticate the user against it. If both ``MONGO_DATABASE`` and ``MONGO_AUTHDB`` are specified, the client will attempt to authenticate the specified user to the MONGO_AUTHDB database. If both ``MONGO_DATABASE`` and ``MONGO_AUTHDB`` are unspecified, the client will attempt to authenticate the specified user to the admin database. 
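The note above describes how the backend chooses the authentication database. In MongoDB connection-string terms this is the `authSource` option; a minimal sketch of that selection logic follows, assuming the `MONGO_*` environment variables from the README and motor's `AsyncIOMotorClient`. The helper itself is hypothetical, not the project's actual configuration code.

```python
import os

from motor.motor_asyncio import AsyncIOMotorClient


def build_db_client() -> AsyncIOMotorClient:
    """Pick authSource per the note: MONGO_AUTHDB first, then MONGO_DATABASE, else admin."""
    user = os.environ["MONGO_USERNAME"]
    password = os.environ["MONGO_PASSWORD"]
    host = os.environ.get("MONGO_HOST", "localhost:27017")
    database = os.environ.get("MONGO_DATABASE", "")
    auth_db = os.environ.get("MONGO_AUTHDB", "") or database or "admin"
    url = f"mongodb://{user}:{password}@{host}/{database}?authSource={auth_db}"
    return AsyncIOMotorClient(url)
```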
diff --git a/docs/test.rst b/docs/test.rst
index c8fc903a6..54fa5b393 100644
--- a/docs/test.rst
+++ b/docs/test.rst
@@ -21,7 +21,7 @@ and `flake8 `_ (coding style guide)
 
     $ tox -p auto
 
-To run environments seprately use:
+To run environments separately use:
 
 .. code-block:: console
 
@@ -64,7 +64,7 @@ running the following commands in the repository root directory will run differe
 
 The configuration values for running performance tests are predefined in the ``locust.conf`` file in the repository root
 directory. All configuration options (`as defined here `_)
-can be overriden and new options can be added by either editing the current ``locust.conf`` file or running the test with additional tags, e.g.:
+can be overridden and new options can be added by either editing the current ``locust.conf`` file or running the test with additional tags, e.g.:

From e34bb89abc5d0b2d7bf9c87eaa95e9259370dff4 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 27 Dec 2021 09:06:43 +0000
Subject: [PATCH 109/336] Bump jsonschema from 4.3.1 to 4.3.2

Bumps [jsonschema](https://github.com/Julian/jsonschema) from 4.3.1 to 4.3.2.
- [Release notes](https://github.com/Julian/jsonschema/releases)
- [Changelog](https://github.com/Julian/jsonschema/blob/main/CHANGELOG.rst)
- [Commits](https://github.com/Julian/jsonschema/compare/v4.3.1...v4.3.2)

---
updated-dependencies:
- dependency-name: jsonschema
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index a5b8aa1af..8c9e75cb3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -40,7 +40,7 @@ idna==3.3
     # via
     #   requests
     #   yarl
-jsonschema==4.3.1
+jsonschema==4.3.2
     # via -r requirements.in
 motor==2.5.1
     # via -r requirements.in

From 33cf9ea50411116dd170fcb24aef6b9ed0c851d2 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 28 Dec 2021 12:48:02 +0000
Subject: [PATCH 110/336] Bump xmlschema from 1.9.1 to 1.9.2

Bumps [xmlschema](https://github.com/sissaschool/xmlschema) from 1.9.1 to 1.9.2.
- [Release notes](https://github.com/sissaschool/xmlschema/releases)
- [Changelog](https://github.com/sissaschool/xmlschema/blob/master/CHANGELOG.rst)
- [Commits](https://github.com/sissaschool/xmlschema/compare/v1.9.1...v1.9.2)

---
updated-dependencies:
- dependency-name: xmlschema
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 8c9e75cb3..506c37b11 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -68,7 +68,7 @@ urllib3==1.26.7
     # via requests
 uvloop==0.16.0
     # via -r requirements.in
-xmlschema==1.9.1
+xmlschema==1.9.2
     # via -r requirements.in
 yarl==1.7.2
     # via aiohttp

From 6df4a65a9f50661c18b7a7694175752313675824 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Tue, 28 Dec 2021 10:13:53 +0000
Subject: [PATCH 111/336] Refactors handlers and their tests

Separated most of the handlers into their own files inside the handlers
folder. Refactored tests for the new setup. Fixed imports in server.py.
--- metadata_backend/api/handlers.py | 1368 ----------------- metadata_backend/api/handlers/__init__.py | 1 + metadata_backend/api/handlers/api_handlers.py | 307 ++++ .../api/handlers/folder_handler.py | 322 ++++ .../api/handlers/object_handler.py | 256 +++ .../api/handlers/submission_handler.py | 166 ++ .../api/handlers/templates_handler.py | 163 ++ metadata_backend/api/handlers/user_handler.py | 221 +++ metadata_backend/server.py | 23 +- tests/test_handlers.py | 544 ++++--- 10 files changed, 1770 insertions(+), 1601 deletions(-) delete mode 100644 metadata_backend/api/handlers.py create mode 100644 metadata_backend/api/handlers/__init__.py create mode 100644 metadata_backend/api/handlers/api_handlers.py create mode 100644 metadata_backend/api/handlers/folder_handler.py create mode 100644 metadata_backend/api/handlers/object_handler.py create mode 100644 metadata_backend/api/handlers/submission_handler.py create mode 100644 metadata_backend/api/handlers/templates_handler.py create mode 100644 metadata_backend/api/handlers/user_handler.py diff --git a/metadata_backend/api/handlers.py b/metadata_backend/api/handlers.py deleted file mode 100644 index d43ed51b0..000000000 --- a/metadata_backend/api/handlers.py +++ /dev/null @@ -1,1368 +0,0 @@ -"""Handle HTTP methods for server.""" -import json -import mimetypes -import re -from collections import Counter -from datetime import date, datetime -from distutils.util import strtobool -from math import ceil -from pathlib import Path -from typing import Any, AsyncGenerator, Dict, List, Tuple, Union, cast - -import ujson -from aiohttp import BodyPartReader, web -from aiohttp.web import Request, Response -from motor.motor_asyncio import AsyncIOMotorClient -from multidict import CIMultiDict, MultiDict, MultiDictProxy -from xmlschema import XMLSchemaException - -from ..conf.conf import aai_config, publisher, schema_types -from ..helpers.doi import DOIHandler -from ..helpers.logger import LOG -from ..helpers.parser import XMLToJSONParser -from ..helpers.schema_loader import JSONSchemaLoader, SchemaNotFoundException, XMLSchemaLoader -from ..helpers.validator import JSONValidator, XMLValidator -from .middlewares import decrypt_cookie, get_session -from .operators import FolderOperator, Operator, UserOperator, XMLOperator - - -class RESTAPIHandler: - """Handler for REST API methods.""" - - def _check_schema_exists(self, schema_type: str) -> None: - """Check if schema type exists. - - :param schema_type: schema type. - :raises: HTTPNotFound if schema does not exist. - """ - if schema_type not in schema_types.keys(): - reason = f"Specified schema {schema_type} was not found." - LOG.error(reason) - raise web.HTTPNotFound(reason=reason) - - def _get_page_param(self, req: Request, name: str, default: int) -> int: - """Handle page parameter value extracting. - - :param req: GET Request - :param param_name: Name of the parameter - :param default: Default value in case parameter not specified in request - :returns: Page parameter value - """ - try: - param = int(req.query.get(name, default)) - except ValueError: - reason = f"{name} parameter must be a number, now it is {req.query.get(name)}" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - if param < 1: - reason = f"{name} parameter must be over 0" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - return param - - async def _handle_check_ownedby_user(self, req: Request, collection: str, accession_id: str) -> bool: - """Check if object belongs to user. 
- - For this we need to check the object is in exactly 1 folder and we need to check - that folder belongs to a user. If the folder is published that means it can be - browsed by other users as well. - - :param req: HTTP request - :param collection: collection or schema of document - :param doc_id: document accession id - :raises: HTTPUnauthorized if accession id does not belong to user - :returns: bool - """ - db_client = req.app["db_client"] - current_user = get_session(req)["user_info"] - user_op = UserOperator(db_client) - _check = False - - if collection != "folders": - - folder_op = FolderOperator(db_client) - check, folder_id, published = await folder_op.check_object_in_folder(collection, accession_id) - - if published: - _check = True - elif check: - # if the draft object is found in folder we just need to check if the folder belongs to user - _check = await user_op.check_user_has_doc("folders", current_user, folder_id) - elif collection.startswith("template"): - # if collection is template but not found in a folder - # we also check if object is in templates of the user - # they will be here if they will not be deleted after publish - _check = await user_op.check_user_has_doc(collection, current_user, accession_id) - else: - _check = False - else: - _check = await user_op.check_user_has_doc(collection, current_user, accession_id) - - if not _check: - reason = f"The ID: {accession_id} does not belong to current user." - LOG.error(reason) - raise web.HTTPUnauthorized(reason=reason) - - return _check - - async def _get_collection_objects( - self, folder_op: AsyncIOMotorClient, collection: str, seq: List - ) -> AsyncGenerator: - """Get objects ids based on folder and collection. - - Considering that many objects will be returned good to have a generator. - - :param req: HTTP request - :param collection: collection or schema of document - :param seq: list of folders - :returns: AsyncGenerator - """ - for el in seq: - result = await folder_op.get_collection_objects(el, collection) - - yield result - - async def _handle_user_objects_collection(self, req: Request, collection: str) -> List: - """Retrieve list of objects accession ids belonging to user in collection. - - :param req: HTTP request - :param collection: collection or schema of document - :returns: List - """ - db_client = req.app["db_client"] - current_user = get_session(req)["user_info"] - user_op = UserOperator(db_client) - folder_op = FolderOperator(db_client) - - user = await user_op.read_user(current_user) - res = self._get_collection_objects(folder_op, collection, user["folders"]) - - dt = [] - async for r in res: - dt.extend(r) - - return dt - - async def _filter_by_user(self, req: Request, collection: str, seq: List) -> AsyncGenerator: - """For a list of objects check if these are owned by a user. - - This can be called using a partial from functools. - - :param req: HTTP request - :param collection: collection or schema of document - :param seq: list of folders - :returns: AsyncGenerator - """ - for el in seq: - if await self._handle_check_ownedby_user(req, collection, el["accessionId"]): - yield el - - async def _get_data(self, req: Request) -> Dict: - """Get the data content from a request. - - :param req: POST/PUT/PATCH request - :raises: HTTPBadRequest if request does not have proper JSON data - :returns: JSON content of the request - """ - try: - content = await req.json() - return content - except json.decoder.JSONDecodeError as e: - reason = "JSON is not correctly formatted." 
f" See: {e}" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - async def get_schema_types(self, req: Request) -> Response: - """Get all possible metadata schema types from database. - - Basically returns which objects user can submit and query for. - :param req: GET Request - :returns: JSON list of schema types - """ - types_json = ujson.dumps([x["description"] for x in schema_types.values()], escape_forward_slashes=False) - LOG.info(f"GET schema types. Retrieved {len(schema_types)} schemas.") - return web.Response(body=types_json, status=200, content_type="application/json") - - async def get_json_schema(self, req: Request) -> Response: - """Get all JSON Schema for a specific schema type. - - Basically returns which objects user can submit and query for. - :param req: GET Request - :raises: HTTPBadRequest if request does not find the schema - :returns: JSON list of schema types - """ - schema_type = req.match_info["schema"] - self._check_schema_exists(schema_type) - - try: - schema = JSONSchemaLoader().get_schema(schema_type) - LOG.info(f"{schema_type} schema loaded.") - return web.Response( - body=ujson.dumps(schema, escape_forward_slashes=False), status=200, content_type="application/json" - ) - - except SchemaNotFoundException as error: - reason = f"{error} ({schema_type})" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - async def _header_links(self, url: str, page: int, size: int, total_objects: int) -> CIMultiDict[str]: - """Create link header for pagination. - - :param url: base url for request - :param page: current page - :param size: results per page - :param total_objects: total objects to compute the total pages - :returns: JSON with query results - """ - total_pages = ceil(total_objects / size) - prev_link = f'<{url}?page={page-1}&per_page={size}>; rel="prev", ' if page > 1 else "" - next_link = f'<{url}?page={page+1}&per_page={size}>; rel="next", ' if page < total_pages else "" - last_link = f'<{url}?page={total_pages}&per_page={size}>; rel="last"' if page < total_pages else "" - comma = ", " if page > 1 and page < total_pages else "" - first_link = f'<{url}?page=1&per_page={size}>; rel="first"{comma}' if page > 1 else "" - links = f"{prev_link}{next_link}{first_link}{last_link}" - link_headers = CIMultiDict(Link=f"{links}") - LOG.debug("Link headers created") - return link_headers - - -class ObjectAPIHandler(RESTAPIHandler): - """API Handler for Objects.""" - - async def _handle_query(self, req: Request) -> Response: - """Handle query results. 
- - :param req: GET request with query parameters - :returns: JSON with query results - """ - collection = req.match_info["schema"] - req_format = req.query.get("format", "json").lower() - if req_format == "xml": - reason = "xml-formatted query results are not supported" - raise web.HTTPBadRequest(reason=reason) - - page = self._get_page_param(req, "page", 1) - per_page = self._get_page_param(req, "per_page", 10) - db_client = req.app["db_client"] - - filter_list = await self._handle_user_objects_collection(req, collection) - data, page_num, page_size, total_objects = await Operator(db_client).query_metadata_database( - collection, req.query, page, per_page, filter_list - ) - - result = ujson.dumps( - { - "page": { - "page": page_num, - "size": page_size, - "totalPages": ceil(total_objects / per_page), - "totalObjects": total_objects, - }, - "objects": data, - }, - escape_forward_slashes=False, - ) - url = f"{req.scheme}://{req.host}{req.path}" - link_headers = await self._header_links(url, page_num, per_page, total_objects) - LOG.debug(f"Pagination header links: {link_headers}") - LOG.info(f"Querying for objects in {collection} resulted in {total_objects} objects ") - return web.Response( - body=result, - status=200, - headers=link_headers, - content_type="application/json", - ) - - async def get_object(self, req: Request) -> Response: - """Get one metadata object by its accession id. - - Returns original XML object from backup if format query parameter is - set, otherwise JSON. - - :param req: GET request - :returns: JSON or XML response containing metadata object - """ - accession_id = req.match_info["accessionId"] - schema_type = req.match_info["schema"] - self._check_schema_exists(schema_type) - collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type - - req_format = req.query.get("format", "json").lower() - db_client = req.app["db_client"] - operator = XMLOperator(db_client) if req_format == "xml" else Operator(db_client) - type_collection = f"xml-{collection}" if req_format == "xml" else collection - - await operator.check_exists(collection, accession_id) - - await self._handle_check_ownedby_user(req, collection, accession_id) - - data, content_type = await operator.read_metadata_object(type_collection, accession_id) - - data = data if req_format == "xml" else ujson.dumps(data, escape_forward_slashes=False) - LOG.info(f"GET object with accesssion ID {accession_id} from schema {collection}.") - return web.Response(body=data, status=200, content_type=content_type) - - async def post_object(self, req: Request) -> Response: - """Save metadata object to database. - - For JSON request body we validate it is consistent with the - associated JSON schema. 
- - :param req: POST request - :returns: JSON response containing accessionId for submitted object - """ - schema_type = req.match_info["schema"] - self._check_schema_exists(schema_type) - collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type - - db_client = req.app["db_client"] - content: Union[Dict, str] - operator: Union[Operator, XMLOperator] - if req.content_type == "multipart/form-data": - files = await _extract_xml_upload(req, extract_one=True) - content, _ = files[0] - operator = XMLOperator(db_client) - else: - content = await self._get_data(req) - if not req.path.startswith("/drafts"): - JSONValidator(content, schema_type).validate - operator = Operator(db_client) - - accession_id = await operator.create_metadata_object(collection, content) - - body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) - url = f"{req.scheme}://{req.host}{req.path}" - location_headers = CIMultiDict(Location=f"{url}/{accession_id}") - LOG.info(f"POST object with accesssion ID {accession_id} in schema {collection} was successful.") - return web.Response( - body=body, - status=201, - headers=location_headers, - content_type="application/json", - ) - - async def query_objects(self, req: Request) -> Response: - """Query metadata objects from database. - - :param req: GET request with query parameters (can be empty). - :returns: Query results as JSON - """ - schema_type = req.match_info["schema"] - self._check_schema_exists(schema_type) - return await self._handle_query(req) - - async def delete_object(self, req: Request) -> Response: - """Delete metadata object from database. - - :param req: DELETE request - :raises: HTTPUnauthorized if folder published - :raises: HTTPUnprocessableEntity if object does not belong to current user - :returns: HTTPNoContent response - """ - schema_type = req.match_info["schema"] - self._check_schema_exists(schema_type) - collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type - - accession_id = req.match_info["accessionId"] - db_client = req.app["db_client"] - - await Operator(db_client).check_exists(collection, accession_id) - - await self._handle_check_ownedby_user(req, collection, accession_id) - - folder_op = FolderOperator(db_client) - exists, folder_id, published = await folder_op.check_object_in_folder(collection, accession_id) - if exists: - if published: - reason = "published objects cannot be deleted." - LOG.error(reason) - raise web.HTTPUnauthorized(reason=reason) - await folder_op.remove_object(folder_id, collection, accession_id) - else: - reason = "This object does not seem to belong to any user." - LOG.error(reason) - raise web.HTTPUnprocessableEntity(reason=reason) - - accession_id = await Operator(db_client).delete_metadata_object(collection, accession_id) - - LOG.info(f"DELETE object with accession ID {accession_id} in schema {collection} was successful.") - return web.Response(status=204) - - async def put_object(self, req: Request) -> Response: - """Replace metadata object in database. - - For JSON request we don't allow replacing in the DB. 
- - :param req: PUT request - :raises: HTTPUnsupportedMediaType if JSON replace is attempted - :returns: JSON response containing accessionId for submitted object - """ - schema_type = req.match_info["schema"] - accession_id = req.match_info["accessionId"] - self._check_schema_exists(schema_type) - collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type - - db_client = req.app["db_client"] - content: Union[Dict, str] - operator: Union[Operator, XMLOperator] - if req.content_type == "multipart/form-data": - files = await _extract_xml_upload(req, extract_one=True) - content, _ = files[0] - operator = XMLOperator(db_client) - else: - content = await self._get_data(req) - if not req.path.startswith("/drafts"): - reason = "Replacing objects only allowed for XML." - LOG.error(reason) - raise web.HTTPUnsupportedMediaType(reason=reason) - operator = Operator(db_client) - - await operator.check_exists(collection, accession_id) - - await self._handle_check_ownedby_user(req, collection, accession_id) - - accession_id = await operator.replace_metadata_object(collection, accession_id, content) - - body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) - LOG.info(f"PUT object with accession ID {accession_id} in schema {collection} was successful.") - return web.Response(body=body, status=200, content_type="application/json") - - async def patch_object(self, req: Request) -> Response: - """Update metadata object in database. - - We do not support patch for XML. - - :param req: PATCH request - :raises: HTTPUnauthorized if object is in published folder - :returns: JSON response containing accessionId for submitted object - """ - schema_type = req.match_info["schema"] - accession_id = req.match_info["accessionId"] - self._check_schema_exists(schema_type) - collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type - - db_client = req.app["db_client"] - operator: Union[Operator, XMLOperator] - if req.content_type == "multipart/form-data": - reason = "XML patching is not possible." - raise web.HTTPUnsupportedMediaType(reason=reason) - else: - content = await self._get_data(req) - operator = Operator(db_client) - - await operator.check_exists(collection, accession_id) - - await self._handle_check_ownedby_user(req, collection, accession_id) - - folder_op = FolderOperator(db_client) - exists, _, published = await folder_op.check_object_in_folder(collection, accession_id) - if exists: - if published: - reason = "Published objects cannot be updated." - LOG.error(reason) - raise web.HTTPUnauthorized(reason=reason) - - accession_id = await operator.update_metadata_object(collection, accession_id, content) - - body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) - LOG.info(f"PATCH object with accession ID {accession_id} in schema {collection} was successful.") - return web.Response(body=body, status=200, content_type="application/json") - - -class TemplatesAPIHandler(RESTAPIHandler): - """API Handler for Templates.""" - - async def get_template(self, req: Request) -> Response: - """Get one metadata template by its accession id. - - Returns JSON. 
- - :param req: GET request - :returns: JSON response containing template - """ - accession_id = req.match_info["accessionId"] - schema_type = req.match_info["schema"] - self._check_schema_exists(schema_type) - collection = f"template-{schema_type}" - - db_client = req.app["db_client"] - operator = Operator(db_client) - - await operator.check_exists(collection, accession_id) - - await self._handle_check_ownedby_user(req, collection, accession_id) - - data, content_type = await operator.read_metadata_object(collection, accession_id) - - data = ujson.dumps(data, escape_forward_slashes=False) - LOG.info(f"GET template with accesssion ID {accession_id} from schema {collection}.") - return web.Response(body=data, status=200, content_type=content_type) - - async def post_template(self, req: Request) -> Response: - """Save metadata template to database. - - For JSON request body we validate it is consistent with the - associated JSON schema. - - :param req: POST request - :returns: JSON response containing accessionId for submitted template - """ - schema_type = req.match_info["schema"] - self._check_schema_exists(schema_type) - collection = f"template-{schema_type}" - - db_client = req.app["db_client"] - content = await self._get_data(req) - - user_op = UserOperator(db_client) - current_user = get_session(req)["user_info"] - - operator = Operator(db_client) - - if isinstance(content, list): - tmpl_list = [] - for num, tmpl in enumerate(content): - if "template" not in tmpl: - reason = f"template key is missing from request body for element: {num}." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - accession_id = await operator.create_metadata_object(collection, tmpl["template"]) - data = [{"accessionId": accession_id, "schema": collection}] - if "tags" in tmpl: - data[0]["tags"] = tmpl["tags"] - await user_op.assign_objects(current_user, "templates", data) - tmpl_list.append({"accessionId": accession_id}) - - body = ujson.dumps(tmpl_list, escape_forward_slashes=False) - else: - if "template" not in content: - reason = "template key is missing from request body." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - accession_id = await operator.create_metadata_object(collection, content["template"]) - data = [{"accessionId": accession_id, "schema": collection}] - if "tags" in content: - data[0]["tags"] = content["tags"] - await user_op.assign_objects(current_user, "templates", data) - - body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) - - url = f"{req.scheme}://{req.host}{req.path}" - location_headers = CIMultiDict(Location=f"{url}/{accession_id}") - LOG.info(f"POST template with accesssion ID {accession_id} in schema {collection} was successful.") - return web.Response( - body=body, - status=201, - headers=location_headers, - content_type="application/json", - ) - - async def patch_template(self, req: Request) -> Response: - """Update metadata template in database. 
- - :param req: PATCH request - :raises: HTTPUnauthorized if template is in published folder - :returns: JSON response containing accessionId for submitted template - """ - schema_type = req.match_info["schema"] - accession_id = req.match_info["accessionId"] - self._check_schema_exists(schema_type) - collection = f"template-{schema_type}" - - db_client = req.app["db_client"] - operator: Union[Operator, XMLOperator] - - content = await self._get_data(req) - operator = Operator(db_client) - - await operator.check_exists(collection, accession_id) - - await self._handle_check_ownedby_user(req, collection, accession_id) - - accession_id = await operator.update_metadata_object(collection, accession_id, content) - - body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) - LOG.info(f"PATCH template with accession ID {accession_id} in schema {collection} was successful.") - return web.Response(body=body, status=200, content_type="application/json") - - async def delete_template(self, req: Request) -> Response: - """Delete metadata template from database. - - :param req: DELETE request - :raises: HTTPUnauthorized if folder published - :raises: HTTPUnprocessableEntity if template does not belong to current user - :returns: HTTPNoContent response - """ - schema_type = req.match_info["schema"] - self._check_schema_exists(schema_type) - collection = f"template-{schema_type}" - - accession_id = req.match_info["accessionId"] - db_client = req.app["db_client"] - - await Operator(db_client).check_exists(collection, accession_id) - - await self._handle_check_ownedby_user(req, collection, accession_id) - - user_op = UserOperator(db_client) - current_user = get_session(req)["user_info"] - check_user = await user_op.check_user_has_doc(collection, current_user, accession_id) - if check_user: - await user_op.remove_objects(current_user, "templates", [accession_id]) - else: - reason = "This template does not seem to belong to any user." - LOG.error(reason) - raise web.HTTPUnprocessableEntity(reason=reason) - - accession_id = await Operator(db_client).delete_metadata_object(collection, accession_id) - - LOG.info(f"DELETE template with accession ID {accession_id} in schema {collection} was successful.") - return web.Response(status=204) - - -class FolderAPIHandler(RESTAPIHandler): - """API Handler for folders.""" - - def _check_patch_folder(self, patch_ops: Any) -> None: - """Check patch operations in request are valid. - - We check that ``metadataObjects`` and ``drafts`` have ``_required_values``. - For tags we check that the ``submissionType`` takes either ``XML`` or - ``Form`` as values. - :param patch_ops: JSON patch request - :raises: HTTPBadRequest if request does not fullfil one of requirements - :raises: HTTPUnauthorized if request tries to do anything else than add or replace - :returns: None - """ - _required_paths = ["/name", "/description"] - _required_values = ["schema", "accessionId"] - _arrays = ["/metadataObjects/-", "/drafts/-", "/doiInfo"] - _tags = re.compile("^/(metadataObjects|drafts)/[0-9]*/(tags)$") - - for op in patch_ops: - if _tags.match(op["path"]): - LOG.info(f"{op['op']} on tags in folder") - if "submissionType" in op["value"].keys() and op["value"]["submissionType"] not in ["XML", "Form"]: - reason = "submissionType is restricted to either 'XML' or 'Form' values." 
- LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - pass - else: - if all(i not in op["path"] for i in _required_paths + _arrays): - reason = f"Request contains '{op['path']}' key that cannot be updated to folders." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - if op["op"] in ["remove", "copy", "test", "move"]: - reason = f"{op['op']} on {op['path']} is not allowed." - LOG.error(reason) - raise web.HTTPUnauthorized(reason=reason) - if op["op"] == "replace" and op["path"] in _arrays: - reason = f"{op['op']} on {op['path']}; replacing all objects is not allowed." - LOG.error(reason) - raise web.HTTPUnauthorized(reason=reason) - if op["path"] in _arrays and op["path"] != "/doiInfo": - _ops = op["value"] if isinstance(op["value"], list) else [op["value"]] - for item in _ops: - if not all(key in item.keys() for key in _required_values): - reason = "accessionId and schema are required fields." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - if ( - "tags" in item - and "submissionType" in item["tags"] - and item["tags"]["submissionType"] not in ["XML", "Form"] - ): - reason = "submissionType is restricted to either 'XML' or 'Form' values." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - async def get_folders(self, req: Request) -> Response: - """Get a set of folders owned by the user with pagination values. - - :param req: GET Request - :returns: JSON list of folders available for the user - """ - page = self._get_page_param(req, "page", 1) - per_page = self._get_page_param(req, "per_page", 5) - sort = {"date": True, "score": False} - db_client = req.app["db_client"] - - user_operator = UserOperator(db_client) - current_user = get_session(req)["user_info"] - user = await user_operator.read_user(current_user) - - folder_query = {"folderId": {"$in": user["folders"]}} - # Check if only published or draft folders are requestsed - if "published" in req.query: - pub_param = req.query.get("published", "").title() - if pub_param in ["True", "False"]: - folder_query["published"] = {"$eq": bool(strtobool(pub_param))} - else: - reason = "'published' parameter must be either 'true' or 'false'" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - if "name" in req.query: - name_param = req.query.get("name", "") - if name_param: - folder_query = {"$text": {"$search": name_param}} - sort["score"] = True - sort["date"] = False - - format_incoming = "%Y-%m-%d" - format_query = "%Y-%m-%d %H:%M:%S" - if "date_created_start" in req.query and "date_created_end" in req.query: - date_param_start = req.query.get("date_created_start", "") - date_param_end = req.query.get("date_created_end", "") - - if datetime.strptime(date_param_start, format_incoming) and datetime.strptime( - date_param_end, format_incoming - ): - query_start = datetime.strptime(date_param_start + " 00:00:00", format_query).timestamp() - query_end = datetime.strptime(date_param_end + " 23:59:59", format_query).timestamp() - folder_query["dateCreated"] = {"$gte": query_start, "$lte": query_end} - else: - reason = f"'date_created_start' and 'date_created_end' parameters must be formated as {format_incoming}" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - if "name" in req.query and "date_created_start" in req.query: - sort["score"] = True - sort["date"] = True - - folder_operator = FolderOperator(db_client) - folders, total_folders = await folder_operator.query_folders(folder_query, page, per_page, sort) - - result = ujson.dumps( - { - "page": { - "page": page, 
- "size": per_page, - "totalPages": ceil(total_folders / per_page), - "totalFolders": total_folders, - }, - "folders": folders, - }, - escape_forward_slashes=False, - ) - - url = f"{req.scheme}://{req.host}{req.path}" - link_headers = await self._header_links(url, page, per_page, total_folders) - LOG.debug(f"Pagination header links: {link_headers}") - LOG.info(f"Querying for user's folders resulted in {total_folders} folders") - return web.Response( - body=result, - status=200, - headers=link_headers, - content_type="application/json", - ) - - async def post_folder(self, req: Request) -> Response: - """Save object folder to database. - - Also assigns the folder to the current user. - - :param req: POST request - :returns: JSON response containing folder ID for submitted folder - """ - db_client = req.app["db_client"] - content = await self._get_data(req) - - JSONValidator(content, "folders").validate - - operator = FolderOperator(db_client) - folder = await operator.create_folder(content) - - user_op = UserOperator(db_client) - current_user = get_session(req)["user_info"] - await user_op.assign_objects(current_user, "folders", [folder]) - - body = ujson.dumps({"folderId": folder}, escape_forward_slashes=False) - - url = f"{req.scheme}://{req.host}{req.path}" - location_headers = CIMultiDict(Location=f"{url}/{folder}") - LOG.info(f"POST new folder with ID {folder} was successful.") - return web.Response(body=body, status=201, headers=location_headers, content_type="application/json") - - async def get_folder(self, req: Request) -> Response: - """Get one object folder by its folder id. - - :param req: GET request - :raises: HTTPNotFound if folder not owned by user - :returns: JSON response containing object folder - """ - folder_id = req.match_info["folderId"] - db_client = req.app["db_client"] - operator = FolderOperator(db_client) - - await operator.check_folder_exists(folder_id) - - await self._handle_check_ownedby_user(req, "folders", folder_id) - - folder = await operator.read_folder(folder_id) - - LOG.info(f"GET folder with ID {folder_id} was successful.") - return web.Response( - body=ujson.dumps(folder, escape_forward_slashes=False), status=200, content_type="application/json" - ) - - async def patch_folder(self, req: Request) -> Response: - """Update object folder with a specific folder id. - - :param req: PATCH request - :returns: JSON response containing folder ID for updated folder - """ - folder_id = req.match_info["folderId"] - db_client = req.app["db_client"] - - operator = FolderOperator(db_client) - - await operator.check_folder_exists(folder_id) - - # Check patch operations in request are valid - patch_ops = await self._get_data(req) - self._check_patch_folder(patch_ops) - - # Validate against folders schema if DOI is being added - for op in patch_ops: - if op["path"] == "/doiInfo": - curr_folder = await operator.read_folder(folder_id) - curr_folder["doiInfo"] = op["value"] - JSONValidator(curr_folder, "folders").validate - - await self._handle_check_ownedby_user(req, "folders", folder_id) - - upd_folder = await operator.update_folder(folder_id, patch_ops if isinstance(patch_ops, list) else [patch_ops]) - - body = ujson.dumps({"folderId": upd_folder}, escape_forward_slashes=False) - LOG.info(f"PATCH folder with ID {upd_folder} was successful.") - return web.Response(body=body, status=200, content_type="application/json") - - async def publish_folder(self, req: Request) -> Response: - """Update object folder specifically into published state. 
- - :param req: PATCH request - :returns: JSON response containing folder ID for updated folder - """ - folder_id = req.match_info["folderId"] - db_client = req.app["db_client"] - operator = FolderOperator(db_client) - - await operator.check_folder_exists(folder_id) - - await self._handle_check_ownedby_user(req, "folders", folder_id) - - folder = await operator.read_folder(folder_id) - - obj_ops = Operator(db_client) - - # Create draft DOI and delete draft objects from the folder - doi = DOIHandler() - doi_data = await doi.create_draft_doi() - identifier = {"identifierType": "DOI", "doi": doi_data["fullDOI"]} - - for obj in folder["drafts"]: - await obj_ops.delete_metadata_object(obj["schema"], obj["accessionId"]) - - # Patch the folder into a published state - patch = [ - {"op": "replace", "path": "/published", "value": True}, - {"op": "replace", "path": "/drafts", "value": []}, - {"op": "add", "path": "/datePublished", "value": int(datetime.now().timestamp())}, - {"op": "add", "path": "/extraInfo/identifier", "value": identifier}, - {"op": "add", "path": "/extraInfo/url", "value": doi_data["dataset"]}, - {"op": "add", "path": "/extraInfo/publisher", "value": publisher}, - { - "op": "add", - "path": "/extraInfo/types", - "value": { - "ris": "DATA", - "bibtex": "misc", - "citeproc": "dataset", - "schemaOrg": "Dataset", - "resourceTypeGeneral": "Dataset", - }, - }, - {"op": "add", "path": "/extraInfo/publicationYear", "value": date.today().year}, - ] - new_folder = await operator.update_folder(folder_id, patch) - - body = ujson.dumps({"folderId": new_folder}, escape_forward_slashes=False) - LOG.info(f"Patching folder with ID {new_folder} was successful.") - return web.Response(body=body, status=200, content_type="application/json") - - async def delete_folder(self, req: Request) -> Response: - """Delete object folder from database. - - :param req: DELETE request - :returns: HTTP No Content response - """ - folder_id = req.match_info["folderId"] - db_client = req.app["db_client"] - operator = FolderOperator(db_client) - - await operator.check_folder_exists(folder_id) - await operator.check_folder_published(folder_id) - - await self._handle_check_ownedby_user(req, "folders", folder_id) - - obj_ops = Operator(db_client) - - folder = await operator.read_folder(folder_id) - - for obj in folder["drafts"] + folder["metadataObjects"]: - await obj_ops.delete_metadata_object(obj["schema"], obj["accessionId"]) - - _folder_id = await operator.delete_folder(folder_id) - - user_op = UserOperator(db_client) - current_user = get_session(req)["user_info"] - await user_op.remove_objects(current_user, "folders", [folder_id]) - - LOG.info(f"DELETE folder with ID {_folder_id} was successful.") - return web.Response(status=204) - - -class UserAPIHandler(RESTAPIHandler): - """API Handler for users.""" - - def _check_patch_user(self, patch_ops: Any) -> None: - """Check patch operations in request are valid. - - We check that ``folders`` have string values (one or a list) - and ``drafts`` have ``_required_values``. - For tags we check that the ``submissionType`` takes either ``XML`` or - ``Form`` as values. 
- :param patch_ops: JSON patch request - :raises: HTTPBadRequest if request does not fullfil one of requirements - :raises: HTTPUnauthorized if request tries to do anything else than add or replace - :returns: None - """ - _arrays = ["/templates/-", "/folders/-"] - _required_values = ["schema", "accessionId"] - _tags = re.compile("^/(templates)/[0-9]*/(tags)$") - for op in patch_ops: - if _tags.match(op["path"]): - LOG.info(f"{op['op']} on tags in folder") - if "submissionType" in op["value"].keys() and op["value"]["submissionType"] not in ["XML", "Form"]: - reason = "submissionType is restricted to either 'XML' or 'Form' values." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - pass - else: - if all(i not in op["path"] for i in _arrays): - reason = f"Request contains '{op['path']}' key that cannot be updated to user object" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - if op["op"] in ["remove", "copy", "test", "move", "replace"]: - reason = f"{op['op']} on {op['path']} is not allowed." - LOG.error(reason) - raise web.HTTPUnauthorized(reason=reason) - if op["path"] == "/folders/-": - if not (isinstance(op["value"], str) or isinstance(op["value"], list)): - reason = "We only accept string folder IDs." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - if op["path"] == "/templates/-": - _ops = op["value"] if isinstance(op["value"], list) else [op["value"]] - for item in _ops: - if not all(key in item.keys() for key in _required_values): - reason = "accessionId and schema are required fields." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - if ( - "tags" in item - and "submissionType" in item["tags"] - and item["tags"]["submissionType"] not in ["XML", "Form"] - ): - reason = "submissionType is restricted to either 'XML' or 'Form' values." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - async def get_user(self, req: Request) -> Response: - """Get one user by its user ID. - - :param req: GET request - :raises: HTTPUnauthorized if not current user - :returns: JSON response containing user object or list of user templates or user folders by id - """ - user_id = req.match_info["userId"] - if user_id != "current": - LOG.info(f"User ID {user_id} was requested") - raise web.HTTPUnauthorized(reason="Only current user retrieval is allowed") - - current_user = get_session(req)["user_info"] - - item_type = req.query.get("items", "").lower() - if item_type: - # Return only list of templates or list of folder IDs owned by the user - result, link_headers = await self._get_user_items(req, current_user, item_type) - return web.Response( - body=ujson.dumps(result, escape_forward_slashes=False), - status=200, - headers=link_headers, - content_type="application/json", - ) - else: - # Return whole user object if templates or folders are not specified in query - db_client = req.app["db_client"] - operator = UserOperator(db_client) - user = await operator.read_user(current_user) - LOG.info(f"GET user with ID {user_id} was successful.") - return web.Response( - body=ujson.dumps(user, escape_forward_slashes=False), status=200, content_type="application/json" - ) - - async def patch_user(self, req: Request) -> Response: - """Update user object with a specific user ID. 
- - :param req: PATCH request - :raises: HTTPUnauthorized if not current user - :returns: JSON response containing user ID for updated user object - """ - user_id = req.match_info["userId"] - if user_id != "current": - LOG.info(f"User ID {user_id} patch was requested") - raise web.HTTPUnauthorized(reason="Only current user operations are allowed") - db_client = req.app["db_client"] - - patch_ops = await self._get_data(req) - self._check_patch_user(patch_ops) - - operator = UserOperator(db_client) - - current_user = get_session(req)["user_info"] - user = await operator.update_user(current_user, patch_ops if isinstance(patch_ops, list) else [patch_ops]) - - body = ujson.dumps({"userId": user}) - LOG.info(f"PATCH user with ID {user} was successful.") - return web.Response(body=body, status=200, content_type="application/json") - - async def delete_user(self, req: Request) -> Response: - """Delete user from database. - - :param req: DELETE request - :raises: HTTPUnauthorized if not current user - :returns: HTTPNoContent response - """ - user_id = req.match_info["userId"] - if user_id != "current": - LOG.info(f"User ID {user_id} delete was requested") - raise web.HTTPUnauthorized(reason="Only current user deletion is allowed") - db_client = req.app["db_client"] - operator = UserOperator(db_client) - fold_ops = FolderOperator(db_client) - obj_ops = Operator(db_client) - - current_user = get_session(req)["user_info"] - user = await operator.read_user(current_user) - - for folder_id in user["folders"]: - _folder = await fold_ops.read_folder(folder_id) - if "published" in _folder and not _folder["published"]: - for obj in _folder["drafts"] + _folder["metadataObjects"]: - await obj_ops.delete_metadata_object(obj["schema"], obj["accessionId"]) - await fold_ops.delete_folder(folder_id) - - for tmpl in user["templates"]: - await obj_ops.delete_metadata_object(tmpl["schema"], tmpl["accessionId"]) - - await operator.delete_user(current_user) - LOG.info(f"DELETE user with ID {current_user} was successful.") - - cookie = decrypt_cookie(req) - - try: - req.app["Session"].pop(cookie["id"]) - req.app["Cookies"].remove(cookie["id"]) - except KeyError: - pass - - response = web.HTTPSeeOther(f"{aai_config['redirect']}/") - response.headers["Location"] = ( - "/" if aai_config["redirect"] == aai_config["domain"] else f"{aai_config['redirect']}/" - ) - LOG.debug("Logged out user ") - raise response - - async def _get_user_items(self, req: Request, user: Dict, item_type: str) -> Tuple[Dict, CIMultiDict[str]]: - """Get draft templates owned by the user with pagination values. - - :param req: GET request - :param user: User object - :param item_type: Name of the items ("templates" or "folders") - :raises: HTTPUnauthorized if not current user - :returns: Paginated list of user draft templates and link header - """ - # Check item_type parameter is not faulty - if item_type not in ["templates", "folders"]: - reason = f"{item_type} is a faulty item parameter. 
Should be either folders or templates" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - page = self._get_page_param(req, "page", 1) - per_page = self._get_page_param(req, "per_page", 5) - - db_client = req.app["db_client"] - operator = UserOperator(db_client) - user_id = req.match_info["userId"] - - query = {"userId": user} - - items, total_items = await operator.filter_user(query, item_type, page, per_page) - LOG.info(f"GET user with ID {user_id} was successful.") - - result = { - "page": { - "page": page, - "size": per_page, - "totalPages": ceil(total_items / per_page), - "total" + item_type.title(): total_items, - }, - item_type: items, - } - - url = f"{req.scheme}://{req.host}{req.path}" - link_headers = await self._header_links(url, page, per_page, total_items) - LOG.debug(f"Pagination header links: {link_headers}") - LOG.info(f"Querying for user's {item_type} resulted in {total_items} {item_type}") - return result, link_headers - - -class SubmissionAPIHandler: - """Handler for non-rest API methods.""" - - async def submit(self, req: Request) -> Response: - """Handle submission.xml containing submissions to server. - - First submission info is parsed and then for every action in submission - (add/modify/validate) corresponding operation is performed. - Finally submission info itself is added. - - :param req: Multipart POST request with submission.xml and files - :raises: HTTPBadRequest if request is missing some parameters or cannot be processed - :returns: XML-based receipt from submission - """ - files = await _extract_xml_upload(req) - schema_types = Counter(file[1] for file in files) - if "submission" not in schema_types: - reason = "There must be a submission.xml file in submission." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - if schema_types["submission"] > 1: - reason = "You should submit only one submission.xml file." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - submission_xml = files[0][0] - submission_json = XMLToJSONParser().parse("submission", submission_xml) - - # Check what actions should be performed, collect them to dictionary - actions: Dict[str, List] = {} - for action_set in submission_json["actions"]["action"]: - for action, attr in action_set.items(): - if not attr: - reason = f"""You also need to provide necessary - information for submission action. 
- Now {action} was provided without any - extra information.""" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - LOG.debug(f"submission has action {action}") - if attr["schema"] in actions: - set = [] - set.append(actions[attr["schema"]]) - set.append(action) - actions[attr["schema"]] = set - else: - actions[attr["schema"]] = action - - # Go through parsed files and do the actual action - results: List[Dict] = [] - db_client = req.app["db_client"] - for file in files: - content_xml = file[0] - schema_type = file[1] - if schema_type == "submission": - LOG.debug("file has schema of submission type, continuing ...") - continue # No need to use submission xml - action = actions[schema_type] - if isinstance(action, List): - for item in action: - result = await self._execute_action(schema_type, content_xml, db_client, item) - results.append(result) - else: - result = await self._execute_action(schema_type, content_xml, db_client, action) - results.append(result) - - body = ujson.dumps(results, escape_forward_slashes=False) - LOG.info(f"Processed a submission of {len(results)} actions.") - return web.Response(body=body, status=200, content_type="application/json") - - async def validate(self, req: Request) -> Response: - """Handle validating an XML file sent to endpoint. - - :param req: Multipart POST request with submission.xml and files - :returns: JSON response indicating if validation was successful or not - """ - files = await _extract_xml_upload(req, extract_one=True) - xml_content, schema_type = files[0] - validator = await self._perform_validation(schema_type, xml_content) - return web.Response(body=validator.resp_body, content_type="application/json") - - async def _perform_validation(self, schema_type: str, xml_content: str) -> XMLValidator: - """Validate an xml. - - :param schema_type: Schema type of the object to validate. - :param xml_content: Metadata object - :raises: HTTPBadRequest if schema load fails - :returns: JSON response indicating if validation was successful or not - """ - try: - schema = XMLSchemaLoader().get_schema(schema_type) - LOG.info(f"{schema_type} schema loaded.") - return XMLValidator(schema, xml_content) - - except (SchemaNotFoundException, XMLSchemaException) as error: - reason = f"{error} ({schema_type})" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - async def _execute_action(self, schema: str, content: str, db_client: AsyncIOMotorClient, action: str) -> Dict: - """Complete the command in the action set of the submission file. - - Only "add/modify/validate" actions are supported. 
- - :param schema: Schema type of the object in question - :param content: Metadata object referred to in submission - :param db_client: Database client for database operations - :param action: Type of action to be done - :raises: HTTPBadRequest if an incorrect or non-supported action is called - :returns: Dict containing specific action that was completed - """ - if action == "add": - result = { - "accessionId": await XMLOperator(db_client).create_metadata_object(schema, content), - "schema": schema, - } - LOG.debug(f"added some content in {schema} ...") - return result - - elif action == "modify": - data_as_json = XMLToJSONParser().parse(schema, content) - if "accessionId" in data_as_json: - accession_id = data_as_json["accessionId"] - else: - alias = data_as_json["alias"] - query = MultiDictProxy(MultiDict([("alias", alias)])) - data, _, _, _ = await Operator(db_client).query_metadata_database(schema, query, 1, 1, []) - if len(data) > 1: - reason = "Alias in provided XML file corresponds with more than one existing metadata object." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - accession_id = data[0]["accessionId"] - data_as_json.pop("accessionId", None) - result = { - "accessionId": await Operator(db_client).update_metadata_object(schema, accession_id, data_as_json), - "schema": schema, - } - LOG.debug(f"modified some content in {schema} ...") - return result - - elif action == "validate": - validator = await self._perform_validation(schema, content) - return ujson.loads(validator.resp_body) - - else: - reason = f"Action {action} in XML is not supported." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - -class StaticHandler: - """Handler for static routes, mostly frontend and 404.""" - - def __init__(self, frontend_static_files: Path) -> None: - """Initialize path to frontend static files folder.""" - self.path = frontend_static_files - - async def frontend(self, req: Request) -> Response: - """Serve requests related to frontend SPA. - - :param req: GET request - :returns: Response containing frontpage static file - """ - serve_path = self.path.joinpath("./" + req.path) - - if not serve_path.exists() or not serve_path.is_file(): - LOG.debug(f"{serve_path} was not found or is not a file - serving index.html") - serve_path = self.path.joinpath("./index.html") - - LOG.debug(f"Serve Frontend SPA {req.path} by {serve_path}.") - - mime_type = mimetypes.guess_type(serve_path.as_posix()) - - return Response(body=serve_path.read_bytes(), content_type=(mime_type[0] or "text/html")) - - def setup_static(self) -> Path: - """Set path for static js files and correct return mimetypes. - - :returns: Path to static js files folder - """ - mimetypes.init() - mimetypes.types_map[".js"] = "application/javascript" - mimetypes.types_map[".js.map"] = "application/json" - mimetypes.types_map[".svg"] = "image/svg+xml" - mimetypes.types_map[".css"] = "text/css" - mimetypes.types_map[".css.map"] = "application/json" - LOG.debug("static paths for SPA set.") - return self.path / "static" - - -# Private functions shared between handlers -async def _extract_xml_upload(req: Request, extract_one: bool = False) -> List[Tuple[str, str]]: - """Extract submitted xml-file(s) from multi-part request. - - Files are sorted to spesific order by their schema priorities (e.g. - submission should be processed before study). - - :param req: POST request containing "multipart/form-data" upload - :raises: HTTPBadRequest if request is not valid for multipart or multiple files sent. 
HTTPNotFound if - schema was not found. - :returns: content and schema type for each uploaded file, sorted by schema - type. - """ - files: List[Tuple[str, str]] = [] - try: - reader = await req.multipart() - except AssertionError: - reason = "Request does not have valid multipart/form content" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - while True: - part = await reader.next() - # Following is probably error in aiohttp type hints, fixing so - # mypy doesn't complain about it. No runtime consequences. - part = cast(BodyPartReader, part) - if not part: - break - if extract_one and files: - reason = "Only one file can be sent to this endpoint at a time." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - if part.name: - schema_type = part.name.lower() - if schema_type not in schema_types: - reason = f"Specified schema {schema_type} was not found." - LOG.error(reason) - raise web.HTTPNotFound(reason=reason) - data = [] - while True: - chunk = await part.read_chunk() - if not chunk: - break - data.append(chunk) - xml_content = "".join(x.decode("UTF-8") for x in data) - files.append((xml_content, schema_type)) - LOG.debug(f"processed file in {schema_type}") - return sorted(files, key=lambda x: schema_types[x[1]]["priority"]) diff --git a/metadata_backend/api/handlers/__init__.py b/metadata_backend/api/handlers/__init__.py new file mode 100644 index 000000000..f3b5ffee8 --- /dev/null +++ b/metadata_backend/api/handlers/__init__.py @@ -0,0 +1 @@ +"""API handlers.""" diff --git a/metadata_backend/api/handlers/api_handlers.py b/metadata_backend/api/handlers/api_handlers.py new file mode 100644 index 000000000..e50092292 --- /dev/null +++ b/metadata_backend/api/handlers/api_handlers.py @@ -0,0 +1,307 @@ +"""Handle HTTP methods for server.""" +import json +import mimetypes +from math import ceil +from pathlib import Path +from typing import AsyncGenerator, Dict, List, Tuple, cast + +import ujson +from aiohttp import BodyPartReader, web +from aiohttp.web import Request, Response +from motor.motor_asyncio import AsyncIOMotorClient +from multidict import CIMultiDict + +from ...conf.conf import schema_types +from ...helpers.logger import LOG +from ...helpers.schema_loader import JSONSchemaLoader, SchemaNotFoundException +from ..middlewares import get_session +from ..operators import FolderOperator, UserOperator + + +class RESTAPIHandler: + """Handler for REST API methods.""" + + def _check_schema_exists(self, schema_type: str) -> None: + """Check if schema type exists. + + :param schema_type: schema type. + :raises: HTTPNotFound if schema does not exist. + """ + if schema_type not in schema_types.keys(): + reason = f"Specified schema {schema_type} was not found." + LOG.error(reason) + raise web.HTTPNotFound(reason=reason) + + def _get_page_param(self, req: Request, name: str, default: int) -> int: + """Handle page parameter value extracting. 
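+
+        For example, ``GET /folders?page=2&per_page=10`` yields ``2`` when
+        called with ``name="page"`` and ``10`` with ``name="per_page"``
+        (illustrative request).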
+
+        :param req: GET Request
+        :param name: Name of the parameter
+        :param default: Default value in case parameter not specified in request
+        :returns: Page parameter value
+        """
+        try:
+            param = int(req.query.get(name, default))
+        except ValueError:
+            reason = f"{name} parameter must be a number, now it is {req.query.get(name)}"
+            LOG.error(reason)
+            raise web.HTTPBadRequest(reason=reason)
+        if param < 1:
+            reason = f"{name} parameter must be over 0"
+            LOG.error(reason)
+            raise web.HTTPBadRequest(reason=reason)
+        return param
+
+    async def _handle_check_ownedby_user(self, req: Request, collection: str, accession_id: str) -> bool:
+        """Check if object belongs to user.
+
+        For this we check that the object is in exactly one folder and that the
+        folder belongs to the user. If the folder is published, it can be
+        browsed by other users as well.
+
+        :param req: HTTP request
+        :param collection: collection or schema of document
+        :param accession_id: document accession id
+        :raises: HTTPUnauthorized if accession id does not belong to user
+        :returns: bool
+        """
+        db_client = req.app["db_client"]
+        current_user = get_session(req)["user_info"]
+        user_op = UserOperator(db_client)
+        _check = False
+
+        if collection != "folders":
+
+            folder_op = FolderOperator(db_client)
+            check, folder_id, published = await folder_op.check_object_in_folder(collection, accession_id)
+            if published:
+                _check = True
+            elif check:
+                # if the draft object is found in a folder we just need to check if the folder belongs to the user
+                _check = await user_op.check_user_has_doc("folders", current_user, folder_id)
+            elif collection.startswith("template"):
+                # if collection is template but not found in a folder
+                # we also check if the object is in the user's templates;
+                # it will be there if it was not deleted after publishing
+                _check = await user_op.check_user_has_doc(collection, current_user, accession_id)
+            else:
+                _check = False
+        else:
+            _check = await user_op.check_user_has_doc(collection, current_user, accession_id)
+
+        if not _check:
+            reason = f"The ID: {accession_id} does not belong to current user."
+            LOG.error(reason)
+            raise web.HTTPUnauthorized(reason=reason)
+
+        return _check
+
+    async def _get_collection_objects(
+        self, folder_op: AsyncIOMotorClient, collection: str, seq: List
+    ) -> AsyncGenerator:
+        """Get object ids based on folder and collection.
+
+        Since many objects may be returned, this is implemented as a generator.
+
+        :param folder_op: Folder operator for database operations
+        :param collection: collection or schema of document
+        :param seq: list of folders
+        :returns: AsyncGenerator
+        """
+        for el in seq:
+            result = await folder_op.get_collection_objects(el, collection)
+
+            yield result
+
+    async def _handle_user_objects_collection(self, req: Request, collection: str) -> List:
+        """Retrieve a list of object accession ids belonging to the user in a collection.
+
+        :param req: HTTP request
+        :param collection: collection or schema of document
+        :returns: List
+        """
+        db_client = req.app["db_client"]
+        current_user = get_session(req)["user_info"]
+        user_op = UserOperator(db_client)
+        folder_op = FolderOperator(db_client)
+
+        user = await user_op.read_user(current_user)
+        res = self._get_collection_objects(folder_op, collection, user["folders"])
+
+        dt = []
+        async for r in res:
+            dt.extend(r)
+
+        return dt
+
+    async def _filter_by_user(self, req: Request, collection: str, seq: List) -> AsyncGenerator:
+        """For a list of objects, check if they are owned by the user.
+
+        This can be called using a partial from functools.
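+
+        A minimal sketch of that pattern (names here are illustrative):
+
+            from functools import partial
+
+            owned_only = partial(self._filter_by_user, req, "folders")
+            # ``owned_only(folders)`` then yields only the user's own folders.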
+ + :param req: HTTP request + :param collection: collection or schema of document + :param seq: list of folders + :returns: AsyncGenerator + """ + for el in seq: + if await self._handle_check_ownedby_user(req, collection, el["accessionId"]): + yield el + + async def _get_data(self, req: Request) -> Dict: + """Get the data content from a request. + + :param req: POST/PUT/PATCH request + :raises: HTTPBadRequest if request does not have proper JSON data + :returns: JSON content of the request + """ + try: + content = await req.json() + return content + except json.decoder.JSONDecodeError as e: + reason = "JSON is not correctly formatted." f" See: {e}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + async def get_schema_types(self, req: Request) -> Response: + """Get all possible metadata schema types from database. + + Basically returns which objects user can submit and query for. + :param req: GET Request + :returns: JSON list of schema types + """ + types_json = ujson.dumps([x["description"] for x in schema_types.values()], escape_forward_slashes=False) + LOG.info(f"GET schema types. Retrieved {len(schema_types)} schemas.") + return web.Response(body=types_json, status=200, content_type="application/json") + + async def get_json_schema(self, req: Request) -> Response: + """Get all JSON Schema for a specific schema type. + + Basically returns which objects user can submit and query for. + :param req: GET Request + :raises: HTTPBadRequest if request does not find the schema + :returns: JSON list of schema types + """ + schema_type = req.match_info["schema"] + self._check_schema_exists(schema_type) + + try: + schema = JSONSchemaLoader().get_schema(schema_type) + LOG.info(f"{schema_type} schema loaded.") + return web.Response( + body=ujson.dumps(schema, escape_forward_slashes=False), status=200, content_type="application/json" + ) + + except SchemaNotFoundException as error: + reason = f"{error} ({schema_type})" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + async def _header_links(self, url: str, page: int, size: int, total_objects: int) -> CIMultiDict[str]: + """Create link header for pagination. + + :param url: base url for request + :param page: current page + :param size: results per page + :param total_objects: total objects to compute the total pages + :returns: JSON with query results + """ + total_pages = ceil(total_objects / size) + prev_link = f'<{url}?page={page-1}&per_page={size}>; rel="prev", ' if page > 1 else "" + next_link = f'<{url}?page={page+1}&per_page={size}>; rel="next", ' if page < total_pages else "" + last_link = f'<{url}?page={total_pages}&per_page={size}>; rel="last"' if page < total_pages else "" + comma = ", " if page > 1 and page < total_pages else "" + first_link = f'<{url}?page=1&per_page={size}>; rel="first"{comma}' if page > 1 else "" + links = f"{prev_link}{next_link}{first_link}{last_link}" + link_headers = CIMultiDict(Link=f"{links}") + LOG.debug("Link headers created") + return link_headers + + +class StaticHandler: + """Handler for static routes, mostly frontend and 404.""" + + def __init__(self, frontend_static_files: Path) -> None: + """Initialize path to frontend static files folder.""" + self.path = frontend_static_files + + async def frontend(self, req: Request) -> Response: + """Serve requests related to frontend SPA. 
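+
+        Any path that does not resolve to a real file under the static root
+        falls back to ``index.html``, so a client-side route such as ``/home``
+        (an illustrative example) is rendered by the SPA router.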
+
+        :param req: GET request
+        :returns: Response containing frontpage static file
+        """
+        serve_path = self.path.joinpath("./" + req.path)
+
+        if not serve_path.exists() or not serve_path.is_file():
+            LOG.debug(f"{serve_path} was not found or is not a file - serving index.html")
+            serve_path = self.path.joinpath("./index.html")
+
+        LOG.debug(f"Serve Frontend SPA {req.path} by {serve_path}.")
+
+        mime_type = mimetypes.guess_type(serve_path.as_posix())
+
+        return Response(body=serve_path.read_bytes(), content_type=(mime_type[0] or "text/html"))
+
+    def setup_static(self) -> Path:
+        """Set path for static js files and correct return mimetypes.
+
+        :returns: Path to static js files folder
+        """
+        mimetypes.init()
+        mimetypes.types_map[".js"] = "application/javascript"
+        mimetypes.types_map[".js.map"] = "application/json"
+        mimetypes.types_map[".svg"] = "image/svg+xml"
+        mimetypes.types_map[".css"] = "text/css"
+        mimetypes.types_map[".css.map"] = "application/json"
+        LOG.debug("static paths for SPA set.")
+        return self.path / "static"
+
+
+# Private functions shared between handlers
+async def _extract_xml_upload(req: Request, extract_one: bool = False) -> List[Tuple[str, str]]:
+    """Extract submitted xml-file(s) from multi-part request.
+
+    Files are sorted into a specific order by their schema priorities (e.g.
+    submission should be processed before study).
+
+    :param req: POST request containing "multipart/form-data" upload
+    :raises: HTTPBadRequest if request is not valid for multipart or multiple files sent. HTTPNotFound if
+             schema was not found.
+    :returns: content and schema type for each uploaded file, sorted by schema
+              type.
+    """
+    files: List[Tuple[str, str]] = []
+    try:
+        reader = await req.multipart()
+    except AssertionError:
+        reason = "Request does not have valid multipart/form content"
+        LOG.error(reason)
+        raise web.HTTPBadRequest(reason=reason)
+    while True:
+        part = await reader.next()
+        # The following is probably an error in aiohttp type hints; the cast
+        # keeps mypy from complaining. No runtime consequences.
+        part = cast(BodyPartReader, part)
+        if not part:
+            break
+        if extract_one and files:
+            reason = "Only one file can be sent to this endpoint at a time."
+            LOG.error(reason)
+            raise web.HTTPBadRequest(reason=reason)
+        if part.name:
+            schema_type = part.name.lower()
+            if schema_type not in schema_types:
+                reason = f"Specified schema {schema_type} was not found."
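+                # Part names double as schema identifiers here, so an unknown
+                # part name means the client referenced a schema we do not serve.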
+                LOG.error(reason)
+                raise web.HTTPNotFound(reason=reason)
+            data = []
+            while True:
+                chunk = await part.read_chunk()
+                if not chunk:
+                    break
+                data.append(chunk)
+            xml_content = "".join(x.decode("UTF-8") for x in data)
+            files.append((xml_content, schema_type))
+            LOG.debug(f"processed file in {schema_type}")
+    return sorted(files, key=lambda x: schema_types[x[1]]["priority"])
diff --git a/metadata_backend/api/handlers/folder_handler.py b/metadata_backend/api/handlers/folder_handler.py
new file mode 100644
index 000000000..799029109
--- /dev/null
+++ b/metadata_backend/api/handlers/folder_handler.py
@@ -0,0 +1,322 @@
+"""Handle HTTP methods for server."""
+import re
+from datetime import date, datetime
+from distutils.util import strtobool
+from math import ceil
+from typing import Any
+
+import ujson
+from aiohttp import web
+from aiohttp.web import Request, Response
+from multidict import CIMultiDict
+
+from ...conf.conf import publisher
+from ...helpers.doi import DOIHandler
+from ...helpers.logger import LOG
+from ...helpers.validator import JSONValidator
+from .api_handlers import RESTAPIHandler
+from ..middlewares import get_session
+from ..operators import FolderOperator, Operator, UserOperator
+
+
+class FolderAPIHandler(RESTAPIHandler):
+    """API Handler for folders."""
+
+    def _check_patch_folder(self, patch_ops: Any) -> None:
+        """Check patch operations in request are valid.
+
+        We check that ``metadataObjects`` and ``drafts`` have ``_required_values``.
+        For tags we check that the ``submissionType`` takes either ``XML`` or
+        ``Form`` as values.
+        :param patch_ops: JSON patch request
+        :raises: HTTPBadRequest if request does not fulfill one of the requirements
+        :raises: HTTPUnauthorized if request tries to do anything other than add or replace
+        :returns: None
+        """
+        _required_paths = ["/name", "/description"]
+        _required_values = ["schema", "accessionId"]
+        _arrays = ["/metadataObjects/-", "/drafts/-", "/doiInfo"]
+        _tags = re.compile("^/(metadataObjects|drafts)/[0-9]*/(tags)$")
+
+        for op in patch_ops:
+            if _tags.match(op["path"]):
+                LOG.info(f"{op['op']} on tags in folder")
+                if "submissionType" in op["value"].keys() and op["value"]["submissionType"] not in ["XML", "Form"]:
+                    reason = "submissionType is restricted to either 'XML' or 'Form' values."
+                    LOG.error(reason)
+                    raise web.HTTPBadRequest(reason=reason)
+            else:
+                if all(i not in op["path"] for i in _required_paths + _arrays):
+                    reason = f"Request contains '{op['path']}' key that cannot be updated in folders."
+                    LOG.error(reason)
+                    raise web.HTTPBadRequest(reason=reason)
+                if op["op"] in ["remove", "copy", "test", "move"]:
+                    reason = f"{op['op']} on {op['path']} is not allowed."
+                    LOG.error(reason)
+                    raise web.HTTPUnauthorized(reason=reason)
+                if op["op"] == "replace" and op["path"] in _arrays:
+                    reason = f"{op['op']} on {op['path']}; replacing all objects is not allowed."
+                    LOG.error(reason)
+                    raise web.HTTPUnauthorized(reason=reason)
+                if op["path"] in _arrays and op["path"] != "/doiInfo":
+                    _ops = op["value"] if isinstance(op["value"], list) else [op["value"]]
+                    for item in _ops:
+                        if not all(key in item.keys() for key in _required_values):
+                            reason = "accessionId and schema are required fields."
+                            LOG.error(reason)
+                            raise web.HTTPBadRequest(reason=reason)
+                        if (
+                            "tags" in item
+                            and "submissionType" in item["tags"]
+                            and item["tags"]["submissionType"] not in ["XML", "Form"]
+                        ):
+                            reason = "submissionType is restricted to either 'XML' or 'Form' values."
+                            LOG.error(reason)
+                            raise web.HTTPBadRequest(reason=reason)
+
+    async def get_folders(self, req: Request) -> Response:
+        """Get a set of folders owned by the user with pagination values.
+
+        :param req: GET Request
+        :returns: JSON list of folders available for the user
+        """
+        page = self._get_page_param(req, "page", 1)
+        per_page = self._get_page_param(req, "per_page", 5)
+        sort = {"date": True, "score": False}
+        db_client = req.app["db_client"]
+
+        user_operator = UserOperator(db_client)
+        current_user = get_session(req)["user_info"]
+        user = await user_operator.read_user(current_user)
+
+        folder_query = {"folderId": {"$in": user["folders"]}}
+        # Check if only published or draft folders are requested
+        if "published" in req.query:
+            pub_param = req.query.get("published", "").title()
+            if pub_param in ["True", "False"]:
+                folder_query["published"] = {"$eq": bool(strtobool(pub_param))}
+            else:
+                reason = "'published' parameter must be either 'true' or 'false'"
+                LOG.error(reason)
+                raise web.HTTPBadRequest(reason=reason)
+
+        if "name" in req.query:
+            name_param = req.query.get("name", "")
+            if name_param:
+                folder_query = {"$text": {"$search": name_param}}
+                sort["score"] = True
+                sort["date"] = False
+
+        format_incoming = "%Y-%m-%d"
+        format_query = "%Y-%m-%d %H:%M:%S"
+        if "date_created_start" in req.query and "date_created_end" in req.query:
+            date_param_start = req.query.get("date_created_start", "")
+            date_param_end = req.query.get("date_created_end", "")
+
+            if datetime.strptime(date_param_start, format_incoming) and datetime.strptime(
+                date_param_end, format_incoming
+            ):
+                query_start = datetime.strptime(date_param_start + " 00:00:00", format_query).timestamp()
+                query_end = datetime.strptime(date_param_end + " 23:59:59", format_query).timestamp()
+                folder_query["dateCreated"] = {"$gte": query_start, "$lte": query_end}
+            else:
+                reason = f"'date_created_start' and 'date_created_end' parameters must be formatted as {format_incoming}"
+                LOG.error(reason)
+                raise web.HTTPBadRequest(reason=reason)
+
+        if "name" in req.query and "date_created_start" in req.query:
+            sort["score"] = True
+            sort["date"] = True
+
+        folder_operator = FolderOperator(db_client)
+        folders, total_folders = await folder_operator.query_folders(folder_query, page, per_page, sort)
+
+        result = ujson.dumps(
+            {
+                "page": {
+                    "page": page,
+                    "size": per_page,
+                    "totalPages": ceil(total_folders / per_page),
+                    "totalFolders": total_folders,
+                },
+                "folders": folders,
+            },
+            escape_forward_slashes=False,
+        )
+
+        url = f"{req.scheme}://{req.host}{req.path}"
+        link_headers = await self._header_links(url, page, per_page, total_folders)
+        LOG.debug(f"Pagination header links: {link_headers}")
+        LOG.info(f"Querying for user's folders resulted in {total_folders} folders")
+        return web.Response(
+            body=result,
+            status=200,
+            headers=link_headers,
+            content_type="application/json",
+        )
+
+    async def post_folder(self, req: Request) -> Response:
+        """Save object folder to database.
+
+        Also assigns the folder to the current user.
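+
+        An illustrative request body (validated against the folders schema):
+
+            {"name": "My experiment set", "description": "Drafts for upload"}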
+ + :param req: POST request + :returns: JSON response containing folder ID for submitted folder + """ + db_client = req.app["db_client"] + content = await self._get_data(req) + + JSONValidator(content, "folders").validate + + operator = FolderOperator(db_client) + folder = await operator.create_folder(content) + + user_op = UserOperator(db_client) + current_user = get_session(req)["user_info"] + await user_op.assign_objects(current_user, "folders", [folder]) + + body = ujson.dumps({"folderId": folder}, escape_forward_slashes=False) + + url = f"{req.scheme}://{req.host}{req.path}" + location_headers = CIMultiDict(Location=f"{url}/{folder}") + LOG.info(f"POST new folder with ID {folder} was successful.") + return web.Response(body=body, status=201, headers=location_headers, content_type="application/json") + + async def get_folder(self, req: Request) -> Response: + """Get one object folder by its folder id. + + :param req: GET request + :raises: HTTPNotFound if folder not owned by user + :returns: JSON response containing object folder + """ + folder_id = req.match_info["folderId"] + db_client = req.app["db_client"] + operator = FolderOperator(db_client) + + await operator.check_folder_exists(folder_id) + + await self._handle_check_ownedby_user(req, "folders", folder_id) + + folder = await operator.read_folder(folder_id) + + LOG.info(f"GET folder with ID {folder_id} was successful.") + return web.Response( + body=ujson.dumps(folder, escape_forward_slashes=False), status=200, content_type="application/json" + ) + + async def patch_folder(self, req: Request) -> Response: + """Update object folder with a specific folder id. + + :param req: PATCH request + :returns: JSON response containing folder ID for updated folder + """ + folder_id = req.match_info["folderId"] + db_client = req.app["db_client"] + + operator = FolderOperator(db_client) + + await operator.check_folder_exists(folder_id) + + # Check patch operations in request are valid + patch_ops = await self._get_data(req) + self._check_patch_folder(patch_ops) + + # Validate against folders schema if DOI is being added + for op in patch_ops: + if op["path"] == "/doiInfo": + curr_folder = await operator.read_folder(folder_id) + curr_folder["doiInfo"] = op["value"] + JSONValidator(curr_folder, "folders").validate + + await self._handle_check_ownedby_user(req, "folders", folder_id) + + upd_folder = await operator.update_folder(folder_id, patch_ops if isinstance(patch_ops, list) else [patch_ops]) + + body = ujson.dumps({"folderId": upd_folder}, escape_forward_slashes=False) + LOG.info(f"PATCH folder with ID {upd_folder} was successful.") + return web.Response(body=body, status=200, content_type="application/json") + + async def publish_folder(self, req: Request) -> Response: + """Update object folder specifically into published state. 
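+
+        Draft objects are deleted, a draft DOI is created, and the folder is
+        patched into ``published: true`` with DataCite-style ``extraInfo``
+        fields (see the JSON Patch operations built below).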
+ + :param req: PATCH request + :returns: JSON response containing folder ID for updated folder + """ + folder_id = req.match_info["folderId"] + db_client = req.app["db_client"] + operator = FolderOperator(db_client) + + await operator.check_folder_exists(folder_id) + + await self._handle_check_ownedby_user(req, "folders", folder_id) + + folder = await operator.read_folder(folder_id) + + obj_ops = Operator(db_client) + + # Create draft DOI and delete draft objects from the folder + doi = DOIHandler() + doi_data = await doi.create_draft_doi() + identifier = {"identifierType": "DOI", "doi": doi_data["fullDOI"]} + + for obj in folder["drafts"]: + await obj_ops.delete_metadata_object(obj["schema"], obj["accessionId"]) + + # Patch the folder into a published state + patch = [ + {"op": "replace", "path": "/published", "value": True}, + {"op": "replace", "path": "/drafts", "value": []}, + {"op": "add", "path": "/datePublished", "value": int(datetime.now().timestamp())}, + {"op": "add", "path": "/extraInfo/identifier", "value": identifier}, + {"op": "add", "path": "/extraInfo/url", "value": doi_data["dataset"]}, + {"op": "add", "path": "/extraInfo/publisher", "value": publisher}, + { + "op": "add", + "path": "/extraInfo/types", + "value": { + "ris": "DATA", + "bibtex": "misc", + "citeproc": "dataset", + "schemaOrg": "Dataset", + "resourceTypeGeneral": "Dataset", + }, + }, + {"op": "add", "path": "/extraInfo/publicationYear", "value": date.today().year}, + ] + new_folder = await operator.update_folder(folder_id, patch) + + body = ujson.dumps({"folderId": new_folder}, escape_forward_slashes=False) + LOG.info(f"Patching folder with ID {new_folder} was successful.") + return web.Response(body=body, status=200, content_type="application/json") + + async def delete_folder(self, req: Request) -> Response: + """Delete object folder from database. 
+ + :param req: DELETE request + :returns: HTTP No Content response + """ + folder_id = req.match_info["folderId"] + db_client = req.app["db_client"] + operator = FolderOperator(db_client) + + await operator.check_folder_exists(folder_id) + await operator.check_folder_published(folder_id) + + await self._handle_check_ownedby_user(req, "folders", folder_id) + + obj_ops = Operator(db_client) + + folder = await operator.read_folder(folder_id) + + for obj in folder["drafts"] + folder["metadataObjects"]: + await obj_ops.delete_metadata_object(obj["schema"], obj["accessionId"]) + + _folder_id = await operator.delete_folder(folder_id) + + user_op = UserOperator(db_client) + current_user = get_session(req)["user_info"] + await user_op.remove_objects(current_user, "folders", [folder_id]) + + LOG.info(f"DELETE folder with ID {_folder_id} was successful.") + return web.Response(status=204) diff --git a/metadata_backend/api/handlers/object_handler.py b/metadata_backend/api/handlers/object_handler.py new file mode 100644 index 000000000..27b627e29 --- /dev/null +++ b/metadata_backend/api/handlers/object_handler.py @@ -0,0 +1,256 @@ +"""Handle HTTP methods for server.""" +from math import ceil +from typing import Dict, Union + +import ujson +from aiohttp import web +from aiohttp.web import Request, Response +from multidict import CIMultiDict + +from ...helpers.logger import LOG +from ...helpers.validator import JSONValidator +from .api_handlers import RESTAPIHandler, _extract_xml_upload +from ..operators import FolderOperator, Operator, XMLOperator + + +class ObjectAPIHandler(RESTAPIHandler): + """API Handler for Objects.""" + + async def _handle_query(self, req: Request) -> Response: + """Handle query results. + + :param req: GET request with query parameters + :returns: JSON with query results + """ + collection = req.match_info["schema"] + req_format = req.query.get("format", "json").lower() + if req_format == "xml": + reason = "xml-formatted query results are not supported" + raise web.HTTPBadRequest(reason=reason) + + page = self._get_page_param(req, "page", 1) + per_page = self._get_page_param(req, "per_page", 10) + db_client = req.app["db_client"] + + filter_list = await self._handle_user_objects_collection(req, collection) + data, page_num, page_size, total_objects = await Operator(db_client).query_metadata_database( + collection, req.query, page, per_page, filter_list + ) + + result = ujson.dumps( + { + "page": { + "page": page_num, + "size": page_size, + "totalPages": ceil(total_objects / per_page), + "totalObjects": total_objects, + }, + "objects": data, + }, + escape_forward_slashes=False, + ) + url = f"{req.scheme}://{req.host}{req.path}" + link_headers = await self._header_links(url, page_num, per_page, total_objects) + LOG.debug(f"Pagination header links: {link_headers}") + LOG.info(f"Querying for objects in {collection} resulted in {total_objects} objects ") + return web.Response( + body=result, + status=200, + headers=link_headers, + content_type="application/json", + ) + + async def get_object(self, req: Request) -> Response: + """Get one metadata object by its accession id. + + Returns original XML object from backup if format query parameter is + set, otherwise JSON. 
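+
+        For example ``GET /objects/study/EDAG1234?format=xml`` (accession ID
+        illustrative) returns the stored XML instead of the JSON document.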
+
+        :param req: GET request
+        :returns: JSON or XML response containing metadata object
+        """
+        accession_id = req.match_info["accessionId"]
+        schema_type = req.match_info["schema"]
+        self._check_schema_exists(schema_type)
+        collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type
+
+        req_format = req.query.get("format", "json").lower()
+        db_client = req.app["db_client"]
+        operator = XMLOperator(db_client) if req_format == "xml" else Operator(db_client)
+        type_collection = f"xml-{collection}" if req_format == "xml" else collection
+
+        await operator.check_exists(collection, accession_id)
+
+        await self._handle_check_ownedby_user(req, collection, accession_id)
+
+        data, content_type = await operator.read_metadata_object(type_collection, accession_id)
+
+        data = data if req_format == "xml" else ujson.dumps(data, escape_forward_slashes=False)
+        LOG.info(f"GET object with accession ID {accession_id} from schema {collection}.")
+        return web.Response(body=data, status=200, content_type=content_type)
+
+    async def post_object(self, req: Request) -> Response:
+        """Save metadata object to database.
+
+        For a JSON request body we validate that it is consistent with the
+        associated JSON schema.
+
+        :param req: POST request
+        :returns: JSON response containing accessionId for submitted object
+        """
+        schema_type = req.match_info["schema"]
+        self._check_schema_exists(schema_type)
+        collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type
+
+        db_client = req.app["db_client"]
+        content: Union[Dict, str]
+        operator: Union[Operator, XMLOperator]
+        if req.content_type == "multipart/form-data":
+            files = await _extract_xml_upload(req, extract_one=True)
+            content, _ = files[0]
+            operator = XMLOperator(db_client)
+        else:
+            content = await self._get_data(req)
+            if not req.path.startswith("/drafts"):
+                JSONValidator(content, schema_type).validate
+            operator = Operator(db_client)
+
+        accession_id = await operator.create_metadata_object(collection, content)
+
+        body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False)
+        url = f"{req.scheme}://{req.host}{req.path}"
+        location_headers = CIMultiDict(Location=f"{url}/{accession_id}")
+        LOG.info(f"POST object with accession ID {accession_id} in schema {collection} was successful.")
+        return web.Response(
+            body=body,
+            status=201,
+            headers=location_headers,
+            content_type="application/json",
+        )
+
+    async def query_objects(self, req: Request) -> Response:
+        """Query metadata objects from database.
+
+        :param req: GET request with query parameters (can be empty).
+        :returns: Query results as JSON
+        """
+        schema_type = req.match_info["schema"]
+        self._check_schema_exists(schema_type)
+        return await self._handle_query(req)
+
+    async def delete_object(self, req: Request) -> Response:
+        """Delete metadata object from database.
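+
+        The object is first detached from the folder containing it; objects
+        in published folders are refused (see the checks below).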
+ + :param req: DELETE request + :raises: HTTPUnauthorized if folder published + :raises: HTTPUnprocessableEntity if object does not belong to current user + :returns: HTTPNoContent response + """ + schema_type = req.match_info["schema"] + self._check_schema_exists(schema_type) + collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type + + accession_id = req.match_info["accessionId"] + db_client = req.app["db_client"] + + await Operator(db_client).check_exists(collection, accession_id) + + await self._handle_check_ownedby_user(req, collection, accession_id) + + folder_op = FolderOperator(db_client) + exists, folder_id, published = await folder_op.check_object_in_folder(collection, accession_id) + if exists: + if published: + reason = "published objects cannot be deleted." + LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + await folder_op.remove_object(folder_id, collection, accession_id) + else: + reason = "This object does not seem to belong to any user." + LOG.error(reason) + raise web.HTTPUnprocessableEntity(reason=reason) + + accession_id = await Operator(db_client).delete_metadata_object(collection, accession_id) + + LOG.info(f"DELETE object with accession ID {accession_id} in schema {collection} was successful.") + return web.Response(status=204) + + async def put_object(self, req: Request) -> Response: + """Replace metadata object in database. + + For JSON request we don't allow replacing in the DB. + + :param req: PUT request + :raises: HTTPUnsupportedMediaType if JSON replace is attempted + :returns: JSON response containing accessionId for submitted object + """ + schema_type = req.match_info["schema"] + accession_id = req.match_info["accessionId"] + self._check_schema_exists(schema_type) + collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type + + db_client = req.app["db_client"] + content: Union[Dict, str] + operator: Union[Operator, XMLOperator] + if req.content_type == "multipart/form-data": + files = await _extract_xml_upload(req, extract_one=True) + content, _ = files[0] + operator = XMLOperator(db_client) + else: + content = await self._get_data(req) + if not req.path.startswith("/drafts"): + reason = "Replacing objects only allowed for XML." + LOG.error(reason) + raise web.HTTPUnsupportedMediaType(reason=reason) + operator = Operator(db_client) + + await operator.check_exists(collection, accession_id) + + await self._handle_check_ownedby_user(req, collection, accession_id) + + accession_id = await operator.replace_metadata_object(collection, accession_id, content) + + body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) + LOG.info(f"PUT object with accession ID {accession_id} in schema {collection} was successful.") + return web.Response(body=body, status=200, content_type="application/json") + + async def patch_object(self, req: Request) -> Response: + """Update metadata object in database. + + We do not support patch for XML. 
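+
+        An illustrative JSON body carrying only the fields to update:
+
+            {"description": "An updated description"}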
+ + :param req: PATCH request + :raises: HTTPUnauthorized if object is in published folder + :returns: JSON response containing accessionId for submitted object + """ + schema_type = req.match_info["schema"] + accession_id = req.match_info["accessionId"] + self._check_schema_exists(schema_type) + collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type + + db_client = req.app["db_client"] + operator: Union[Operator, XMLOperator] + if req.content_type == "multipart/form-data": + reason = "XML patching is not possible." + raise web.HTTPUnsupportedMediaType(reason=reason) + else: + content = await self._get_data(req) + operator = Operator(db_client) + + await operator.check_exists(collection, accession_id) + + await self._handle_check_ownedby_user(req, collection, accession_id) + + folder_op = FolderOperator(db_client) + exists, _, published = await folder_op.check_object_in_folder(collection, accession_id) + if exists: + if published: + reason = "Published objects cannot be updated." + LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + + accession_id = await operator.update_metadata_object(collection, accession_id, content) + + body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) + LOG.info(f"PATCH object with accession ID {accession_id} in schema {collection} was successful.") + return web.Response(body=body, status=200, content_type="application/json") diff --git a/metadata_backend/api/handlers/submission_handler.py b/metadata_backend/api/handlers/submission_handler.py new file mode 100644 index 000000000..d3492e4f2 --- /dev/null +++ b/metadata_backend/api/handlers/submission_handler.py @@ -0,0 +1,166 @@ +"""Handle HTTP methods for server.""" +from collections import Counter +from typing import Dict, List + +import ujson +from aiohttp import web +from aiohttp.web import Request, Response +from motor.motor_asyncio import AsyncIOMotorClient +from multidict import MultiDict, MultiDictProxy +from xmlschema import XMLSchemaException + +from ...helpers.logger import LOG +from ...helpers.parser import XMLToJSONParser +from ...helpers.schema_loader import SchemaNotFoundException, XMLSchemaLoader +from ...helpers.validator import XMLValidator +from .api_handlers import _extract_xml_upload +from ..operators import Operator, XMLOperator + + +class SubmissionAPIHandler: + """Handler for non-rest API methods.""" + + async def submit(self, req: Request) -> Response: + """Handle submission.xml containing submissions to server. + + First submission info is parsed and then for every action in submission + (add/modify/validate) corresponding operation is performed. + Finally submission info itself is added. + + :param req: Multipart POST request with submission.xml and files + :raises: HTTPBadRequest if request is missing some parameters or cannot be processed + :returns: XML-based receipt from submission + """ + files = await _extract_xml_upload(req) + schema_types = Counter(file[1] for file in files) + if "submission" not in schema_types: + reason = "There must be a submission.xml file in submission." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + if schema_types["submission"] > 1: + reason = "You should submit only one submission.xml file." 
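+            # More than one submission.xml would make the action list ambiguous.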
+            LOG.error(reason)
+            raise web.HTTPBadRequest(reason=reason)
+        submission_xml = files[0][0]
+        submission_json = XMLToJSONParser().parse("submission", submission_xml)
+
+        # Check what actions should be performed, collect them to a dictionary
+        actions: Dict[str, List] = {}
+        for action_set in submission_json["actions"]["action"]:
+            for action, attr in action_set.items():
+                if not attr:
+                    reason = (
+                        "You also need to provide necessary information "
+                        f"for submission action. Now {action} was provided "
+                        "without any extra information."
+                    )
+                    LOG.error(reason)
+                    raise web.HTTPBadRequest(reason=reason)
+                LOG.debug(f"submission has action {action}")
+                if attr["schema"] in actions:
+                    # gather multiple actions for the same schema into a list
+                    action_list = [actions[attr["schema"]], action]
+                    actions[attr["schema"]] = action_list
+                else:
+                    actions[attr["schema"]] = action
+
+        # Go through parsed files and do the actual action
+        results: List[Dict] = []
+        db_client = req.app["db_client"]
+        for file in files:
+            content_xml = file[0]
+            schema_type = file[1]
+            if schema_type == "submission":
+                LOG.debug("file has schema of submission type, continuing ...")
+                continue  # No need to use submission xml
+            action = actions[schema_type]
+            if isinstance(action, List):
+                for item in action:
+                    result = await self._execute_action(schema_type, content_xml, db_client, item)
+                    results.append(result)
+            else:
+                result = await self._execute_action(schema_type, content_xml, db_client, action)
+                results.append(result)
+
+        body = ujson.dumps(results, escape_forward_slashes=False)
+        LOG.info(f"Processed a submission of {len(results)} actions.")
+        return web.Response(body=body, status=200, content_type="application/json")
+
+    async def validate(self, req: Request) -> Response:
+        """Handle validating an XML file sent to the endpoint.
+
+        :param req: Multipart POST request with submission.xml and files
+        :returns: JSON response indicating if validation was successful or not
+        """
+        files = await _extract_xml_upload(req, extract_one=True)
+        xml_content, schema_type = files[0]
+        validator = await self._perform_validation(schema_type, xml_content)
+        return web.Response(body=validator.resp_body, content_type="application/json")
+
+    async def _perform_validation(self, schema_type: str, xml_content: str) -> XMLValidator:
+        """Validate an XML file against its XML schema.
+
+        :param schema_type: Schema type of the object to validate.
+        :param xml_content: Metadata object
+        :raises: HTTPBadRequest if schema load fails
+        :returns: XMLValidator wrapping the validation result
+        """
+        try:
+            schema = XMLSchemaLoader().get_schema(schema_type)
+            LOG.info(f"{schema_type} schema loaded.")
+            return XMLValidator(schema, xml_content)
+
+        except (SchemaNotFoundException, XMLSchemaException) as error:
+            reason = f"{error} ({schema_type})"
+            LOG.error(reason)
+            raise web.HTTPBadRequest(reason=reason)
+
+    async def _execute_action(self, schema: str, content: str, db_client: AsyncIOMotorClient, action: str) -> Dict:
+        """Complete the command in the action set of the submission file.
+
+        Only "add/modify/validate" actions are supported.
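+
+        For example, an ``add`` action on a ``study`` XML returns a dict of
+        the form ``{"accessionId": "<id>", "schema": "study"}`` (shape taken
+        from the code below; the ID value is illustrative).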
+
+        :param schema: Schema type of the object in question
+        :param content: Metadata object referred to in submission
+        :param db_client: Database client for database operations
+        :param action: Type of action to be done
+        :raises: HTTPBadRequest if an incorrect or non-supported action is called
+        :returns: Dict containing specific action that was completed
+        """
+        if action == "add":
+            result = {
+                "accessionId": await XMLOperator(db_client).create_metadata_object(schema, content),
+                "schema": schema,
+            }
+            LOG.debug(f"added some content in {schema} ...")
+            return result
+
+        elif action == "modify":
+            data_as_json = XMLToJSONParser().parse(schema, content)
+            if "accessionId" in data_as_json:
+                accession_id = data_as_json["accessionId"]
+            else:
+                alias = data_as_json["alias"]
+                query = MultiDictProxy(MultiDict([("alias", alias)]))
+                data, _, _, _ = await Operator(db_client).query_metadata_database(schema, query, 1, 1, [])
+                if len(data) > 1:
+                    reason = "Alias in provided XML file corresponds with more than one existing metadata object."
+                    LOG.error(reason)
+                    raise web.HTTPBadRequest(reason=reason)
+                accession_id = data[0]["accessionId"]
+            data_as_json.pop("accessionId", None)
+            result = {
+                "accessionId": await Operator(db_client).update_metadata_object(schema, accession_id, data_as_json),
+                "schema": schema,
+            }
+            LOG.debug(f"modified some content in {schema} ...")
+            return result
+
+        elif action == "validate":
+            validator = await self._perform_validation(schema, content)
+            return ujson.loads(validator.resp_body)
+
+        else:
+            reason = f"Action {action} in XML is not supported."
+            LOG.error(reason)
+            raise web.HTTPBadRequest(reason=reason)
diff --git a/metadata_backend/api/handlers/templates_handler.py b/metadata_backend/api/handlers/templates_handler.py
new file mode 100644
index 000000000..f45413142
--- /dev/null
+++ b/metadata_backend/api/handlers/templates_handler.py
@@ -0,0 +1,163 @@
+"""Handle HTTP methods for server."""
+from typing import Union
+
+import ujson
+from aiohttp import web
+from aiohttp.web import Request, Response
+from multidict import CIMultiDict
+
+from ...helpers.logger import LOG
+from ..middlewares import get_session
+from ..operators import Operator, UserOperator, XMLOperator
+from .api_handlers import RESTAPIHandler
+
+
+class TemplatesAPIHandler(RESTAPIHandler):
+    """API Handler for Templates."""
+
+    async def get_template(self, req: Request) -> Response:
+        """Get one metadata template by its accession id.
+
+        Returns JSON.
+
+        :param req: GET request
+        :returns: JSON response containing template
+        """
+        accession_id = req.match_info["accessionId"]
+        schema_type = req.match_info["schema"]
+        self._check_schema_exists(schema_type)
+        collection = f"template-{schema_type}"
+
+        db_client = req.app["db_client"]
+        operator = Operator(db_client)
+
+        await operator.check_exists(collection, accession_id)
+
+        await self._handle_check_ownedby_user(req, collection, accession_id)
+
+        data, content_type = await operator.read_metadata_object(collection, accession_id)
+
+        data = ujson.dumps(data, escape_forward_slashes=False)
+        LOG.info(f"GET template with accession ID {accession_id} from schema {collection}.")
+        return web.Response(body=data, status=200, content_type=content_type)
+
+    async def post_template(self, req: Request) -> Response:
+        """Save metadata template to database.
+
+        For a JSON request body we validate that it is consistent with the
+        associated JSON schema.
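+
+        The body may be a single template or a list of templates, e.g.
+        (illustrative):
+
+            {"template": {...}, "tags": {"submissionType": "Form"}}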
+
+        :param req: POST request
+        :returns: JSON response containing accessionId for submitted template
+        """
+        schema_type = req.match_info["schema"]
+        self._check_schema_exists(schema_type)
+        collection = f"template-{schema_type}"
+
+        db_client = req.app["db_client"]
+        content = await self._get_data(req)
+
+        user_op = UserOperator(db_client)
+        current_user = get_session(req)["user_info"]
+
+        operator = Operator(db_client)
+
+        if isinstance(content, list):
+            tmpl_list = []
+            for num, tmpl in enumerate(content):
+                if "template" not in tmpl:
+                    reason = f"template key is missing from request body for element: {num}."
+                    LOG.error(reason)
+                    raise web.HTTPBadRequest(reason=reason)
+                accession_id = await operator.create_metadata_object(collection, tmpl["template"])
+                data = [{"accessionId": accession_id, "schema": collection}]
+                if "tags" in tmpl:
+                    data[0]["tags"] = tmpl["tags"]
+                await user_op.assign_objects(current_user, "templates", data)
+                tmpl_list.append({"accessionId": accession_id})
+
+            body = ujson.dumps(tmpl_list, escape_forward_slashes=False)
+        else:
+            if "template" not in content:
+                reason = "template key is missing from request body."
+                LOG.error(reason)
+                raise web.HTTPBadRequest(reason=reason)
+            accession_id = await operator.create_metadata_object(collection, content["template"])
+            data = [{"accessionId": accession_id, "schema": collection}]
+            if "tags" in content:
+                data[0]["tags"] = content["tags"]
+            await user_op.assign_objects(current_user, "templates", data)
+
+            body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False)
+
+        url = f"{req.scheme}://{req.host}{req.path}"
+        location_headers = CIMultiDict(Location=f"{url}/{accession_id}")
+        LOG.info(f"POST template with accession ID {accession_id} in schema {collection} was successful.")
+        return web.Response(
+            body=body,
+            status=201,
+            headers=location_headers,
+            content_type="application/json",
+        )
+
+    async def patch_template(self, req: Request) -> Response:
+        """Update metadata template in database.
+
+        :param req: PATCH request
+        :raises: HTTPUnauthorized if template is in published folder
+        :returns: JSON response containing accessionId for submitted template
+        """
+        schema_type = req.match_info["schema"]
+        accession_id = req.match_info["accessionId"]
+        self._check_schema_exists(schema_type)
+        collection = f"template-{schema_type}"
+
+        db_client = req.app["db_client"]
+        operator: Union[Operator, XMLOperator]
+
+        content = await self._get_data(req)
+        operator = Operator(db_client)
+
+        await operator.check_exists(collection, accession_id)
+
+        await self._handle_check_ownedby_user(req, collection, accession_id)
+
+        accession_id = await operator.update_metadata_object(collection, accession_id, content)
+
+        body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False)
+        LOG.info(f"PATCH template with accession ID {accession_id} in schema {collection} was successful.")
+        return web.Response(body=body, status=200, content_type="application/json")
+
+    async def delete_template(self, req: Request) -> Response:
+        """Delete metadata template from database.
+
+        :param req: DELETE request
+        :raises: HTTPUnauthorized if folder published
+        :raises: HTTPUnprocessableEntity if template does not belong to current user
+        :returns: HTTPNoContent response
+        """
+        schema_type = req.match_info["schema"]
+        self._check_schema_exists(schema_type)
+        collection = f"template-{schema_type}"
+
+        accession_id = req.match_info["accessionId"]
+        db_client = req.app["db_client"]
+
+        await Operator(db_client).check_exists(collection, accession_id)
+
+        await self._handle_check_ownedby_user(req, collection, accession_id)
+
+        user_op = UserOperator(db_client)
+        current_user = get_session(req)["user_info"]
+        check_user = await user_op.check_user_has_doc(collection, current_user, accession_id)
+        if check_user:
+            await user_op.remove_objects(current_user, "templates", [accession_id])
+        else:
+            reason = "This template does not seem to belong to any user."
+            LOG.error(reason)
+            raise web.HTTPUnprocessableEntity(reason=reason)
+
+        accession_id = await Operator(db_client).delete_metadata_object(collection, accession_id)
+
+        LOG.info(f"DELETE template with accession ID {accession_id} in schema {collection} was successful.")
+        return web.Response(status=204)
diff --git a/metadata_backend/api/handlers/user_handler.py b/metadata_backend/api/handlers/user_handler.py
new file mode 100644
index 000000000..cd3a2bd9b
--- /dev/null
+++ b/metadata_backend/api/handlers/user_handler.py
@@ -0,0 +1,221 @@
+"""Handle HTTP methods for server."""
+import re
+from math import ceil
+from typing import Any, Dict, Tuple
+
+import ujson
+from aiohttp import web
+from aiohttp.web import Request, Response
+from multidict import CIMultiDict
+
+from ...conf.conf import aai_config
+from ...helpers.logger import LOG
+from .api_handlers import RESTAPIHandler
+from ..middlewares import decrypt_cookie, get_session
+from ..operators import FolderOperator, Operator, UserOperator
+
+
+class UserAPIHandler(RESTAPIHandler):
+    """API Handler for users."""
+
+    def _check_patch_user(self, patch_ops: Any) -> None:
+        """Check patch operations in request are valid.
+
+        We check that ``folders`` have string values (one or a list)
+        and ``drafts`` have ``_required_values``.
+        For tags we check that the ``submissionType`` takes either ``XML`` or
+        ``Form`` as values.
+        :param patch_ops: JSON patch request
+        :raises: HTTPBadRequest if request does not fulfill one of the requirements
+        :raises: HTTPUnauthorized if request tries to do anything other than add or replace
+        :returns: None
+        """
+        _arrays = ["/templates/-", "/folders/-"]
+        _required_values = ["schema", "accessionId"]
+        _tags = re.compile("^/(templates)/[0-9]*/(tags)$")
+        for op in patch_ops:
+            if _tags.match(op["path"]):
+                LOG.info(f"{op['op']} on tags in templates")
+                if "submissionType" in op["value"].keys() and op["value"]["submissionType"] not in ["XML", "Form"]:
+                    reason = "submissionType is restricted to either 'XML' or 'Form' values."
+                    LOG.error(reason)
+                    raise web.HTTPBadRequest(reason=reason)
+            else:
+                if all(i not in op["path"] for i in _arrays):
+                    reason = f"Request contains '{op['path']}' key that cannot be updated in the user object"
+                    LOG.error(reason)
+                    raise web.HTTPBadRequest(reason=reason)
+                if op["op"] in ["remove", "copy", "test", "move", "replace"]:
+                    reason = f"{op['op']} on {op['path']} is not allowed."
+                    LOG.error(reason)
+                    raise web.HTTPUnauthorized(reason=reason)
+                if op["path"] == "/folders/-":
+                    if not (isinstance(op["value"], str) or isinstance(op["value"], list)):
+                        reason = "We only accept string folder IDs."
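+                        # Folder references on a user are plain ID strings,
+                        # not embedded objects, hence the type check above.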
+                        LOG.error(reason)
+                        raise web.HTTPBadRequest(reason=reason)
+                if op["path"] == "/templates/-":
+                    _ops = op["value"] if isinstance(op["value"], list) else [op["value"]]
+                    for item in _ops:
+                        if not all(key in item.keys() for key in _required_values):
+                            reason = "accessionId and schema are required fields."
+                            LOG.error(reason)
+                            raise web.HTTPBadRequest(reason=reason)
+                        if (
+                            "tags" in item
+                            and "submissionType" in item["tags"]
+                            and item["tags"]["submissionType"] not in ["XML", "Form"]
+                        ):
+                            reason = "submissionType is restricted to either 'XML' or 'Form' values."
+                            LOG.error(reason)
+                            raise web.HTTPBadRequest(reason=reason)
+
+    async def get_user(self, req: Request) -> Response:
+        """Get one user by its user ID.
+
+        :param req: GET request
+        :raises: HTTPUnauthorized if not current user
+        :returns: JSON response containing user object or list of user templates or user folders by id
+        """
+        user_id = req.match_info["userId"]
+        if user_id != "current":
+            LOG.info(f"User ID {user_id} was requested")
+            raise web.HTTPUnauthorized(reason="Only current user retrieval is allowed")
+
+        current_user = get_session(req)["user_info"]
+
+        item_type = req.query.get("items", "").lower()
+        if item_type:
+            # Return only list of templates or list of folder IDs owned by the user
+            result, link_headers = await self._get_user_items(req, current_user, item_type)
+            return web.Response(
+                body=ujson.dumps(result, escape_forward_slashes=False),
+                status=200,
+                headers=link_headers,
+                content_type="application/json",
+            )
+        else:
+            # Return whole user object if templates or folders are not specified in query
+            db_client = req.app["db_client"]
+            operator = UserOperator(db_client)
+            user = await operator.read_user(current_user)
+            LOG.info(f"GET user with ID {user_id} was successful.")
+            return web.Response(
+                body=ujson.dumps(user, escape_forward_slashes=False), status=200, content_type="application/json"
+            )
+
+    async def patch_user(self, req: Request) -> Response:
+        """Update user object with a specific user ID.
+
+        :param req: PATCH request
+        :raises: HTTPUnauthorized if not current user
+        :returns: JSON response containing user ID for updated user object
+        """
+        user_id = req.match_info["userId"]
+        if user_id != "current":
+            LOG.info(f"User ID {user_id} patch was requested")
+            raise web.HTTPUnauthorized(reason="Only current user operations are allowed")
+        db_client = req.app["db_client"]
+
+        patch_ops = await self._get_data(req)
+        self._check_patch_user(patch_ops)
+
+        operator = UserOperator(db_client)
+
+        current_user = get_session(req)["user_info"]
+        user = await operator.update_user(current_user, patch_ops if isinstance(patch_ops, list) else [patch_ops])
+
+        body = ujson.dumps({"userId": user}, escape_forward_slashes=False)
+        LOG.info(f"PATCH user with ID {user} was successful.")
+        return web.Response(body=body, status=200, content_type="application/json")
+
+    async def delete_user(self, req: Request) -> Response:
+        """Delete user from database.
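+
+        Deletion cascades: unpublished folders and their objects, as well as
+        the user's templates, are removed before the user document itself and
+        the active session (see the steps below).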
+
+ :param req: DELETE request
+ :raises: HTTPUnauthorized if not current user
+ :returns: HTTPNoContent response
+ """
+ user_id = req.match_info["userId"]
+ if user_id != "current":
+ LOG.info(f"User ID {user_id} delete was requested")
+ raise web.HTTPUnauthorized(reason="Only current user deletion is allowed")
+ db_client = req.app["db_client"]
+ operator = UserOperator(db_client)
+ fold_ops = FolderOperator(db_client)
+ obj_ops = Operator(db_client)
+
+ current_user = get_session(req)["user_info"]
+ user = await operator.read_user(current_user)
+
+ for folder_id in user["folders"]:
+ _folder = await fold_ops.read_folder(folder_id)
+ if "published" in _folder and not _folder["published"]:
+ for obj in _folder["drafts"] + _folder["metadataObjects"]:
+ await obj_ops.delete_metadata_object(obj["schema"], obj["accessionId"])
+ await fold_ops.delete_folder(folder_id)
+
+ for tmpl in user["templates"]:
+ await obj_ops.delete_metadata_object(tmpl["schema"], tmpl["accessionId"])
+
+ await operator.delete_user(current_user)
+ LOG.info(f"DELETE user with ID {current_user} was successful.")
+
+ cookie = decrypt_cookie(req)
+
+ try:
+ req.app["Session"].pop(cookie["id"])
+ req.app["Cookies"].remove(cookie["id"])
+ except KeyError:
+ pass
+
+ response = web.HTTPSeeOther(f"{aai_config['redirect']}/")
+ response.headers["Location"] = (
+ "/" if aai_config["redirect"] == aai_config["domain"] else f"{aai_config['redirect']}/"
+ )
+ LOG.debug("Logged out user.")
+ raise response
+
+ async def _get_user_items(self, req: Request, user: Dict, item_type: str) -> Tuple[Dict, CIMultiDict[str]]:
+ """Get templates or folders owned by the user with pagination values.
+
+ :param req: GET request
+ :param user: User object
+ :param item_type: Name of the items ("templates" or "folders")
+ :raises: HTTPBadRequest if item_type is not "templates" or "folders"
+ :returns: Paginated list of user items and link header
+ """
+ # Check item_type parameter is not faulty
+ if item_type not in ["templates", "folders"]:
+ reason = f"{item_type} is a faulty item parameter. 
Should be either folders or templates" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + page = self._get_page_param(req, "page", 1) + per_page = self._get_page_param(req, "per_page", 5) + + db_client = req.app["db_client"] + operator = UserOperator(db_client) + user_id = req.match_info["userId"] + + query = {"userId": user} + + items, total_items = await operator.filter_user(query, item_type, page, per_page) + LOG.info(f"GET user with ID {user_id} was successful.") + + result = { + "page": { + "page": page, + "size": per_page, + "totalPages": ceil(total_items / per_page), + "total" + item_type.title(): total_items, + }, + item_type: items, + } + + url = f"{req.scheme}://{req.host}{req.path}" + link_headers = await self._header_links(url, page, per_page, total_items) + LOG.debug(f"Pagination header links: {link_headers}") + LOG.info(f"Querying for user's {item_type} resulted in {total_items} {item_type}") + return result, link_headers diff --git a/metadata_backend/server.py b/metadata_backend/server.py index c39d3364e..a600b2d06 100644 --- a/metadata_backend/server.py +++ b/metadata_backend/server.py @@ -1,26 +1,23 @@ """Functions to launch backend server.""" import asyncio +import secrets +import time import uvloop from aiohttp import web from cryptography.fernet import Fernet -import secrets -import time -from .api.handlers import ( - RESTAPIHandler, - StaticHandler, - SubmissionAPIHandler, - FolderAPIHandler, - UserAPIHandler, - ObjectAPIHandler, - TemplatesAPIHandler, -) from .api.auth import AccessHandler -from .api.middlewares import http_error_handler, check_login +from .api.handlers.api_handlers import RESTAPIHandler, StaticHandler +from .api.handlers.folder_handler import FolderAPIHandler +from .api.handlers.object_handler import ObjectAPIHandler +from .api.handlers.submission_handler import SubmissionAPIHandler +from .api.handlers.templates_handler import TemplatesAPIHandler +from .api.handlers.user_handler import UserAPIHandler from .api.health import HealthHandler -from .conf.conf import create_db_client, frontend_static_files, aai_config +from .api.middlewares import check_login, http_error_handler +from .conf.conf import aai_config, create_db_client, frontend_static_files from .helpers.logger import LOG asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) diff --git a/tests/test_handlers.py b/tests/test_handlers.py index b7d62a419..6e62bb1b1 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -4,12 +4,13 @@ from unittest.mock import patch from aiohttp import FormData -from aiohttp.test_utils import AioHTTPTestCase - +from aiohttp.test_utils import AioHTTPTestCase, make_mocked_coro +from metadata_backend.api.handlers.api_handlers import RESTAPIHandler from metadata_backend.api.middlewares import generate_cookie -from .mockups import get_request_with_fernet from metadata_backend.server import init +from .mockups import get_request_with_fernet + class HandlersTestCase(AioHTTPTestCase): """API endpoint class test cases.""" @@ -22,6 +23,14 @@ async def get_application(self): server["Session"] = {"user_info": ["value", "value"]} return server + def authenticate(self, client): + """Authenticate client.""" + request = get_request_with_fernet() + request.app["Crypt"] = client.app["Crypt"] + cookie, cookiestring = generate_cookie(request) + client.app["Session"] = {cookie["id"]: {"access_token": "mock_token_value", "user_info": {}}} + client._session.cookie_jar.update_cookies({"MTD_SESSION": cookiestring}) + async def setUpAsync(self): """Configure 
default values for testing and other modules. @@ -34,6 +43,7 @@ async def setUpAsync(self): self.client = await self.get_client(self.server) await self.client.start_server() + self.authenticate(self.client) self.test_ega_string = "EGA123456" self.query_accessionId = ("EDAG3991701442770179",) @@ -66,15 +76,8 @@ async def setUpAsync(self): "templates": [], "folders": ["FOL12345678"], } - self.test_draft_doi = {"fullDOI": "10.xxxx/yyyyy", "dataset": "https://doi.org/10.xxxx/yyyyy"} - class_parser = "metadata_backend.api.handlers.XMLToJSONParser" - class_operator = "metadata_backend.api.handlers.Operator" - class_xmloperator = "metadata_backend.api.handlers.XMLOperator" - class_folderoperator = "metadata_backend.api.handlers.FolderOperator" - class_useroperator = "metadata_backend.api.handlers.UserOperator" - class_doihandler = "metadata_backend.api.handlers.DOIHandler" - operator_config = { + self.operator_config = { "read_metadata_object.side_effect": self.fake_operator_read_metadata_object, "query_metadata_database.side_effect": self.fake_operator_query_metadata_object, "create_metadata_object.side_effect": self.fake_operator_create_metadata_object, @@ -82,53 +85,27 @@ async def setUpAsync(self): "update_metadata_object.side_effect": self.fake_operator_update_metadata_object, "replace_metadata_object.side_effect": self.fake_operator_replace_metadata_object, } - xmloperator_config = { + self.xmloperator_config = { "read_metadata_object.side_effect": self.fake_xmloperator_read_metadata_object, "create_metadata_object.side_effect": self.fake_xmloperator_create_metadata_object, "replace_metadata_object.side_effect": self.fake_xmloperator_replace_metadata_object, } - folderoperator_config = { + self.folderoperator_config = { "create_folder.side_effect": self.fake_folderoperator_create_folder, "read_folder.side_effect": self.fake_folderoperator_read_folder, "delete_folder.side_effect": self.fake_folderoperator_delete_folder, "check_object_in_folder.side_effect": self.fake_folderoperator_check_object, - "get_collection_objects.side_effect": self.fake_folderoperator_get_collection_objects, } - useroperator_config = { + self.useroperator_config = { "create_user.side_effect": self.fake_useroperator_create_user, "read_user.side_effect": self.fake_useroperator_read_user, "filter_user.side_effect": self.fake_useroperator_filter_user, - "check_user_has_doc.side_effect": self.fake_useroperator_user_has_folder, } - self.patch_parser = patch(class_parser, spec=True) - self.patch_operator = patch(class_operator, **operator_config, spec=True) - self.patch_xmloperator = patch(class_xmloperator, **xmloperator_config, spec=True) - self.patch_folderoperator = patch(class_folderoperator, **folderoperator_config, spec=True) - self.patch_useroperator = patch(class_useroperator, **useroperator_config, spec=True) - self.patch_doihandler = patch(class_doihandler, spec=True) - self.MockedParser = self.patch_parser.start() - self.MockedOperator = self.patch_operator.start() - self.MockedXMLOperator = self.patch_xmloperator.start() - self.MockedFolderOperator = self.patch_folderoperator.start() - self.MockedUserOperator = self.patch_useroperator.start() - self.MockedDoiHandler = self.patch_doihandler.start() - # Set up authentication - request = get_request_with_fernet() - request.app["Crypt"] = self.client.app["Crypt"] - cookie, cookiestring = generate_cookie(request) - self.client.app["Session"] = {cookie["id"]: {"access_token": "mock_token_value", "user_info": {}}} - 
self.client._session.cookie_jar.update_cookies({"MTD_SESSION": cookiestring}) + RESTAPIHandler._handle_check_ownedby_user = make_mocked_coro(True) async def tearDownAsync(self): """Cleanup mocked stuff.""" - self.patch_parser.stop() - self.patch_operator.stop() - self.patch_xmloperator.stop() - self.patch_folderoperator.stop() - self.patch_useroperator.stop() - self.patch_doihandler.stop() - await self.client.close() def create_submission_data(self, files): @@ -195,14 +172,6 @@ async def fake_folderoperator_check_object(self, schema_type, accession_id): data = True, self.folder_id, False return data - async def fake_folderoperator_get_collection_objects(self, schema_type, accession_id): - """Fake get collection of objects in folder.""" - return ["EDAG3991701442770179", "EGA123456"] - - async def fake_useroperator_user_has_folder(self, schema_type, user_id, folder_id): - """Fake check object in folder.""" - return True - async def fake_useroperator_create_user(self, content): """Fake user operation to return mocked userId.""" return self.user_id @@ -215,37 +184,9 @@ async def fake_useroperator_filter_user(self, query, item_type, page, per_page): """Fake read operation to return mocked user.""" return self.test_user[item_type], len(self.test_user[item_type]) - async def test_submit_endpoint_submission_does_not_fail(self): - """Test that submission with valid SUBMISSION.xml does not fail.""" - files = [("submission", "ERA521986_valid.xml")] - data = self.create_submission_data(files) - response = await self.client.post("/submit", data=data) - self.assertEqual(response.status, 200) - self.assertEqual(response.content_type, "application/json") - async def test_submit_endpoint_fails_without_submission_xml(self): - """Test that basic POST submission fails with no submission.xml. - - User should also be notified for missing file. - """ - files = [("analysis", "ERZ266973.xml")] - data = self.create_submission_data(files) - response = await self.client.post("/submit", data=data) - failure_text = "There must be a submission.xml file in submission." - self.assertEqual(response.status, 400) - self.assertIn(failure_text, await response.text()) - - async def test_submit_endpoint_fails_with_many_submission_xmls(self): - """Test submission fails when there's too many submission.xml -files. - - User should be notified for submitting too many files. - """ - files = [("submission", "ERA521986_valid.xml"), ("submission", "ERA521986_valid2.xml")] - data = self.create_submission_data(files) - response = await self.client.post("/submit", data=data) - failure_text = "You should submit only one submission.xml file." - self.assertEqual(response.status, 400) - self.assertIn(failure_text, await response.text()) +class APIHandlerTestCase(HandlersTestCase): + """Schema API endpoint class test cases.""" async def test_correct_schema_types_are_returned(self): """Test api endpoint for all schema types.""" @@ -285,6 +226,138 @@ async def test_raises_not_found_schema(self): resp_json = await response.json() self.assertEqual(resp_json["detail"], "The provided schema type could not be found. (project)") + +class SubmissionHandlerTestCase(HandlersTestCase): + """Submission API endpoint class test cases.""" + + async def setUpAsync(self): + """Configure default values for testing and other modules. + + This patches used modules and sets default return values for their + methods. 
+ """ + + await super().setUpAsync() + class_parser = "metadata_backend.api.handlers.submission_handler.XMLToJSONParser" + self.patch_parser = patch(class_parser, spec=True) + self.MockedParser = self.patch_parser.start() + + class_xmloperator = "metadata_backend.api.handlers.submission_handler.XMLOperator" + self.patch_xmloperator = patch(class_xmloperator, **self.xmloperator_config, spec=True) + self.MockedXMLOperator = self.patch_xmloperator.start() + + async def tearDownAsync(self): + """Cleanup mocked stuff.""" + await super().tearDownAsync() + self.patch_parser.stop() + self.patch_xmloperator.stop() + + async def test_submit_endpoint_submission_does_not_fail(self): + """Test that submission with valid SUBMISSION.xml does not fail.""" + files = [("submission", "ERA521986_valid.xml")] + data = self.create_submission_data(files) + response = await self.client.post("/submit", data=data) + self.assertEqual(response.status, 200) + self.assertEqual(response.content_type, "application/json") + + async def test_submit_endpoint_fails_without_submission_xml(self): + """Test that basic POST submission fails with no submission.xml. + + User should also be notified for missing file. + """ + files = [("analysis", "ERZ266973.xml")] + data = self.create_submission_data(files) + response = await self.client.post("/submit", data=data) + failure_text = "There must be a submission.xml file in submission." + self.assertEqual(response.status, 400) + self.assertIn(failure_text, await response.text()) + + async def test_submit_endpoint_fails_with_many_submission_xmls(self): + """Test submission fails when there's too many submission.xml -files. + + User should be notified for submitting too many files. + """ + files = [("submission", "ERA521986_valid.xml"), ("submission", "ERA521986_valid2.xml")] + data = self.create_submission_data(files) + response = await self.client.post("/submit", data=data) + failure_text = "You should submit only one submission.xml file." 
+ self.assertEqual(response.status, 400) + self.assertIn(failure_text, await response.text()) + + async def test_validation_passes_for_valid_xml(self): + """Test validation endpoint for valid xml.""" + files = [("study", "SRP000539.xml")] + data = self.create_submission_data(files) + response = await self.client.post("/validate", data=data) + self.assertEqual(response.status, 200) + self.assertIn('{"isValid":true}', await response.text()) + + async def test_validation_fails_bad_schema(self): + """Test validation fails for bad schema and valid xml.""" + files = [("fake", "SRP000539.xml")] + data = self.create_submission_data(files) + response = await self.client.post("/validate", data=data) + self.assertEqual(response.status, 404) + + async def test_validation_fails_for_invalid_xml_syntax(self): + """Test validation endpoint for XML with bad syntax.""" + files = [("study", "SRP000539_invalid.xml")] + data = self.create_submission_data(files) + response = await self.client.post("/validate", data=data) + resp_dict = await response.json() + self.assertEqual(response.status, 200) + self.assertIn("Faulty XML file was given, mismatched tag", resp_dict["detail"]["reason"]) + + async def test_validation_fails_for_invalid_xml(self): + """Test validation endpoint for invalid xml.""" + files = [("study", "SRP000539_invalid2.xml")] + data = self.create_submission_data(files) + response = await self.client.post("/validate", data=data) + resp_dict = await response.json() + self.assertEqual(response.status, 200) + self.assertIn("value must be one of", resp_dict["detail"]["reason"]) + + async def test_validation_fails_with_too_many_files(self): + """Test validation endpoint for too many files.""" + files = [("submission", "ERA521986_valid.xml"), ("submission", "ERA521986_valid2.xml")] + data = self.create_submission_data(files) + response = await self.client.post("/validate", data=data) + reason = "Only one file can be sent to this endpoint at a time." + self.assertEqual(response.status, 400) + self.assertIn(reason, await response.text()) + + +class ObjectHandlerTestCase(HandlersTestCase): + """Object API endpoint class test cases.""" + + async def setUpAsync(self): + """Configure default values for testing and other modules. + + This patches used modules and sets default return values for their + methods. 
+ """ + + await super().setUpAsync() + + class_xmloperator = "metadata_backend.api.handlers.object_handler.XMLOperator" + self.patch_xmloperator = patch(class_xmloperator, **self.xmloperator_config, spec=True) + self.MockedXMLOperator = self.patch_xmloperator.start() + + class_operator = "metadata_backend.api.handlers.object_handler.Operator" + self.patch_operator = patch(class_operator, **self.operator_config, spec=True) + self.MockedOperator = self.patch_operator.start() + + class_folderoperator = "metadata_backend.api.handlers.object_handler.FolderOperator" + self.patch_folderoperator = patch(class_folderoperator, **self.folderoperator_config, spec=True) + self.MockedFolderOperator = self.patch_folderoperator.start() + + async def tearDownAsync(self): + """Cleanup mocked stuff.""" + await super().tearDownAsync() + self.patch_xmloperator.stop() + self.patch_folderoperator.stop() + self.patch_operator.stop() + async def test_submit_object_works(self): """Test that submission is handled, XMLOperator is called.""" files = [("study", "SRP000539.xml")] @@ -421,6 +494,7 @@ async def test_submit_object_fails_with_too_many_files(self): self.assertEqual(response.status, 400) self.assertIn(reason, await response.text()) + # handle_check_ownedby_user.return_value = True async def test_get_object(self): """Test that accessionId returns correct JSON object.""" url = f"/objects/study/{self.query_accessionId}" @@ -447,6 +521,7 @@ async def test_get_object_as_xml(self): async def test_query_is_called_and_returns_json_in_correct_format(self): """Test query method calls operator and returns mocked JSON object.""" + RESTAPIHandler._handle_user_objects_collection = make_mocked_coro(["EDAG3991701442770179", "EGA123456"]) url = f"/objects/study?studyType=foo&name=bar&page={self.page_num}" f"&per_page={self.page_size}" response = await self.client.get(url) self.assertEqual(response.status, 200) @@ -479,48 +554,6 @@ async def test_query_fails_with_xml_format(self): self.assertEqual(response.status, 400) self.assertIn("xml-formatted query results are not supported", json_resp["detail"]) - async def test_validation_passes_for_valid_xml(self): - """Test validation endpoint for valid xml.""" - files = [("study", "SRP000539.xml")] - data = self.create_submission_data(files) - response = await self.client.post("/validate", data=data) - self.assertEqual(response.status, 200) - self.assertIn('{"isValid":true}', await response.text()) - - async def test_validation_fails_bad_schema(self): - """Test validation fails for bad schema and valid xml.""" - files = [("fake", "SRP000539.xml")] - data = self.create_submission_data(files) - response = await self.client.post("/validate", data=data) - self.assertEqual(response.status, 404) - - async def test_validation_fails_for_invalid_xml_syntax(self): - """Test validation endpoint for XML with bad syntax.""" - files = [("study", "SRP000539_invalid.xml")] - data = self.create_submission_data(files) - response = await self.client.post("/validate", data=data) - resp_dict = await response.json() - self.assertEqual(response.status, 200) - self.assertIn("Faulty XML file was given, mismatched tag", resp_dict["detail"]["reason"]) - - async def test_validation_fails_for_invalid_xml(self): - """Test validation endpoint for invalid xml.""" - files = [("study", "SRP000539_invalid2.xml")] - data = self.create_submission_data(files) - response = await self.client.post("/validate", data=data) - resp_dict = await response.json() - self.assertEqual(response.status, 200) - self.assertIn("value must 
be one of", resp_dict["detail"]["reason"]) - - async def test_validation_fails_with_too_many_files(self): - """Test validation endpoint for too many files.""" - files = [("submission", "ERA521986_valid.xml"), ("submission", "ERA521986_valid2.xml")] - data = self.create_submission_data(files) - response = await self.client.post("/validate", data=data) - reason = "Only one file can be sent to this endpoint at a time." - self.assertEqual(response.status, 400) - self.assertIn(reason, await response.text()) - async def test_operations_fail_for_wrong_schema_type(self): """Test 404 error is raised if incorrect schema name is given.""" get_resp = await self.client.get("/objects/bad_scehma_name/some_id") @@ -557,6 +590,174 @@ async def test_query_with_invalid_pagination_params(self): get_resp = await self.client.get("/objects/study?per_page=0") self.assertEqual(get_resp.status, 400) + +class UserHandlerTestCase(HandlersTestCase): + """User API endpoint class test cases.""" + + async def setUpAsync(self): + """Configure default values for testing and other modules. + + This patches used modules and sets default return values for their + methods. + """ + + await super().setUpAsync() + class_useroperator = "metadata_backend.api.handlers.user_handler.UserOperator" + self.patch_useroperator = patch(class_useroperator, **self.useroperator_config, spec=True) + self.MockedUserOperator = self.patch_useroperator.start() + + class_folderoperator = "metadata_backend.api.handlers.user_handler.FolderOperator" + self.patch_folderoperator = patch(class_folderoperator, **self.folderoperator_config, spec=True) + self.MockedFolderOperator = self.patch_folderoperator.start() + + class_operator = "metadata_backend.api.handlers.user_handler.Operator" + self.patch_operator = patch(class_operator, **self.operator_config, spec=True) + self.MockedOperator = self.patch_operator.start() + + async def tearDownAsync(self): + """Cleanup mocked stuff.""" + await super().tearDownAsync() + self.patch_useroperator.stop() + self.patch_folderoperator.stop() + self.patch_operator.stop() + + async def test_get_user_works(self): + """Test user object is returned when correct user id is given.""" + response = await self.client.get("/users/current") + self.assertEqual(response.status, 200) + self.MockedUserOperator().read_user.assert_called_once() + json_resp = await response.json() + self.assertEqual(self.test_user, json_resp) + + async def test_get_user_drafts_with_no_drafts(self): + """Test getting user drafts when user has no drafts.""" + response = await self.client.get("/users/current?items=templates") + self.assertEqual(response.status, 200) + self.MockedUserOperator().filter_user.assert_called_once() + json_resp = await response.json() + result = { + "page": { + "page": 1, + "size": 5, + "totalPages": 0, + "totalTemplates": 0, + }, + "templates": [], + } + self.assertEqual(json_resp, result) + + async def test_get_user_templates_with_1_template(self): + """Test getting user templates when user has 1 draft.""" + user = self.test_user + user["templates"].append(self.metadata_json) + self.MockedUserOperator().filter_user.return_value = (user["templates"], 1) + response = await self.client.get("/users/current?items=templates") + self.assertEqual(response.status, 200) + self.MockedUserOperator().filter_user.assert_called_once() + json_resp = await response.json() + result = { + "page": { + "page": 1, + "size": 5, + "totalPages": 1, + "totalTemplates": 1, + }, + "templates": [self.metadata_json], + } + self.assertEqual(json_resp, result) 
+ + async def test_get_user_folder_list(self): + """Test get user with folders url returns a folder ID.""" + self.MockedUserOperator().filter_user.return_value = (self.test_user["folders"], 1) + response = await self.client.get("/users/current?items=folders") + self.assertEqual(response.status, 200) + self.MockedUserOperator().filter_user.assert_called_once() + json_resp = await response.json() + result = { + "page": { + "page": 1, + "size": 5, + "totalPages": 1, + "totalFolders": 1, + }, + "folders": ["FOL12345678"], + } + self.assertEqual(json_resp, result) + + async def test_get_user_items_with_bad_param(self): + """Test that error is raised if items parameter in query is not templates or folders.""" + response = await self.client.get("/users/current?items=wrong_thing") + self.assertEqual(response.status, 400) + json_resp = await response.json() + self.assertEqual( + json_resp["detail"], "wrong_thing is a faulty item parameter. Should be either folders or templates" + ) + + async def test_user_deletion_is_called(self): + """Test that user object would be deleted.""" + self.MockedUserOperator().read_user.return_value = self.test_user + self.MockedUserOperator().delete_user.return_value = None + await self.client.delete("/users/current") + self.MockedUserOperator().read_user.assert_called_once() + self.MockedUserOperator().delete_user.assert_called_once() + + async def test_update_user_fails_with_wrong_key(self): + """Test that user object does not update when forbidden keys are provided.""" + data = [{"op": "add", "path": "/userId"}] + response = await self.client.patch("/users/current", json=data) + self.assertEqual(response.status, 400) + json_resp = await response.json() + reason = "Request contains '/userId' key that cannot be updated to user object" + self.assertEqual(reason, json_resp["detail"]) + + async def test_update_user_passes(self): + """Test that user object would update with correct keys.""" + self.MockedUserOperator().update_user.return_value = self.user_id + data = [{"op": "add", "path": "/templates/-", "value": [{"accessionId": "3", "schema": "sample"}]}] + response = await self.client.patch("/users/current", json=data) + self.MockedUserOperator().update_user.assert_called_once() + self.assertEqual(response.status, 200) + json_resp = await response.json() + self.assertEqual(json_resp["userId"], self.user_id) + + +class FolderHandlerTestCase(HandlersTestCase): + """Folder API endpoint class test cases.""" + + async def setUpAsync(self): + """Configure default values for testing and other modules. + + This patches used modules and sets default return values for their + methods. 
+ """ + + await super().setUpAsync() + + self.test_draft_doi = {"fullDOI": "10.xxxx/yyyyy", "dataset": "https://doi.org/10.xxxx/yyyyy"} + class_doihandler = "metadata_backend.api.handlers.folder_handler.DOIHandler" + self.patch_doihandler = patch(class_doihandler, spec=True) + self.MockedDoiHandler = self.patch_doihandler.start() + + class_folderoperator = "metadata_backend.api.handlers.folder_handler.FolderOperator" + self.patch_folderoperator = patch(class_folderoperator, **self.folderoperator_config, spec=True) + self.MockedFolderOperator = self.patch_folderoperator.start() + + class_useroperator = "metadata_backend.api.handlers.folder_handler.UserOperator" + self.patch_useroperator = patch(class_useroperator, **self.useroperator_config, spec=True) + self.MockedUserOperator = self.patch_useroperator.start() + + class_operator = "metadata_backend.api.handlers.folder_handler.Operator" + self.patch_operator = patch(class_operator, **self.operator_config, spec=True) + self.MockedOperator = self.patch_operator.start() + + async def tearDownAsync(self): + """Cleanup mocked stuff.""" + await super().tearDownAsync() + self.patch_doihandler.stop() + self.patch_folderoperator.stop() + self.patch_useroperator.stop() + self.patch_operator.stop() + async def test_folder_creation_works(self): """Test that folder is created and folder ID returned.""" json_req = {"name": "test", "description": "test folder"} @@ -634,6 +835,8 @@ async def test_get_folders_with_bad_params(self): async def test_get_folder_works(self): """Test folder is returned when correct folder id is given.""" + # RESTAPIHandler._handle_check_ownedby_user = make_mocked_coro(True) + response = await self.client.get("/folders/FOL12345678") self.assertEqual(response.status, 200) self.MockedFolderOperator().read_folder.assert_called_once() @@ -677,102 +880,3 @@ async def test_folder_deletion_is_called(self): self.MockedFolderOperator().read_folder.assert_called_once() self.MockedFolderOperator().delete_folder.assert_called_once() self.assertEqual(response.status, 204) - - async def test_get_user_works(self): - """Test user object is returned when correct user id is given.""" - response = await self.client.get("/users/current") - self.assertEqual(response.status, 200) - self.MockedUserOperator().read_user.assert_called_once() - json_resp = await response.json() - self.assertEqual(self.test_user, json_resp) - - async def test_get_user_drafts_with_no_drafts(self): - """Test getting user drafts when user has no drafts.""" - response = await self.client.get("/users/current?items=templates") - self.assertEqual(response.status, 200) - self.MockedUserOperator().filter_user.assert_called_once() - json_resp = await response.json() - result = { - "page": { - "page": 1, - "size": 5, - "totalPages": 0, - "totalTemplates": 0, - }, - "templates": [], - } - self.assertEqual(json_resp, result) - - async def test_get_user_templates_with_1_template(self): - """Test getting user templates when user has 1 draft.""" - user = self.test_user - user["templates"].append(self.metadata_json) - self.MockedUserOperator().filter_user.return_value = (user["templates"], 1) - response = await self.client.get("/users/current?items=templates") - self.assertEqual(response.status, 200) - self.MockedUserOperator().filter_user.assert_called_once() - json_resp = await response.json() - result = { - "page": { - "page": 1, - "size": 5, - "totalPages": 1, - "totalTemplates": 1, - }, - "templates": [self.metadata_json], - } - self.assertEqual(json_resp, result) - - async def 
test_get_user_folder_list(self): - """Test get user with folders url returns a folder ID.""" - self.MockedUserOperator().filter_user.return_value = (self.test_user["folders"], 1) - response = await self.client.get("/users/current?items=folders") - self.assertEqual(response.status, 200) - self.MockedUserOperator().filter_user.assert_called_once() - json_resp = await response.json() - result = { - "page": { - "page": 1, - "size": 5, - "totalPages": 1, - "totalFolders": 1, - }, - "folders": ["FOL12345678"], - } - self.assertEqual(json_resp, result) - - async def test_get_user_items_with_bad_param(self): - """Test that error is raised if items parameter in query is not templates or folders.""" - response = await self.client.get("/users/current?items=wrong_thing") - self.assertEqual(response.status, 400) - json_resp = await response.json() - self.assertEqual( - json_resp["detail"], "wrong_thing is a faulty item parameter. Should be either folders or templates" - ) - - async def test_user_deletion_is_called(self): - """Test that user object would be deleted.""" - self.MockedUserOperator().read_user.return_value = self.test_user - self.MockedUserOperator().delete_user.return_value = None - await self.client.delete("/users/current") - self.MockedUserOperator().read_user.assert_called_once() - self.MockedUserOperator().delete_user.assert_called_once() - - async def test_update_user_fails_with_wrong_key(self): - """Test that user object does not update when forbidden keys are provided.""" - data = [{"op": "add", "path": "/userId"}] - response = await self.client.patch("/users/current", json=data) - self.assertEqual(response.status, 400) - json_resp = await response.json() - reason = "Request contains '/userId' key that cannot be updated to user object" - self.assertEqual(reason, json_resp["detail"]) - - async def test_update_user_passes(self): - """Test that user object would update with correct keys.""" - self.MockedUserOperator().update_user.return_value = self.user_id - data = [{"op": "add", "path": "/templates/-", "value": [{"accessionId": "3", "schema": "sample"}]}] - response = await self.client.patch("/users/current", json=data) - self.MockedUserOperator().update_user.assert_called_once() - self.assertEqual(response.status, 200) - json_resp = await response.json() - self.assertEqual(json_resp["userId"], self.user_id) From 77fe2a7dcd02ce56228898674b01af57ade64893 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 29 Dec 2021 08:55:03 +0000 Subject: [PATCH 112/336] Update naming scheme --- metadata_backend/api/handlers/common.py | 56 +++++++++++ .../handlers/{folder_handler.py => folder.py} | 2 +- .../handlers/{object_handler.py => object.py} | 7 +- .../handlers/{api_handlers.py => restapi.py} | 95 +------------------ metadata_backend/api/handlers/static.py | 47 +++++++++ .../{submission_handler.py => submission.py} | 6 +- .../{templates_handler.py => template.py} | 2 +- .../api/handlers/{user_handler.py => user.py} | 2 +- metadata_backend/server.py | 13 +-- tests/test_handlers.py | 26 ++--- 10 files changed, 135 insertions(+), 121 deletions(-) create mode 100644 metadata_backend/api/handlers/common.py rename metadata_backend/api/handlers/{folder_handler.py => folder.py} (99%) rename metadata_backend/api/handlers/{object_handler.py => object.py} (98%) rename metadata_backend/api/handlers/{api_handlers.py => restapi.py} (69%) create mode 100644 metadata_backend/api/handlers/static.py rename metadata_backend/api/handlers/{submission_handler.py => submission.py} (97%) rename 
metadata_backend/api/handlers/{templates_handler.py => template.py} (99%)
 rename metadata_backend/api/handlers/{user_handler.py => user.py} (99%)

diff --git a/metadata_backend/api/handlers/common.py b/metadata_backend/api/handlers/common.py
new file mode 100644
index 000000000..ff1470dd0
--- /dev/null
+++ b/metadata_backend/api/handlers/common.py
@@ -0,0 +1,56 @@
+"""Functions shared between handlers."""
+from typing import List, Tuple, cast
+
+from aiohttp import BodyPartReader, web
+from aiohttp.web import Request
+
+from ...conf.conf import schema_types
+from ...helpers.logger import LOG
+
+
+async def extract_xml_upload(req: Request, extract_one: bool = False) -> List[Tuple[str, str]]:
+ """Extract submitted xml-file(s) from multi-part request.
+
+ Files are sorted into a specific order by their schema priorities (e.g.
+ submission should be processed before study).
+
+ :param req: POST request containing "multipart/form-data" upload
+ :raises: HTTPBadRequest if request is not valid for multipart or multiple files sent. HTTPNotFound if
+ schema was not found.
+ :returns: content and schema type for each uploaded file, sorted by schema
+ type.
+ """
+ files: List[Tuple[str, str]] = []
+ try:
+ reader = await req.multipart()
+ except AssertionError:
+ reason = "Request does not have valid multipart/form content"
+ LOG.error(reason)
+ raise web.HTTPBadRequest(reason=reason)
+ while True:
+ part = await reader.next()
+ # Following is probably error in aiohttp type hints, fixing so
+ # mypy doesn't complain about it. No runtime consequences.
+ part = cast(BodyPartReader, part)
+ if not part:
+ break
+ if extract_one and files:
+ reason = "Only one file can be sent to this endpoint at a time."
+ LOG.error(reason)
+ raise web.HTTPBadRequest(reason=reason)
+ if part.name:
+ schema_type = part.name.lower()
+ if schema_type not in schema_types:
+ reason = f"Specified schema {schema_type} was not found." 
+ LOG.error(reason) + raise web.HTTPNotFound(reason=reason) + data = [] + while True: + chunk = await part.read_chunk() + if not chunk: + break + data.append(chunk) + xml_content = "".join(x.decode("UTF-8") for x in data) + files.append((xml_content, schema_type)) + LOG.debug(f"processed file in {schema_type}") + return sorted(files, key=lambda x: schema_types[x[1]]["priority"]) diff --git a/metadata_backend/api/handlers/folder_handler.py b/metadata_backend/api/handlers/folder.py similarity index 99% rename from metadata_backend/api/handlers/folder_handler.py rename to metadata_backend/api/handlers/folder.py index 799029109..cc5910fc9 100644 --- a/metadata_backend/api/handlers/folder_handler.py +++ b/metadata_backend/api/handlers/folder.py @@ -14,7 +14,7 @@ from ...helpers.doi import DOIHandler from ...helpers.logger import LOG from ...helpers.validator import JSONValidator -from .api_handlers import RESTAPIHandler +from .restapi import RESTAPIHandler from ..middlewares import get_session from ..operators import FolderOperator, Operator, UserOperator diff --git a/metadata_backend/api/handlers/object_handler.py b/metadata_backend/api/handlers/object.py similarity index 98% rename from metadata_backend/api/handlers/object_handler.py rename to metadata_backend/api/handlers/object.py index 27b627e29..486649991 100644 --- a/metadata_backend/api/handlers/object_handler.py +++ b/metadata_backend/api/handlers/object.py @@ -9,8 +9,9 @@ from ...helpers.logger import LOG from ...helpers.validator import JSONValidator -from .api_handlers import RESTAPIHandler, _extract_xml_upload from ..operators import FolderOperator, Operator, XMLOperator +from .common import extract_xml_upload +from .restapi import RESTAPIHandler class ObjectAPIHandler(RESTAPIHandler): @@ -106,7 +107,7 @@ async def post_object(self, req: Request) -> Response: content: Union[Dict, str] operator: Union[Operator, XMLOperator] if req.content_type == "multipart/form-data": - files = await _extract_xml_upload(req, extract_one=True) + files = await extract_xml_upload(req, extract_one=True) content, _ = files[0] operator = XMLOperator(db_client) else: @@ -193,7 +194,7 @@ async def put_object(self, req: Request) -> Response: content: Union[Dict, str] operator: Union[Operator, XMLOperator] if req.content_type == "multipart/form-data": - files = await _extract_xml_upload(req, extract_one=True) + files = await extract_xml_upload(req, extract_one=True) content, _ = files[0] operator = XMLOperator(db_client) else: diff --git a/metadata_backend/api/handlers/api_handlers.py b/metadata_backend/api/handlers/restapi.py similarity index 69% rename from metadata_backend/api/handlers/api_handlers.py rename to metadata_backend/api/handlers/restapi.py index e50092292..943d58742 100644 --- a/metadata_backend/api/handlers/api_handlers.py +++ b/metadata_backend/api/handlers/restapi.py @@ -1,12 +1,10 @@ """Handle HTTP methods for server.""" import json -import mimetypes from math import ceil -from pathlib import Path -from typing import AsyncGenerator, Dict, List, Tuple, cast +from typing import AsyncGenerator, Dict, List import ujson -from aiohttp import BodyPartReader, web +from aiohttp import web from aiohttp.web import Request, Response from motor.motor_asyncio import AsyncIOMotorClient from multidict import CIMultiDict @@ -216,92 +214,3 @@ async def _header_links(self, url: str, page: int, size: int, total_objects: int link_headers = CIMultiDict(Link=f"{links}") LOG.debug("Link headers created") return link_headers - - -class StaticHandler: - """Handler 
for static routes, mostly frontend and 404.""" - - def __init__(self, frontend_static_files: Path) -> None: - """Initialize path to frontend static files folder.""" - self.path = frontend_static_files - - async def frontend(self, req: Request) -> Response: - """Serve requests related to frontend SPA. - - :param req: GET request - :returns: Response containing frontpage static file - """ - serve_path = self.path.joinpath("./" + req.path) - - if not serve_path.exists() or not serve_path.is_file(): - LOG.debug(f"{serve_path} was not found or is not a file - serving index.html") - serve_path = self.path.joinpath("./index.html") - - LOG.debug(f"Serve Frontend SPA {req.path} by {serve_path}.") - - mime_type = mimetypes.guess_type(serve_path.as_posix()) - - return Response(body=serve_path.read_bytes(), content_type=(mime_type[0] or "text/html")) - - def setup_static(self) -> Path: - """Set path for static js files and correct return mimetypes. - - :returns: Path to static js files folder - """ - mimetypes.init() - mimetypes.types_map[".js"] = "application/javascript" - mimetypes.types_map[".js.map"] = "application/json" - mimetypes.types_map[".svg"] = "image/svg+xml" - mimetypes.types_map[".css"] = "text/css" - mimetypes.types_map[".css.map"] = "application/json" - LOG.debug("static paths for SPA set.") - return self.path / "static" - - -# Private functions shared between handlers -async def _extract_xml_upload(req: Request, extract_one: bool = False) -> List[Tuple[str, str]]: - """Extract submitted xml-file(s) from multi-part request. - - Files are sorted to spesific order by their schema priorities (e.g. - submission should be processed before study). - - :param req: POST request containing "multipart/form-data" upload - :raises: HTTPBadRequest if request is not valid for multipart or multiple files sent. HTTPNotFound if - schema was not found. - :returns: content and schema type for each uploaded file, sorted by schema - type. - """ - files: List[Tuple[str, str]] = [] - try: - reader = await req.multipart() - except AssertionError: - reason = "Request does not have valid multipart/form content" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - while True: - part = await reader.next() - # Following is probably error in aiohttp type hints, fixing so - # mypy doesn't complain about it. No runtime consequences. - part = cast(BodyPartReader, part) - if not part: - break - if extract_one and files: - reason = "Only one file can be sent to this endpoint at a time." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - if part.name: - schema_type = part.name.lower() - if schema_type not in schema_types: - reason = f"Specified schema {schema_type} was not found." 
- LOG.error(reason) - raise web.HTTPNotFound(reason=reason) - data = [] - while True: - chunk = await part.read_chunk() - if not chunk: - break - data.append(chunk) - xml_content = "".join(x.decode("UTF-8") for x in data) - files.append((xml_content, schema_type)) - LOG.debug(f"processed file in {schema_type}") - return sorted(files, key=lambda x: schema_types[x[1]]["priority"]) diff --git a/metadata_backend/api/handlers/static.py b/metadata_backend/api/handlers/static.py new file mode 100644 index 000000000..5f93d6fa2 --- /dev/null +++ b/metadata_backend/api/handlers/static.py @@ -0,0 +1,47 @@ +"""Handle HTTP methods for server.""" +import mimetypes +from pathlib import Path + +from aiohttp.web import Request, Response + +from ...helpers.logger import LOG + + +class StaticHandler: + """Handler for static routes, mostly frontend and 404.""" + + def __init__(self, frontend_static_files: Path) -> None: + """Initialize path to frontend static files folder.""" + self.path = frontend_static_files + + async def frontend(self, req: Request) -> Response: + """Serve requests related to frontend SPA. + + :param req: GET request + :returns: Response containing frontpage static file + """ + serve_path = self.path.joinpath("./" + req.path) + + if not serve_path.exists() or not serve_path.is_file(): + LOG.debug(f"{serve_path} was not found or is not a file - serving index.html") + serve_path = self.path.joinpath("./index.html") + + LOG.debug(f"Serve Frontend SPA {req.path} by {serve_path}.") + + mime_type = mimetypes.guess_type(serve_path.as_posix()) + + return Response(body=serve_path.read_bytes(), content_type=(mime_type[0] or "text/html")) + + def setup_static(self) -> Path: + """Set path for static js files and correct return mimetypes. + + :returns: Path to static js files folder + """ + mimetypes.init() + mimetypes.types_map[".js"] = "application/javascript" + mimetypes.types_map[".js.map"] = "application/json" + mimetypes.types_map[".svg"] = "image/svg+xml" + mimetypes.types_map[".css"] = "text/css" + mimetypes.types_map[".css.map"] = "application/json" + LOG.debug("static paths for SPA set.") + return self.path / "static" diff --git a/metadata_backend/api/handlers/submission_handler.py b/metadata_backend/api/handlers/submission.py similarity index 97% rename from metadata_backend/api/handlers/submission_handler.py rename to metadata_backend/api/handlers/submission.py index d3492e4f2..5a95fa804 100644 --- a/metadata_backend/api/handlers/submission_handler.py +++ b/metadata_backend/api/handlers/submission.py @@ -13,8 +13,8 @@ from ...helpers.parser import XMLToJSONParser from ...helpers.schema_loader import SchemaNotFoundException, XMLSchemaLoader from ...helpers.validator import XMLValidator -from .api_handlers import _extract_xml_upload from ..operators import Operator, XMLOperator +from .common import extract_xml_upload class SubmissionAPIHandler: @@ -31,7 +31,7 @@ async def submit(self, req: Request) -> Response: :raises: HTTPBadRequest if request is missing some parameters or cannot be processed :returns: XML-based receipt from submission """ - files = await _extract_xml_upload(req) + files = await extract_xml_upload(req) schema_types = Counter(file[1] for file in files) if "submission" not in schema_types: reason = "There must be a submission.xml file in submission." 
@@ -92,7 +92,7 @@ async def validate(self, req: Request) -> Response: :param req: Multipart POST request with submission.xml and files :returns: JSON response indicating if validation was successful or not """ - files = await _extract_xml_upload(req, extract_one=True) + files = await extract_xml_upload(req, extract_one=True) xml_content, schema_type = files[0] validator = await self._perform_validation(schema_type, xml_content) return web.Response(body=validator.resp_body, content_type="application/json") diff --git a/metadata_backend/api/handlers/templates_handler.py b/metadata_backend/api/handlers/template.py similarity index 99% rename from metadata_backend/api/handlers/templates_handler.py rename to metadata_backend/api/handlers/template.py index f45413142..c2bccc2cc 100644 --- a/metadata_backend/api/handlers/templates_handler.py +++ b/metadata_backend/api/handlers/template.py @@ -9,7 +9,7 @@ from ...helpers.logger import LOG from ..middlewares import get_session from ..operators import Operator, UserOperator, XMLOperator -from .api_handlers import RESTAPIHandler +from .restapi import RESTAPIHandler class TemplatesAPIHandler(RESTAPIHandler): diff --git a/metadata_backend/api/handlers/user_handler.py b/metadata_backend/api/handlers/user.py similarity index 99% rename from metadata_backend/api/handlers/user_handler.py rename to metadata_backend/api/handlers/user.py index cd3a2bd9b..e77ce3d3d 100644 --- a/metadata_backend/api/handlers/user_handler.py +++ b/metadata_backend/api/handlers/user.py @@ -10,7 +10,7 @@ from ...conf.conf import aai_config from ...helpers.logger import LOG -from .api_handlers import RESTAPIHandler +from .restapi import RESTAPIHandler from ..middlewares import decrypt_cookie, get_session from ..operators import FolderOperator, Operator, UserOperator diff --git a/metadata_backend/server.py b/metadata_backend/server.py index a600b2d06..8b0be93d1 100644 --- a/metadata_backend/server.py +++ b/metadata_backend/server.py @@ -9,12 +9,13 @@ from cryptography.fernet import Fernet from .api.auth import AccessHandler -from .api.handlers.api_handlers import RESTAPIHandler, StaticHandler -from .api.handlers.folder_handler import FolderAPIHandler -from .api.handlers.object_handler import ObjectAPIHandler -from .api.handlers.submission_handler import SubmissionAPIHandler -from .api.handlers.templates_handler import TemplatesAPIHandler -from .api.handlers.user_handler import UserAPIHandler +from .api.handlers.restapi import RESTAPIHandler +from .api.handlers.static import StaticHandler +from .api.handlers.folder import FolderAPIHandler +from .api.handlers.object import ObjectAPIHandler +from .api.handlers.submission import SubmissionAPIHandler +from .api.handlers.template import TemplatesAPIHandler +from .api.handlers.user import UserAPIHandler from .api.health import HealthHandler from .api.middlewares import check_login, http_error_handler from .conf.conf import aai_config, create_db_client, frontend_static_files diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 6e62bb1b1..ccc7346b3 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -5,7 +5,7 @@ from aiohttp import FormData from aiohttp.test_utils import AioHTTPTestCase, make_mocked_coro -from metadata_backend.api.handlers.api_handlers import RESTAPIHandler +from metadata_backend.api.handlers.restapi import RESTAPIHandler from metadata_backend.api.middlewares import generate_cookie from metadata_backend.server import init @@ -238,11 +238,11 @@ async def setUpAsync(self): """ await 
super().setUpAsync() - class_parser = "metadata_backend.api.handlers.submission_handler.XMLToJSONParser" + class_parser = "metadata_backend.api.handlers.submission.XMLToJSONParser" self.patch_parser = patch(class_parser, spec=True) self.MockedParser = self.patch_parser.start() - class_xmloperator = "metadata_backend.api.handlers.submission_handler.XMLOperator" + class_xmloperator = "metadata_backend.api.handlers.submission.XMLOperator" self.patch_xmloperator = patch(class_xmloperator, **self.xmloperator_config, spec=True) self.MockedXMLOperator = self.patch_xmloperator.start() @@ -339,15 +339,15 @@ async def setUpAsync(self): await super().setUpAsync() - class_xmloperator = "metadata_backend.api.handlers.object_handler.XMLOperator" + class_xmloperator = "metadata_backend.api.handlers.object.XMLOperator" self.patch_xmloperator = patch(class_xmloperator, **self.xmloperator_config, spec=True) self.MockedXMLOperator = self.patch_xmloperator.start() - class_operator = "metadata_backend.api.handlers.object_handler.Operator" + class_operator = "metadata_backend.api.handlers.object.Operator" self.patch_operator = patch(class_operator, **self.operator_config, spec=True) self.MockedOperator = self.patch_operator.start() - class_folderoperator = "metadata_backend.api.handlers.object_handler.FolderOperator" + class_folderoperator = "metadata_backend.api.handlers.object.FolderOperator" self.patch_folderoperator = patch(class_folderoperator, **self.folderoperator_config, spec=True) self.MockedFolderOperator = self.patch_folderoperator.start() @@ -602,15 +602,15 @@ async def setUpAsync(self): """ await super().setUpAsync() - class_useroperator = "metadata_backend.api.handlers.user_handler.UserOperator" + class_useroperator = "metadata_backend.api.handlers.user.UserOperator" self.patch_useroperator = patch(class_useroperator, **self.useroperator_config, spec=True) self.MockedUserOperator = self.patch_useroperator.start() - class_folderoperator = "metadata_backend.api.handlers.user_handler.FolderOperator" + class_folderoperator = "metadata_backend.api.handlers.user.FolderOperator" self.patch_folderoperator = patch(class_folderoperator, **self.folderoperator_config, spec=True) self.MockedFolderOperator = self.patch_folderoperator.start() - class_operator = "metadata_backend.api.handlers.user_handler.Operator" + class_operator = "metadata_backend.api.handlers.user.Operator" self.patch_operator = patch(class_operator, **self.operator_config, spec=True) self.MockedOperator = self.patch_operator.start() @@ -734,19 +734,19 @@ async def setUpAsync(self): await super().setUpAsync() self.test_draft_doi = {"fullDOI": "10.xxxx/yyyyy", "dataset": "https://doi.org/10.xxxx/yyyyy"} - class_doihandler = "metadata_backend.api.handlers.folder_handler.DOIHandler" + class_doihandler = "metadata_backend.api.handlers.folder.DOIHandler" self.patch_doihandler = patch(class_doihandler, spec=True) self.MockedDoiHandler = self.patch_doihandler.start() - class_folderoperator = "metadata_backend.api.handlers.folder_handler.FolderOperator" + class_folderoperator = "metadata_backend.api.handlers.folder.FolderOperator" self.patch_folderoperator = patch(class_folderoperator, **self.folderoperator_config, spec=True) self.MockedFolderOperator = self.patch_folderoperator.start() - class_useroperator = "metadata_backend.api.handlers.folder_handler.UserOperator" + class_useroperator = "metadata_backend.api.handlers.folder.UserOperator" self.patch_useroperator = patch(class_useroperator, **self.useroperator_config, spec=True) 
self.MockedUserOperator = self.patch_useroperator.start() - class_operator = "metadata_backend.api.handlers.folder_handler.Operator" + class_operator = "metadata_backend.api.handlers.folder.Operator" self.patch_operator = patch(class_operator, **self.operator_config, spec=True) self.MockedOperator = self.patch_operator.start() From 70d5a4f6080986fab46d890f1b771592c701f8d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Thu, 1 Jul 2021 20:28:56 +0300 Subject: [PATCH 113/336] simple csv parser --- metadata_backend/helpers/csv_parser.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 metadata_backend/helpers/csv_parser.py diff --git a/metadata_backend/helpers/csv_parser.py b/metadata_backend/helpers/csv_parser.py new file mode 100644 index 000000000..7deea3942 --- /dev/null +++ b/metadata_backend/helpers/csv_parser.py @@ -0,0 +1,18 @@ +import csv, json + +csvFilePath = "" +jsonFilePath = "" + +data = {} +with open(csvFilePath) as csvFile: + csvReader = csv.DictReader(csvFile) + for row in csvReader: + for key in row: + try: + row[key] = eval(row[key]) + except: + pass + data = row + +with open(jsonFilePath, 'w') as jsonFile: + jsonFile.write(json.dumps(data, indent=4)) From a0b25640f6cf8ebfe93afba8d748ab27e579e9be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Tue, 6 Jul 2021 17:36:38 +0300 Subject: [PATCH 114/336] add new parser class --- metadata_backend/helpers/parser.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/metadata_backend/helpers/parser.py b/metadata_backend/helpers/parser.py index cab1b90c0..40743d8a4 100644 --- a/metadata_backend/helpers/parser.py +++ b/metadata_backend/helpers/parser.py @@ -1,4 +1,4 @@ -"""Tool to parse XML files to JSON.""" +"""Tool to parse XML and CSV files to JSON.""" import re from typing import Any, Dict, List, Optional, Type, Union @@ -314,6 +314,20 @@ def _load_schema(schema_type: str) -> XMLSchema: return schema +class CSVToJSONParser: + """Methods to parse and convert data from CSV files to JSON format.""" + + def parse(self, schema_type: str, content: str) -> Dict: + """Parse a CSV file, convert it to JSON and validate against JSON schema. + + :param schema_type: Schema type to be used + :param content: XML content to be parsed + :returns: XML parsed to JSON + :raises: HTTPBadRequest if error was raised during validation + """ + raise web.HTTPNotImplemented() + + def jsonpatch_mongo(identifier: Dict, json_patch: List[Dict[str, Any]]) -> List: """Convert JSONpatch object to mongo query. 
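The stub above only reserves the CSVToJSONParser interface; a rough usage sketch of the call pattern the following commits implement, mirroring the existing XMLToJSONParser.parse(schema_type, content) signature (the CSV payload below is illustrative, not a repository fixture):

    # Sketch only: at this point in the series parse() still raises
    # HTTPNotImplemented; the next commits swap the stub for a
    # csv.DictReader-based implementation.
    from metadata_backend.helpers.parser import CSVToJSONParser

    content = "title,alias\ntest sample,test1\n"  # illustrative CSV payload
    sample_json = CSVToJSONParser().parse("sample", content)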
From 0bb8bc04c9ea529777b23a4e5cdd7cfe439d2356 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Tue, 6 Jul 2021 17:57:02 +0300 Subject: [PATCH 115/336] remove original test parser --- metadata_backend/helpers/csv_parser.py | 18 ------------------ metadata_backend/helpers/parser.py | 17 ++++++++++++----- 2 files changed, 12 insertions(+), 23 deletions(-) delete mode 100644 metadata_backend/helpers/csv_parser.py diff --git a/metadata_backend/helpers/csv_parser.py b/metadata_backend/helpers/csv_parser.py deleted file mode 100644 index 7deea3942..000000000 --- a/metadata_backend/helpers/csv_parser.py +++ /dev/null @@ -1,18 +0,0 @@ -import csv, json - -csvFilePath = "" -jsonFilePath = "" - -data = {} -with open(csvFilePath) as csvFile: - csvReader = csv.DictReader(csvFile) - for row in csvReader: - for key in row: - try: - row[key] = eval(row[key]) - except: - pass - data = row - -with open(jsonFilePath, 'w') as jsonFile: - jsonFile.write(json.dumps(data, indent=4)) diff --git a/metadata_backend/helpers/parser.py b/metadata_backend/helpers/parser.py index 40743d8a4..292b365df 100644 --- a/metadata_backend/helpers/parser.py +++ b/metadata_backend/helpers/parser.py @@ -1,6 +1,7 @@ """Tool to parse XML and CSV files to JSON.""" import re +import csv from typing import Any, Dict, List, Optional, Type, Union from aiohttp import web @@ -320,13 +321,19 @@ class CSVToJSONParser: def parse(self, schema_type: str, content: str) -> Dict: """Parse a CSV file, convert it to JSON and validate against JSON schema. - :param schema_type: Schema type to be used - :param content: XML content to be parsed - :returns: XML parsed to JSON + :param schema_type: Schema type of the file to be parsed + :param content: CSV content to be parsed + :returns: CSV parsed to JSON :raises: HTTPBadRequest if error was raised during validation """ - raise web.HTTPNotImplemented() - + csv_content = csv.DictReader(content) + result: Dict + for row in csv_content: + result = row + # TODO case for multiple rows + # TODO validate result + return result + def jsonpatch_mongo(identifier: Dict, json_patch: List[Dict[str, Any]]) -> List: """Convert JSONpatch object to mongo query. 
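Note that the implementation above hands csv.DictReader the raw content string; iterating a string yields single characters rather than lines, so the reader produces one bogus single-character row per character. A minimal standalone illustration of the pitfall, and of the text-stream wrapping that the next commit adopts:

    import csv
    import io

    content = "title,alias\ntest sample,test1\n"

    # Fed the raw string, DictReader treats each character as a "line": the
    # header collapses to a one-letter column and the real rows are lost.
    bad_rows = list(csv.DictReader(content))

    # Wrapping the content in a text stream restores line-based iteration.
    good_rows = list(csv.DictReader(io.StringIO(content)))
    assert good_rows == [{"title": "test sample", "alias": "test1"}]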
From f580b0d42f8c43767871680a12952ed7af25f859 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Mon, 12 Jul 2021 09:41:16 +0300 Subject: [PATCH 116/336] add sample parsing functionality --- metadata_backend/helpers/parser.py | 34 +++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/metadata_backend/helpers/parser.py b/metadata_backend/helpers/parser.py index 292b365df..89be31dda 100644 --- a/metadata_backend/helpers/parser.py +++ b/metadata_backend/helpers/parser.py @@ -2,13 +2,14 @@ import re import csv +import io from typing import Any, Dict, List, Optional, Type, Union from aiohttp import web from xmlschema import XMLSchema, XMLSchemaConverter, XMLSchemaException, XsdElement, XsdType from .logger import LOG -from .schema_loader import SchemaNotFoundException, XMLSchemaLoader +from .schema_loader import SchemaNotFoundException, XMLSchemaLoader, JSONSchemaLoader from .validator import JSONValidator, XMLValidator from pymongo import UpdateOne @@ -324,14 +325,31 @@ def parse(self, schema_type: str, content: str) -> Dict: :param schema_type: Schema type of the file to be parsed :param content: CSV content to be parsed :returns: CSV parsed to JSON - :raises: HTTPBadRequest if error was raised during validation + :raises: HTTPBadRequest if error was raised during parsing or validation """ - csv_content = csv.DictReader(content) - result: Dict - for row in csv_content: - result = row - # TODO case for multiple rows - # TODO validate result + # Write content string into text stream for easy parsing into an object + with io.StringIO() as file: + file.write(content) + file.seek(0) + csv_reader = csv.DictReader(file) + rows = [row for row in csv_reader] + + # CSV files should contain precisely one object + if not rows: + reason = "CSV file appears to be incomplete. No rows of data were parsed." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + if len(rows) > 1: + reason = "Multi-line CSV files are currently not supported." 
+ LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + result = rows[0] + # This is required to pass validation against current sample schema + if schema_type == "sample" and "sampleName" not in result: + result["sampleName"] = '{"taxonId": 0}' + result["sampleName"] = eval(result["sampleName"]) # workaround for mypy complaint + JSONValidator(result, schema_type.lower()).validate return result From ce8473b822642ceaf5b31955a796dae488ac9cd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Mon, 12 Jul 2021 09:41:38 +0300 Subject: [PATCH 117/336] add sample test file --- tests/test_files/sample/EGAformat.csv | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 tests/test_files/sample/EGAformat.csv diff --git a/tests/test_files/sample/EGAformat.csv b/tests/test_files/sample/EGAformat.csv new file mode 100644 index 000000000..34aa281ab --- /dev/null +++ b/tests/test_files/sample/EGAformat.csv @@ -0,0 +1,2 @@ +title,alias,description,subjectId,bioSampleId,caseOrControl,gender,organismPart,cellLine,region,phenotype +test sample,test1,A test sample metadata CSV file based on the EGA submitter portal format,123456789abc,,,unknown,,,,some disease state From ed18db331fb4c1f29afaf32a1eb1a415e974353b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Mon, 12 Jul 2021 09:42:13 +0300 Subject: [PATCH 118/336] add parser unit tests --- tests/test_parser.py | 65 ++++++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index 3ce80b8b7..cf35a0c8d 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -4,7 +4,7 @@ from aiohttp import web -from metadata_backend.helpers.parser import XMLToJSONParser, jsonpatch_mongo +from metadata_backend.helpers.parser import XMLToJSONParser, CSVToJSONParser, jsonpatch_mongo from pymongo import UpdateOne @@ -15,10 +15,11 @@ class ParserTestCase(unittest.TestCase): def setUp(self): """Configure variables for tests.""" - self.parser = XMLToJSONParser() + self.xml_parser = XMLToJSONParser() + self.csv_parser = CSVToJSONParser() - def load_xml_from_file(self, submission, filename): - """Load XML as string from given file.""" + def load_file_to_text(self, submission, filename): + """Load XML or CSV as a string from given file.""" path_to_xml_file = self.TESTFILES_ROOT / submission / filename return path_to_xml_file.read_text() @@ -27,8 +28,8 @@ def test_study_is_parsed(self): Tests for some values that converted JSON should have. """ - study_xml = self.load_xml_from_file("study", "SRP000539.xml") - study_json = self.parser.parse("study", study_xml) + study_xml = self.load_file_to_text("study", "SRP000539.xml") + study_json = self.xml_parser.parse("study", study_xml) self.assertIn("Highly integrated epigenome maps in Arabidopsis", study_json["descriptor"]["studyTitle"]) self.assertIn("18423832", study_json["studyLinks"][0]["xrefId"]) @@ -37,8 +38,8 @@ def test_sample_is_parsed(self): Tests for some values that converted JSON should have. 
""" - sample_xml = self.load_xml_from_file("sample", "SRS001433.xml") - sample_json = self.parser.parse("sample", sample_xml) + sample_xml = self.load_file_to_text("sample", "SRS001433.xml") + sample_json = self.xml_parser.parse("sample", sample_xml) self.assertIn("Human HapMap individual NA18758", sample_json["description"]) self.assertIn("Homo sapiens", sample_json["sampleName"]["scientificName"]) @@ -47,8 +48,8 @@ def test_experiment_is_parsed(self): Tests for some values that convert JSON should have. """ - experiment_xml = self.load_xml_from_file("experiment", "ERX000119.xml") - experiment_json = self.parser.parse("experiment", experiment_xml) + experiment_xml = self.load_file_to_text("experiment", "ERX000119.xml") + experiment_json = self.xml_parser.parse("experiment", experiment_xml) self.assertIn( "SOLiD sequencing of Human HapMap individual NA18504", experiment_json["design"]["designDescription"] ) @@ -58,8 +59,8 @@ def test_run_is_parsed(self): Tests for some values that convert JSON should have. """ - run_xml = self.load_xml_from_file("run", "ERR000076.xml") - run_json = self.parser.parse("run", run_xml) + run_xml = self.load_file_to_text("run", "ERR000076.xml") + run_json = self.xml_parser.parse("run", run_xml) self.assertIn("ERA000/ERA000014/srf/BGI-FC304RWAAXX_5.srf", run_json["files"][0]["filename"]) self.assertIn("ERX000037", run_json["experimentRef"][0]["accessionId"]) @@ -68,8 +69,8 @@ def test_analysis_is_parsed(self): Tests for some values that convert JSON should have. """ - analysis_xml = self.load_xml_from_file("analysis", "ERZ266973.xml") - analysis_json = self.parser.parse("analysis", analysis_xml) + analysis_xml = self.load_file_to_text("analysis", "ERZ266973.xml") + analysis_json = self.xml_parser.parse("analysis", analysis_xml) self.assertIn( "GCA_000001405.1", analysis_json["analysisType"]["processedReads"]["assembly"]["accessionId"], @@ -80,20 +81,44 @@ def test_submission_is_parsed(self): Test for specific actions in submission. 
""" - submission_xml = self.load_xml_from_file("submission", "ERA521986_valid.xml") - submission_json = self.parser.parse("submission", submission_xml) + submission_xml = self.load_file_to_text("submission", "ERA521986_valid.xml") + submission_json = self.xml_parser.parse("submission", submission_xml) self.assertEqual({"schema": "study", "source": "SRP000539.xml"}, submission_json["actions"]["action"][0]["add"]) def test_error_raised_when_schema_not_found(self): - """Test 400 is returned when schema.""" + """Test 400 is returned when schema type is invalid.""" with self.assertRaises(web.HTTPBadRequest): - self.parser._load_schema("None") + self.xml_parser._load_schema("None") def test_error_raised_when_input_xml_not_valid_xml(self): """Give parser XML with broken syntax, should fail.""" - study_xml = self.load_xml_from_file("study", "SRP000539_invalid.xml") + study_xml = self.load_file_to_text("study", "SRP000539_invalid.xml") with self.assertRaises(web.HTTPBadRequest): - self.parser.parse("study", study_xml) + self.xml_parser.parse("study", study_xml) + + def test_csv_sample_is_parsed(self): + """Test that a CSV sample is parsed and validated.""" + sample_csv = self.load_file_to_text("sample", "EGAformat.csv") + result = self.csv_parser.parse("sample", sample_csv) + self.assertEqual("test sample", result["title"]) + self.assertEqual({"taxonId": 0}, result["sampleName"]) + + def test_multiline_csv_raises_error(self): + """Test 400 is raised with a multi-line CSV input.""" + with self.assertRaises(web.HTTPBadRequest): + self.csv_parser.parse("sample", "id,title\n1,something\n2,something else\n") + + def test_empty_csv_raises_error(self): + """Test 400 is raised with an empty or an incomplete CSV input.""" + with self.assertRaises(web.HTTPBadRequest): + self.csv_parser.parse("sample", "") + with self.assertRaises(web.HTTPBadRequest): + self.csv_parser.parse("sample", "id,title,description\n") + + def test_csv_parse_with_wrong_schema(self): + """Test 400 is raised with wrong schema type.""" + with self.assertRaises(web.HTTPBadRequest): + self.csv_parser.parse("wrong", "id,title\n,\n") def test_json_patch_mongo_conversion(self): """Test JSON patch to mongo query conversion.""" From 5f0c35fe44f5969f59cb4e04e87fe4978911f289 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Mon, 12 Jul 2021 09:58:56 +0300 Subject: [PATCH 119/336] adjust post object endpoint to handle csv --- metadata_backend/helpers/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata_backend/helpers/parser.py b/metadata_backend/helpers/parser.py index 89be31dda..70d8311be 100644 --- a/metadata_backend/helpers/parser.py +++ b/metadata_backend/helpers/parser.py @@ -9,7 +9,7 @@ from xmlschema import XMLSchema, XMLSchemaConverter, XMLSchemaException, XsdElement, XsdType from .logger import LOG -from .schema_loader import SchemaNotFoundException, XMLSchemaLoader, JSONSchemaLoader +from .schema_loader import SchemaNotFoundException, XMLSchemaLoader from .validator import JSONValidator, XMLValidator from pymongo import UpdateOne From 67a07b445f668ec8c4835bfa40e793c31ed375ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Wed, 14 Jul 2021 13:23:01 +0300 Subject: [PATCH 120/336] change default taxonid --- metadata_backend/helpers/parser.py | 4 +++- tests/test_parser.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/metadata_backend/helpers/parser.py b/metadata_backend/helpers/parser.py index 70d8311be..51c7a7f4c 100644 --- 
a/metadata_backend/helpers/parser.py +++ b/metadata_backend/helpers/parser.py @@ -347,7 +347,9 @@ def parse(self, schema_type: str, content: str) -> Dict: result = rows[0] # This is required to pass validation against current sample schema if schema_type == "sample" and "sampleName" not in result: - result["sampleName"] = '{"taxonId": 0}' + # Without TaxonID provided we assume the sample relates to + # Homo Sapien which has default TaxonID of 9606 + result["sampleName"] = '{"taxonId": 9606}' result["sampleName"] = eval(result["sampleName"]) # workaround for mypy complaint JSONValidator(result, schema_type.lower()).validate return result diff --git a/tests/test_parser.py b/tests/test_parser.py index cf35a0c8d..55c87afd2 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -101,7 +101,7 @@ def test_csv_sample_is_parsed(self): sample_csv = self.load_file_to_text("sample", "EGAformat.csv") result = self.csv_parser.parse("sample", sample_csv) self.assertEqual("test sample", result["title"]) - self.assertEqual({"taxonId": 0}, result["sampleName"]) + self.assertEqual({"taxonId": 9606}, result["sampleName"]) def test_multiline_csv_raises_error(self): """Test 400 is raised with a multi-line CSV input.""" From a6ad694799dccb2938a140d816a5f76446e2dc3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Wed, 14 Jul 2021 13:35:42 +0300 Subject: [PATCH 121/336] fix some line length fixes caused by black --- tests/test_handlers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_handlers.py b/tests/test_handlers.py index ccc7346b3..cf4a998eb 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -383,7 +383,7 @@ async def test_submit_object_missing_field_json(self): """Test that JSON has missing property.""" json_req = {"centerName": "GEO", "alias": "GSE10966"} response = await self.client.post("/objects/study", json=json_req) - reason = "Provided input does not seem correct because: " "''descriptor' is a required property'" + reason = "Provided input does not seem correct because: ''descriptor' is a required property'" self.assertEqual(response.status, 400) self.assertIn(reason, await response.text()) @@ -395,7 +395,7 @@ async def test_submit_object_bad_field_json(self): "descriptor": {"studyTitle": "Highly", "studyType": "ceva"}, } response = await self.client.post("/objects/study", json=json_req) - reason = "Provided input does not seem correct for field: " "'descriptor'" + reason = "Provided input does not seem correct for field: 'descriptor'" self.assertEqual(response.status, 400) self.assertIn(reason, await response.text()) @@ -407,7 +407,7 @@ async def test_post_object_bad_json(self): "descriptor": {"studyTitle": "Highly", "studyType": "Other"}, } response = await self.client.post("/objects/study", data=json_req) - reason = "JSON is not correctly formatted. " "See: Expecting value: line 1 column 1" + reason = "JSON is not correctly formatted. See: Expecting value: line 1 column 1" self.assertEqual(response.status, 400) self.assertIn(reason, await response.text()) @@ -420,7 +420,7 @@ async def test_put_object_bad_json(self): } call = "/drafts/study/EGA123456" response = await self.client.put(call, data=json_req) - reason = "JSON is not correctly formatted. " "See: Expecting value: line 1 column 1" + reason = "JSON is not correctly formatted. 
See: Expecting value: line 1 column 1" self.assertEqual(response.status, 400) self.assertIn(reason, await response.text()) @@ -429,7 +429,7 @@ async def test_patch_object_bad_json(self): json_req = {"centerName": "GEO", "alias": "GSE10966"} call = "/drafts/study/EGA123456" response = await self.client.patch(call, data=json_req) - reason = "JSON is not correctly formatted. " "See: Expecting value: line 1 column 1" + reason = "JSON is not correctly formatted. See: Expecting value: line 1 column 1" self.assertEqual(response.status, 400) self.assertIn(reason, await response.text()) From 27a87f1205cb0d5330e1741595411d6cadeb05b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Fri, 16 Jul 2021 13:39:38 +0300 Subject: [PATCH 122/336] refactor original xml extracting method --- tests/test_middlewares.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_middlewares.py b/tests/test_middlewares.py index b56c521a5..30aa55b6a 100644 --- a/tests/test_middlewares.py +++ b/tests/test_middlewares.py @@ -25,7 +25,7 @@ async def test_bad_HTTP_request_converts_into_json_response(self): self.assertEqual(response.content_type, "application/problem+json") resp_dict = await response.json() self.assertIn("Bad Request", resp_dict["title"]) - self.assertIn("There must be a submission.xml file in submission.", resp_dict["detail"]) + self.assertIn("Submitted file was not of xml or csv type.", resp_dict["detail"]) self.assertIn("/submit", resp_dict["instance"]) async def test_bad_url_returns_json_response(self): From 7bcada6ec2ebb82ea995ef271b6da6818bee9d86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Fri, 16 Jul 2021 14:10:46 +0300 Subject: [PATCH 123/336] add csv mocks and unit test --- tests/test_handlers.py | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/tests/test_handlers.py b/tests/test_handlers.py index cf4a998eb..d7f620619 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -114,9 +114,22 @@ def create_submission_data(self, files): for schema, filename in files: schema_path = "study" if schema == "fake" else schema path_to_file = self.TESTFILES_ROOT / schema_path / filename - data.add_field( - schema.upper(), open(path_to_file.as_posix(), "r"), filename=path_to_file.name, content_type="text/xml" - ) + # Differentiate between xml and csv + if filename[-3:] == "xml": + data.add_field( + schema.upper(), + open(path_to_file.as_posix(), "r"), + filename=path_to_file.name, + content_type="text/xml", + ) + elif filename[-3:] == "csv": + # files = {schema.upper(): open(path_to_file.as_posix(), "r")} + data.add_field( + schema.upper(), + open(path_to_file.as_posix(), "r"), + filename=path_to_file.name, + content_type="text/csv", + ) return data async def fake_operator_read_metadata_object(self, schema_type, accession_id): @@ -411,6 +424,17 @@ async def test_post_object_bad_json(self): self.assertEqual(response.status, 400) self.assertIn(reason, await response.text()) + async def test_post_object_works_with_csv(self): + """Test that CSV file is parsed and submitted as json.""" + files = [("sample", "EGAformat.csv")] + data = self.create_submission_data(files) + response = await self.client.post("/objects/sample", data=data) + json_resp = await response.json() + self.assertEqual(response.status, 201) + self.assertEqual(self.test_ega_string, json_resp["accessionId"]) + self.MockedCSVParser().parse.assert_called_once() + 
self.MockedOperator().create_metadata_object.assert_called_once() + async def test_put_object_bad_json(self): """Test that put JSON is badly formated.""" json_req = { From 2460d5b70d9f4028edb2680f8b5aabd83de53777 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Mon, 19 Jul 2021 21:12:33 +0300 Subject: [PATCH 124/336] add integration test case --- tests/integration/run_tests.py | 41 ++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index 419cbea1f..ca80edf20 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -103,7 +103,8 @@ async def create_request_data(schema, filename): path_to_file = testfiles_root / schema / filename path = path_to_file.as_posix() async with aiofiles.open(path, mode="r") as f: - request_data.add_field(schema.upper(), await f.read(), filename=filename, content_type="text/xml") + c_type = "text/xml" if filename[-3:] == "xml" else "text/csv" + request_data.add_field(schema.upper(), await f.read(), filename=filename, content_type=c_type) return request_data @@ -496,7 +497,7 @@ async def test_crud_works(sess, schema, filename, folder_id): :param sess: HTTP session in which request call is made :param schema: name of the schema (folder) used for testing - :param filename: name of the file used for testing. + :param filename: name of the file used for testing :param folder_id: id of the folder used to group submission """ accession_id = await post_object(sess, schema, filename) @@ -526,6 +527,37 @@ async def test_crud_works(sess, schema, filename, folder_id): assert expected_true, "draft object still exists" +async def test_csv_post(sess, schema, filename, folder_id): + """Test CRUD for a submitted CSV file. + + Test case is basically the same as test_crud_works() but without the XML checks. + + :param sess: HTTP session in which request call is made + :param schema: name of the schema (folder) used for testing + :param filename: name of the file used for testing + :param folder_id: id of the folder used to group submission + """ + accession_id = await post_object(sess, schema, filename) + patch_object = [ + {"op": "add", "path": "/metadataObjects/-", "value": {"accessionId": accession_id[0], "schema": schema}} + ] + await patch_folder(sess, folder_id, patch_object) + async with sess.get(f"{objects_url}/{schema}/{accession_id[0]}") as resp: + LOG.debug(f"Checking that {accession_id[0]} JSON is in {schema}") + assert resp.status == 200, "HTTP Status code error" + + await delete_object(sess, schema, accession_id[0]) + async with sess.get(f"{objects_url}/{schema}/{accession_id[0]}") as resp: + LOG.debug(f"Checking that JSON object {accession_id[0]} was deleted") + assert resp.status == 404, "HTTP Status code error" + + async with sess.get(f"{folders_url}/{folder_id}") as resp: + LOG.debug(f"Checking that object {accession_id} was deleted from folder {folder_id}") + res = await resp.json() + expected_true = not any(d["accessionId"] == accession_id for d in res["metadataObjects"]) + assert expected_true, "draft object still exists" + + async def test_put_objects(sess, folder_id): """Test PUT reqs. 
@@ -1402,8 +1434,8 @@ async def main(): "description": "submission test folder 1", } submission_folder_id = await post_folder(sess, submission_folder) - await test_get_folders(sess, submission_folder_id) - await test_get_folders_objects(sess, submission_folder_id) + # await test_get_folders(sess, submission_folder_id) + # await test_get_folders_objects(sess, submission_folder_id) await test_submissions_work(sess, submission_folder_id) async with aiohttp.ClientSession() as sess: @@ -1419,6 +1451,7 @@ async def main(): basic_folder_id = await post_folder(sess, basic_folder) await asyncio.gather(*[test_crud_works(sess, schema, file, basic_folder_id) for schema, file in test_xml_files]) + await test_csv_post(sess, "sample", "EGAformat.csv", basic_folder_id) put_object_folder = { "name": "test put object", From 0f402e9365a41b30546988b29b3086c5cf94fbde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Mon, 26 Jul 2021 09:41:50 +0300 Subject: [PATCH 125/336] more refactoring --- metadata_backend/helpers/parser.py | 19 +++++++++++++++++++ tests/test_middlewares.py | 8 +++++--- tests/test_parser.py | 18 ++++++++++++++---- 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/metadata_backend/helpers/parser.py b/metadata_backend/helpers/parser.py index 51c7a7f4c..7e398fd50 100644 --- a/metadata_backend/helpers/parser.py +++ b/metadata_backend/helpers/parser.py @@ -3,6 +3,7 @@ import re import csv import io +import string from typing import Any, Dict, List, Optional, Type, Union from aiohttp import web @@ -354,6 +355,24 @@ def parse(self, schema_type: str, content: str) -> Dict: JSONValidator(result, schema_type.lower()).validate return result + def is_csv(self, content: str) -> bool: + """Quick check that CSV input is a valid CSV. + + :param content: CSV content as string + :returns: Boolean value based on validity of CSV + :raises: HTTPBadRequest if error was raised during parsing or validation + """ + try: + # Check for non-printable characters which should not be in CSV files + if not all([c in string.printable or c.isprintable() for c in content]): + return False + csv.Sniffer().sniff(content) + # No errors indicates validity of CSV + return True + except csv.Error: + # Error in getting CSV dialect would indicate invalidity + return False + def jsonpatch_mongo(identifier: Dict, json_patch: List[Dict[str, Any]]) -> List: """Convert JSONpatch object to mongo query. diff --git a/tests/test_middlewares.py b/tests/test_middlewares.py index 30aa55b6a..771c87576 100644 --- a/tests/test_middlewares.py +++ b/tests/test_middlewares.py @@ -3,6 +3,7 @@ import unittest from aiohttp import FormData, web from aiohttp.test_utils import AioHTTPTestCase +from pathlib import Path from metadata_backend.server import init from metadata_backend.api.middlewares import generate_cookie, decrypt_cookie, _check_csrf @@ -25,7 +26,7 @@ async def test_bad_HTTP_request_converts_into_json_response(self): self.assertEqual(response.content_type, "application/problem+json") resp_dict = await response.json() self.assertIn("Bad Request", resp_dict["title"]) - self.assertIn("Submitted file was not of xml or csv type.", resp_dict["detail"]) + self.assertIn("Submitted file was not proper xml or csv format.", resp_dict["detail"]) self.assertIn("/submit", resp_dict["instance"]) async def test_bad_url_returns_json_response(self): @@ -38,13 +39,14 @@ async def test_bad_url_returns_json_response(self): def _create_improper_data(): - """Create request data that produces a 404 error. 
+ """Create request data that produces a 400 error. Submission method in API handlers raises Bad Request (400) error if 'submission' is not included on the first field of request """ + path_to_file = Path(__file__).parent / "test_files" / "study" / "SRP000539_invalid.xml" data = FormData() - data.add_field("study", "content of a file", filename="file", content_type="text/xml") + data.add_field("STUDY", open(path_to_file.as_posix(), "r"), filename="file", content_type="text/xml") return data diff --git a/tests/test_parser.py b/tests/test_parser.py index 55c87afd2..8f541dc47 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -108,6 +108,11 @@ def test_multiline_csv_raises_error(self): with self.assertRaises(web.HTTPBadRequest): self.csv_parser.parse("sample", "id,title\n1,something\n2,something else\n") + def test_csv_parse_with_wrong_schema(self): + """Test 400 is raised with wrong schema type.""" + with self.assertRaises(web.HTTPBadRequest): + self.csv_parser.parse("wrong", "id,title\n,\n") + def test_empty_csv_raises_error(self): """Test 400 is raised with an empty or an incomplete CSV input.""" with self.assertRaises(web.HTTPBadRequest): @@ -115,10 +120,15 @@ def test_empty_csv_raises_error(self): with self.assertRaises(web.HTTPBadRequest): self.csv_parser.parse("sample", "id,title,description\n") - def test_csv_parse_with_wrong_schema(self): - """Test 400 is raised with wrong schema type.""" - with self.assertRaises(web.HTTPBadRequest): - self.csv_parser.parse("wrong", "id,title\n,\n") + def test_is_csv_check(self): + """Test 400 is raised with an empty or an incomplete CSV input.""" + sample_csv = self.load_file_to_text("sample", "EGAformat.csv") + sample_xml = self.load_file_to_text("sample", "SRS001433.xml") + self.assertEqual(self.csv_parser.is_csv(sample_csv), True) + self.assertEqual(self.csv_parser.is_csv(sample_xml), False) + self.assertEqual(self.csv_parser.is_csv(""), False) + self.assertEqual(self.csv_parser.is_csv("a@b@;c@d@,e@f\ng"), False) + self.assertEqual(self.csv_parser.is_csv("id,title,description\n,\n"), False) def test_json_patch_mongo_conversion(self): """Test JSON patch to mongo query conversion.""" From da411445fb60acb7d27ac70a583ecf5298abbb2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Mon, 26 Jul 2021 09:57:26 +0300 Subject: [PATCH 126/336] comment fix --- tests/test_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index 8f541dc47..2e8aef0e3 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -121,7 +121,7 @@ def test_empty_csv_raises_error(self): self.csv_parser.parse("sample", "id,title,description\n") def test_is_csv_check(self): - """Test 400 is raised with an empty or an incomplete CSV input.""" + """Test that True is returned for CSV and False is returned for non-CSV.""" sample_csv = self.load_file_to_text("sample", "EGAformat.csv") sample_xml = self.load_file_to_text("sample", "SRS001433.xml") self.assertEqual(self.csv_parser.is_csv(sample_csv), True) From 8cd85d17d87c36b893f7c46f2dc8d001f977f65d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Thu, 29 Jul 2021 10:16:22 +0300 Subject: [PATCH 127/336] comment fixes, variable name fix, move csv validation --- metadata_backend/helpers/parser.py | 19 ------------------- tests/integration/run_tests.py | 4 ++-- tests/test_parser.py | 10 ---------- 3 files changed, 2 insertions(+), 31 deletions(-) diff --git a/metadata_backend/helpers/parser.py 
b/metadata_backend/helpers/parser.py index 7e398fd50..51c7a7f4c 100644 --- a/metadata_backend/helpers/parser.py +++ b/metadata_backend/helpers/parser.py @@ -3,7 +3,6 @@ import re import csv import io -import string from typing import Any, Dict, List, Optional, Type, Union from aiohttp import web @@ -355,24 +354,6 @@ def parse(self, schema_type: str, content: str) -> Dict: JSONValidator(result, schema_type.lower()).validate return result - def is_csv(self, content: str) -> bool: - """Quick check that CSV input is a valid CSV. - - :param content: CSV content as string - :returns: Boolean value based on validity of CSV - :raises: HTTPBadRequest if error was raised during parsing or validation - """ - try: - # Check for non-printable characters which should not be in CSV files - if not all([c in string.printable or c.isprintable() for c in content]): - return False - csv.Sniffer().sniff(content) - # No errors indicates validity of CSV - return True - except csv.Error: - # Error in getting CSV dialect would indicate invalidity - return False - def jsonpatch_mongo(identifier: Dict, json_patch: List[Dict[str, Any]]) -> List: """Convert JSONpatch object to mongo query. diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index ca80edf20..e57519dfd 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -1434,8 +1434,8 @@ async def main(): "description": "submission test folder 1", } submission_folder_id = await post_folder(sess, submission_folder) - # await test_get_folders(sess, submission_folder_id) - # await test_get_folders_objects(sess, submission_folder_id) + await test_get_folders(sess, submission_folder_id) + await test_get_folders_objects(sess, submission_folder_id) await test_submissions_work(sess, submission_folder_id) async with aiohttp.ClientSession() as sess: diff --git a/tests/test_parser.py b/tests/test_parser.py index 2e8aef0e3..b3146d204 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -120,16 +120,6 @@ def test_empty_csv_raises_error(self): with self.assertRaises(web.HTTPBadRequest): self.csv_parser.parse("sample", "id,title,description\n") - def test_is_csv_check(self): - """Test that True is returned for CSV and False is returned for non-CSV.""" - sample_csv = self.load_file_to_text("sample", "EGAformat.csv") - sample_xml = self.load_file_to_text("sample", "SRS001433.xml") - self.assertEqual(self.csv_parser.is_csv(sample_csv), True) - self.assertEqual(self.csv_parser.is_csv(sample_xml), False) - self.assertEqual(self.csv_parser.is_csv(""), False) - self.assertEqual(self.csv_parser.is_csv("a@b@;c@d@,e@f\ng"), False) - self.assertEqual(self.csv_parser.is_csv("id,title,description\n,\n"), False) - def test_json_patch_mongo_conversion(self): """Test JSON patch to mongo query conversion.""" json_patch = [ From 4f74b0bdfe899f3acf174e0e892f7db0745c154f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Thu, 29 Jul 2021 12:44:09 +0300 Subject: [PATCH 128/336] separate file type check to own method --- metadata_backend/helpers/parser.py | 1 + tests/test_middlewares.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/metadata_backend/helpers/parser.py b/metadata_backend/helpers/parser.py index 51c7a7f4c..057883fac 100644 --- a/metadata_backend/helpers/parser.py +++ b/metadata_backend/helpers/parser.py @@ -352,6 +352,7 @@ def parse(self, schema_type: str, content: str) -> Dict: result["sampleName"] = '{"taxonId": 9606}' result["sampleName"] = eval(result["sampleName"]) # workaround 
for mypy complaint JSONValidator(result, schema_type.lower()).validate + LOG.info("CSV was converted to JSON successfully.") return result diff --git a/tests/test_middlewares.py b/tests/test_middlewares.py index 771c87576..5cf6b9f8c 100644 --- a/tests/test_middlewares.py +++ b/tests/test_middlewares.py @@ -26,7 +26,7 @@ async def test_bad_HTTP_request_converts_into_json_response(self): self.assertEqual(response.content_type, "application/problem+json") resp_dict = await response.json() self.assertIn("Bad Request", resp_dict["title"]) - self.assertIn("Submitted file was not proper xml or csv format.", resp_dict["detail"]) + self.assertIn("Submitted file was not proper xml (nor csv).", resp_dict["detail"]) self.assertIn("/submit", resp_dict["instance"]) async def test_bad_url_returns_json_response(self): From 5cfc34712a0c9ca34ae3d41939a823abeca96e85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Fri, 30 Jul 2021 13:19:28 +0300 Subject: [PATCH 129/336] change xml/csv test order --- tests/test_middlewares.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_middlewares.py b/tests/test_middlewares.py index 5cf6b9f8c..cd3bfd928 100644 --- a/tests/test_middlewares.py +++ b/tests/test_middlewares.py @@ -26,7 +26,7 @@ async def test_bad_HTTP_request_converts_into_json_response(self): self.assertEqual(response.content_type, "application/problem+json") resp_dict = await response.json() self.assertIn("Bad Request", resp_dict["title"]) - self.assertIn("Submitted file was not proper xml (nor csv).", resp_dict["detail"]) + self.assertIn("Submitted file was not proper XML nor CSV.", resp_dict["detail"]) self.assertIn("/submit", resp_dict["instance"]) async def test_bad_url_returns_json_response(self): From 0979bb8756e95591321a71857f936031a4c40534 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joonatan=20M=C3=A4kinen?= Date: Fri, 6 Aug 2021 05:38:35 +0300 Subject: [PATCH 130/336] enable multiline csv file uploading --- metadata_backend/helpers/parser.py | 28 +++++++++---------- tests/integration/run_tests.py | 6 ++-- .../test_files/sample/EGA_sample_w_issue.csv | 4 +++ tests/test_files/sample/EGAformat.csv | 4 ++- tests/test_files/sample/empty.csv | 1 + tests/test_handlers.py | 11 ++++++++ tests/test_parser.py | 10 ++----- 7 files changed, 39 insertions(+), 25 deletions(-) create mode 100644 tests/test_files/sample/EGA_sample_w_issue.csv create mode 100644 tests/test_files/sample/empty.csv diff --git a/metadata_backend/helpers/parser.py b/metadata_backend/helpers/parser.py index 057883fac..53bbb26b7 100644 --- a/metadata_backend/helpers/parser.py +++ b/metadata_backend/helpers/parser.py @@ -319,7 +319,7 @@ def _load_schema(schema_type: str) -> XMLSchema: class CSVToJSONParser: """Methods to parse and convert data from CSV files to JSON format.""" - def parse(self, schema_type: str, content: str) -> Dict: + def parse(self, schema_type: str, content: str) -> List: """Parse a CSV file, convert it to JSON and validate against JSON schema. :param schema_type: Schema type of the file to be parsed @@ -339,21 +339,19 @@ def parse(self, schema_type: str, content: str) -> Dict: reason = "CSV file appears to be incomplete. No rows of data were parsed." LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - if len(rows) > 1: - reason = "Multi-line CSV files are currently not supported." 
- LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - result = rows[0] - # This is required to pass validation against current sample schema - if schema_type == "sample" and "sampleName" not in result: - # Without TaxonID provided we assume the sample relates to - # Homo Sapien which has default TaxonID of 9606 - result["sampleName"] = '{"taxonId": 9606}' - result["sampleName"] = eval(result["sampleName"]) # workaround for mypy complaint - JSONValidator(result, schema_type.lower()).validate - LOG.info("CSV was converted to JSON successfully.") - return result + for row in rows: + LOG.info(row) + # This is required to pass validation against current sample schema + if schema_type == "sample" and "sampleName" not in row: + # Without TaxonID provided we assume the sample relates to + # Homo Sapien which has default TaxonID of 9606 + row["sampleName"] = '{"taxonId": 9606}' + row["sampleName"] = eval(row["sampleName"]) # workaround for mypy complaint + JSONValidator(row, schema_type.lower()).validate + + LOG.info(f"CSV was successfully converted to {len(rows)} JSON object(s).") + return rows def jsonpatch_mongo(identifier: Dict, json_patch: List[Dict[str, Any]]) -> List: diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index e57519dfd..6f5da2107 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -9,6 +9,8 @@ import logging import os import re +from pathlib import Path +from typing import List import urllib import xml.etree.ElementTree as ET from datetime import datetime @@ -149,10 +151,10 @@ async def post_object(sess, schema, filename): """ request_data = await create_request_data(schema, filename) async with sess.post(f"{objects_url}/{schema}", data=request_data) as resp: - LOG.debug(f"Adding new object to {schema}, via XML file {filename}") + LOG.debug(f"Adding new object to {schema}, via XML/CSV file {filename}") assert resp.status == 201, "HTTP Status code error" ans = await resp.json() - return ans["accessionId"], schema + return ans if isinstance(ans, List) else ans["accessionId"], schema async def post_object_json(sess, schema, filename): diff --git a/tests/test_files/sample/EGA_sample_w_issue.csv b/tests/test_files/sample/EGA_sample_w_issue.csv new file mode 100644 index 000000000..26efa6ff0 --- /dev/null +++ b/tests/test_files/sample/EGA_sample_w_issue.csv @@ -0,0 +1,4 @@ +title,alias,description,subjectId,bioSampleId,caseOrControl,gender,organismPart,cellLine,region,phenotype +test sample,test1,A test sample metadata based on the EGA submitter portal format,123456789abc,,,unknown,,,,some disease state +,,,,,,,,,, +third test sample,test3,One more test sample metadata,123456789abc,,,unknown,,,,some disease state diff --git a/tests/test_files/sample/EGAformat.csv b/tests/test_files/sample/EGAformat.csv index 34aa281ab..6261a5584 100644 --- a/tests/test_files/sample/EGAformat.csv +++ b/tests/test_files/sample/EGAformat.csv @@ -1,2 +1,4 @@ title,alias,description,subjectId,bioSampleId,caseOrControl,gender,organismPart,cellLine,region,phenotype -test sample,test1,A test sample metadata CSV file based on the EGA submitter portal format,123456789abc,,,unknown,,,,some disease state +test sample,test1,A test sample metadata based on the EGA submitter portal format,123456789abc,,,unknown,,,,some disease state +another test sample,test2,Another test sample metadata,123456789abc,,,unknown,,,,some disease state +third test sample,test3,One more test sample metadata,123456789abc,,,unknown,,,,some disease state diff --git 
a/tests/test_files/sample/empty.csv b/tests/test_files/sample/empty.csv new file mode 100644 index 000000000..a5ac6cc45 --- /dev/null +++ b/tests/test_files/sample/empty.csv @@ -0,0 +1 @@ +title,alias,description,subjectId,bioSampleId,caseOrControl,gender,organismPart,cellLine,region,phenotype diff --git a/tests/test_handlers.py b/tests/test_handlers.py index d7f620619..1d1902e98 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -428,6 +428,7 @@ async def test_post_object_works_with_csv(self): """Test that CSV file is parsed and submitted as json.""" files = [("sample", "EGAformat.csv")] data = self.create_submission_data(files) + self.MockedCSVParser().parse.return_value = [{}, {}, {}] response = await self.client.post("/objects/sample", data=data) json_resp = await response.json() self.assertEqual(response.status, 201) @@ -435,6 +436,16 @@ async def test_post_object_works_with_csv(self): self.MockedCSVParser().parse.assert_called_once() self.MockedOperator().create_metadata_object.assert_called_once() + async def test_post_objet_error_with_empty(self): + """Test multipart request post fails when no objects are parsed.""" + files = [("sample", "empty.csv")] + data = self.create_submission_data(files) + response = await self.client.post("/objects/sample", data=data) + json_resp = await response.json() + self.assertEqual(response.status, 400) + self.assertEqual(json_resp["detail"], "Request data seems empty.") + self.MockedCSVParser().parse.assert_called_once() + async def test_put_object_bad_json(self): """Test that put JSON is badly formated.""" json_req = { diff --git a/tests/test_parser.py b/tests/test_parser.py index b3146d204..fb5ad297d 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -100,13 +100,9 @@ def test_csv_sample_is_parsed(self): """Test that a CSV sample is parsed and validated.""" sample_csv = self.load_file_to_text("sample", "EGAformat.csv") result = self.csv_parser.parse("sample", sample_csv) - self.assertEqual("test sample", result["title"]) - self.assertEqual({"taxonId": 9606}, result["sampleName"]) - - def test_multiline_csv_raises_error(self): - """Test 400 is raised with a multi-line CSV input.""" - with self.assertRaises(web.HTTPBadRequest): - self.csv_parser.parse("sample", "id,title\n1,something\n2,something else\n") + self.assertEqual(len(result), 3) + self.assertEqual("test sample", result[0]["title"]) + self.assertEqual({"taxonId": 9606}, result[0]["sampleName"]) def test_csv_parse_with_wrong_schema(self): """Test 400 is raised with wrong schema type.""" From 531afd8790d2af0d2937e5f2e1f978b46ba2524e Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 31 Aug 2021 14:57:17 +0300 Subject: [PATCH 131/336] remove eval, as it is not ideal --- metadata_backend/helpers/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata_backend/helpers/parser.py b/metadata_backend/helpers/parser.py index 53bbb26b7..88492c098 100644 --- a/metadata_backend/helpers/parser.py +++ b/metadata_backend/helpers/parser.py @@ -347,7 +347,7 @@ def parse(self, schema_type: str, content: str) -> List: # Without TaxonID provided we assume the sample relates to # Homo Sapien which has default TaxonID of 9606 row["sampleName"] = '{"taxonId": 9606}' - row["sampleName"] = eval(row["sampleName"]) # workaround for mypy complaint + row["sampleName"] = row["sampleName"] JSONValidator(row, schema_type.lower()).validate LOG.info(f"CSV was successfully converted to {len(rows)} JSON object(s).") From 42b49591c706d9ae1342c2e62a6e10a9d68a857f Mon 
Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 21 Dec 2021 03:02:06 +0200 Subject: [PATCH 132/336] cleanup some bad formatting of comments --- metadata_backend/helpers/logger.py | 2 +- tests/integration/mock_doi_api.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata_backend/helpers/logger.py b/metadata_backend/helpers/logger.py index 34af3161d..797aa58db 100644 --- a/metadata_backend/helpers/logger.py +++ b/metadata_backend/helpers/logger.py @@ -6,7 +6,7 @@ import os FORMAT = ( - "[%(asctime)s][%(name)s][%(process)d %(processName)s]" "[%(levelname)-8s](L:%(lineno)s) %(funcName)s: %(message)s" + "[%(asctime)s][%(name)s][%(process)d %(processName)s] [%(levelname)-8s](L:%(lineno)s) %(funcName)s: %(message)s" ) logging.basicConfig(format=FORMAT, datefmt="%Y-%m-%d %H:%M:%S") diff --git a/tests/integration/mock_doi_api.py b/tests/integration/mock_doi_api.py index 3641409a0..327523f65 100644 --- a/tests/integration/mock_doi_api.py +++ b/tests/integration/mock_doi_api.py @@ -12,7 +12,7 @@ async def dois(req: web.Request) -> web.Response: try: content = await req.json() except json.decoder.JSONDecodeError as e: - reason = "JSON is not correctly formatted." f" See: {e}" + reason = f"JSON is not correctly formatted. See: {e}" logging.info(reason) raise web.HTTPBadRequest(reason=reason) From 65682b5e4d26c4c91fb9e1568c61aeb92446c569 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 29 Dec 2021 13:40:47 +0200 Subject: [PATCH 133/336] add state configuration on startup event --- metadata_backend/server.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/metadata_backend/server.py b/metadata_backend/server.py index 8b0be93d1..56696eba4 100644 --- a/metadata_backend/server.py +++ b/metadata_backend/server.py @@ -33,10 +33,23 @@ async def kill_sess_on_shutdown(app: web.Application) -> None: # Purge the openstack connection from the server app["Session"].pop(key) LOG.debug("Purged connection information for %s :: %s", key, time.ctime()) - app["Cookies"] = set({}) LOG.debug("Removed session") +async def startup(server: web.Application) -> None: + """Add startup web server state configuration.""" + # Mutable_map handles cookie storage, also stores the object that provides + # the encryption we use + server["Crypt"] = Fernet(Fernet.generate_key()) + # Create a signature salt to prevent editing the signature on the client + # side. Hash function doesn't need to be cryptographically secure, it's + # just a convenient way of getting ascii output from byte values. + server["Salt"] = secrets.token_hex(64) + server["Session"] = {} + server["Cookies"] = set({}) + server["OIDC_State"] = set({}) + + async def init() -> web.Application: """Initialise server and setup routes. @@ -50,16 +63,7 @@ async def init() -> web.Application: """ server = web.Application() - # Mutable_map handles cookie storage, also stores the object that provides - # the encryption we use - server["Crypt"] = Fernet(Fernet.generate_key()) - # Create a signature salt to prevent editing the signature on the client - # side. Hash function doesn't need to be cryptographically secure, it's - # just a convenient way of getting ascii output from byte values. 
- server["Salt"] = secrets.token_hex(64) - server["Session"] = {} - server["Cookies"] = set({}) - server["OIDC_State"] = set({}) + server.on_startup.append(startup) server.middlewares.append(http_error_handler) server.middlewares.append(check_login) From bffee72cd1d7d889fa146a773b7c3fbd24b8b7da Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 29 Dec 2021 13:41:53 +0200 Subject: [PATCH 134/336] fix dependencies in integration tests fix wrong message in unit tests no need to import typing, plain list will do other small misses and fixes make ujson default everywhere --- metadata_backend/api/auth.py | 4 ++-- metadata_backend/helpers/schema_loader.py | 4 ++-- tests/integration/run_tests.py | 4 +--- tests/test_middlewares.py | 2 +- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/metadata_backend/api/auth.py b/metadata_backend/api/auth.py index cf85bc61d..6be0e740d 100644 --- a/metadata_backend/api/auth.py +++ b/metadata_backend/api/auth.py @@ -3,7 +3,7 @@ import secrets import urllib.parse import hashlib -import json +import ujson from aiohttp import web, BasicAuth, ClientSession from aiohttp.web import Request, Response @@ -126,7 +126,7 @@ async def callback(self, req: Request) -> Response: hashlib.sha256((cookie["id"] + cookie["referer"] + req.app["Salt"]).encode("utf-8")) ).hexdigest() - cookie_crypted = req.app["Crypt"].encrypt(json.dumps(cookie).encode("utf-8")).decode("utf-8") + cookie_crypted = req.app["Crypt"].encrypt(ujson.dumps(cookie).encode("utf-8")).decode("utf-8") response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate" response.headers["Pragma"] = "no-Cache" diff --git a/metadata_backend/helpers/schema_loader.py b/metadata_backend/helpers/schema_loader.py index 991c6a202..a30a6ee25 100644 --- a/metadata_backend/helpers/schema_loader.py +++ b/metadata_backend/helpers/schema_loader.py @@ -4,7 +4,7 @@ probably be replaced with database searching in the future. 
""" -import json +import ujson from abc import ABC, abstractmethod from pathlib import Path from typing import Any @@ -97,5 +97,5 @@ def get_schema(self, schema_type: str) -> dict: """ file = self._identify_file(schema_type) with file.open() as f: - schema_content = json.load(f) + schema_content = ujson.load(f) return schema_content diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index 6f5da2107..c3d815b62 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -9,8 +9,6 @@ import logging import os import re -from pathlib import Path -from typing import List import urllib import xml.etree.ElementTree as ET from datetime import datetime @@ -154,7 +152,7 @@ async def post_object(sess, schema, filename): LOG.debug(f"Adding new object to {schema}, via XML/CSV file {filename}") assert resp.status == 201, "HTTP Status code error" ans = await resp.json() - return ans if isinstance(ans, List) else ans["accessionId"], schema + return ans if isinstance(ans, list) else ans["accessionId"], schema async def post_object_json(sess, schema, filename): diff --git a/tests/test_middlewares.py b/tests/test_middlewares.py index cd3bfd928..e54ea8bf3 100644 --- a/tests/test_middlewares.py +++ b/tests/test_middlewares.py @@ -26,7 +26,7 @@ async def test_bad_HTTP_request_converts_into_json_response(self): self.assertEqual(response.content_type, "application/problem+json") resp_dict = await response.json() self.assertIn("Bad Request", resp_dict["title"]) - self.assertIn("Submitted file was not proper XML nor CSV.", resp_dict["detail"]) + self.assertIn("There must be a submission.xml file in submission.", resp_dict["detail"]) self.assertIn("/submit", resp_dict["instance"]) async def test_bad_url_returns_json_response(self): From 9f4102d9c39076107196b9853504599232c38f04 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 29 Dec 2021 16:49:12 +0200 Subject: [PATCH 135/336] allow csv only for sample objects parser now supports sample headers and validates them adjust unit tests and integration tests we onlit rows that are not ok --- docs/specification.yml | 4 +- metadata_backend/api/handlers/common.py | 166 ++++++++++++++++---- metadata_backend/api/handlers/object.py | 56 +++++-- metadata_backend/api/handlers/submission.py | 6 +- metadata_backend/helpers/parser.py | 55 +++++-- tests/integration/run_tests.py | 68 ++++++-- tests/test_handlers.py | 28 +++- 7 files changed, 298 insertions(+), 85 deletions(-) diff --git a/docs/specification.yml b/docs/specification.yml index 14447547f..48e5e69b6 100644 --- a/docs/specification.yml +++ b/docs/specification.yml @@ -317,7 +317,9 @@ paths: content: application/json: schema: - $ref: "#/components/schemas/ObjectCreated" + oneOf: + - $ref: "#/components/schemas/ObjectCreated" + - $ref: "#/components/schemas/ObjectsCreated" 400: description: Bad Request content: diff --git a/metadata_backend/api/handlers/common.py b/metadata_backend/api/handlers/common.py index ff1470dd0..b285b5f81 100644 --- a/metadata_backend/api/handlers/common.py +++ b/metadata_backend/api/handlers/common.py @@ -1,26 +1,34 @@ """Functions shared between handlers.""" -from typing import List, Tuple, cast +import csv +import string +from typing import List, Tuple, Dict, Any -from aiohttp import BodyPartReader, web +from aiohttp import BodyPartReader, web, hdrs, MultipartReader from aiohttp.web import Request +from xmlschema import XMLResource +from xmlschema.etree import ElementTree + from ...conf.conf import schema_types from ...helpers.logger import 
LOG +from ...helpers.parser import CSVToJSONParser -async def extract_xml_upload(req: Request, extract_one: bool = False) -> List[Tuple[str, str]]: - """Extract submitted xml-file(s) from multi-part request. +async def multipart_content( + req: Request, extract_one: bool = False, expect_xml: bool = False +) -> Tuple[List[Tuple[Any, str]], str]: + """Get content(s) and schema type(s) of a multipart request (from either csv or xml format). - Files are sorted to spesific order by their schema priorities (e.g. - submission should be processed before study). + Note: for multiple files support check: https://docs.aiohttp.org/en/stable/multipart.html#hacking-multipart :param req: POST request containing "multipart/form-data" upload - :raises: HTTPBadRequest if request is not valid for multipart or multiple files sent. HTTPNotFound if - schema was not found. - :returns: content and schema type for each uploaded file, sorted by schema - type. + :param extract_one: boolean stating whether multiple files should be handled + :param expect_xml: boolean stating if file can be expected to be XML + :raises: HTTPBadRequest for multiple different reasons + :returns: content and schema type for each uploaded file and file type of the upload """ - files: List[Tuple[str, str]] = [] + xml_files: List[Tuple[str, str]] = [] + csv_files: List[Tuple[Dict, str]] = [] try: reader = await req.multipart() except AssertionError: @@ -29,28 +37,124 @@ async def extract_xml_upload(req: Request, extract_one: bool = False) -> List[Tu raise web.HTTPBadRequest(reason=reason) while True: part = await reader.next() - # Following is probably error in aiohttp type hints, fixing so - # mypy doesn't complain about it. No runtime consequences. - part = cast(BodyPartReader, part) + # we expect a simple body part (BodyPartReader) instance here + # otherwise, it will be another MultipartReader instance for the nested multipart. + # we don't need to cast the part BodyPartReader, we fail if we get anything else + if isinstance(part, MultipartReader): + reason = "Only one file can be sent to this endpoint at a time." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) if not part: break - if extract_one and files: + if extract_one and (xml_files or csv_files): reason = "Only one file can be sent to this endpoint at a time." LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - if part.name: - schema_type = part.name.lower() - if schema_type not in schema_types: - reason = f"Specified schema {schema_type} was not found." 
- LOG.error(reason) - raise web.HTTPNotFound(reason=reason) - data = [] - while True: - chunk = await part.read_chunk() - if not chunk: - break - data.append(chunk) - xml_content = "".join(x.decode("UTF-8") for x in data) - files.append((xml_content, schema_type)) - LOG.debug(f"processed file in {schema_type}") - return sorted(files, key=lambda x: schema_types[x[1]]["priority"]) + + # we check the multipart request header to see file type + # or we expect XML file directly + # additionally we check that the content is XML or CSV to be accurate + if expect_xml or part.headers[hdrs.CONTENT_TYPE] == "text/xml": + content, schema_type = await _extract_upload(part) + _check_xml(content) + xml_files.append((content, schema_type)) + elif part.headers[hdrs.CONTENT_TYPE] == "text/csv": + content, schema_type = await _extract_upload(part) + _check_csv(content) + csv_content = CSVToJSONParser().parse(schema_type, content) + for row in csv_content: + csv_files.append((row, schema_type)) + else: + reason = "Submitted file was not proper XML nor CSV." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + # Return extracted content + return _get_content_with_type(xml_files, csv_files) + + +async def _extract_upload(part: BodyPartReader) -> Tuple[str, str]: + """Extract a submitted file from upload. + + :param part: Multipart reader for single body part + :raises: HTTPNotFound if schema was not found + :returns: content as text and schema type for uploaded file + """ + schema_type = part.name.lower() if part.name else "none" + if schema_type not in schema_types: + reason = f"Specified schema {schema_type} was not found." + LOG.error(reason) + raise web.HTTPNotFound(reason=reason) + data = [] + while True: + chunk = await part.read_chunk() + if not chunk: + break + data.append(chunk) + xml_content = "".join(x.decode("UTF-8") for x in data) + LOG.debug(f"Processed file in {schema_type}") + return xml_content, schema_type + + +def _check_csv(content: str) -> bool: + """Check if content is in CSV format. + + :param content: Text of file content + :raises: HTTPBadRequest if both CSV validation fails + :returns: true if file is CSV + """ + try: + # Check for non-printable characters which should not be in CSV files + if not all([c in string.printable or c.isprintable() for c in content]): + raise csv.Error + csv.Sniffer().sniff(content) + # No errors indicates validity of CSV + LOG.info("Valid CSV content was extracted.") + return True + except csv.Error: + reason = "Submitted file was not proper formatted as CSV." + LOG.error(reason) + return False + + +def _check_xml(content: str) -> bool: + """Check if content is in XML format. + + :param content: Text of file content + :raises: HTTPBadRequest if both XML validation fails + :returns: name of file type + """ + try: + XMLResource(content, allow="local", defuse="always") + LOG.info("Valid XML content was extracted.") + return True + except ElementTree.ParseError as err: + reason = f"Submitted file was not proper XML. Error: {err}" + LOG.error(reason) + return False + + +def _get_content_with_type( + xml_files: List[Tuple[str, str]], csv_files: List[Tuple[Dict, str]] +) -> Tuple[List[Tuple[Any, str]], str]: + """Return either list of XML or CSV files with the file type info. 
+ + :param xml_files: List of xml contents with schema types + :param csv_files: List of csv contents with schema types + :raises: HTTPBadRequest if both lists are populated or empty + :returns: List of xml or csv files with string stating which file type + """ + if xml_files and csv_files: + reason = "Request contained both xml and csv file types. Only one file type can be processed in this endpoint." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + elif xml_files: + # Files are sorted to spesific order by their schema priorities + # (e.g. submission should be processed before study). + return sorted(xml_files, key=lambda x: schema_types[x[1]]["priority"]), "xml" + elif csv_files: + return csv_files, "csv" + else: + reason = "Request data seems empty." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index 486649991..506de11c5 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -1,6 +1,6 @@ """Handle HTTP methods for server.""" from math import ceil -from typing import Dict, Union +from typing import Dict, Union, List, Any, Tuple import ujson from aiohttp import web @@ -10,7 +10,7 @@ from ...helpers.logger import LOG from ...helpers.validator import JSONValidator from ..operators import FolderOperator, Operator, XMLOperator -from .common import extract_xml_upload +from .common import multipart_content from .restapi import RESTAPIHandler @@ -93,35 +93,62 @@ async def get_object(self, req: Request) -> Response: async def post_object(self, req: Request) -> Response: """Save metadata object to database. - For JSON request body we validate it is consistent with the - associated JSON schema. + For JSON request body we validate it is consistent with the associated JSON schema. + For CSV upload we allow it for a select number objects, currently: ``sample``. :param req: POST request :returns: JSON response containing accessionId for submitted object """ + _allowed_csv = ["sample"] schema_type = req.match_info["schema"] self._check_schema_exists(schema_type) collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type db_client = req.app["db_client"] - content: Union[Dict, str] + content: Union[Dict[str, Any], str, List[Tuple[Any, str]]] operator: Union[Operator, XMLOperator] if req.content_type == "multipart/form-data": - files = await extract_xml_upload(req, extract_one=True) - content, _ = files[0] - operator = XMLOperator(db_client) + _only_xml = False if schema_type in _allowed_csv else True + files, cont_type = await multipart_content(req, extract_one=True, expect_xml=_only_xml) + if cont_type == "xml": + # from this tuple we only care about the content + # files should be of form (content, schema) + content, _ = files[0] + else: + # for CSV files we need to tread this as a list of tuples (content, schema) + content = files + # If multipart request contains XML, XML operator is used. + # Else the multipart request is expected to contain CSV file(s) which are converted into JSON. 
+            operator = XMLOperator(db_client) if cont_type == "xml" else Operator(db_client)
         else:
             content = await self._get_data(req)
             if not req.path.startswith("/drafts"):
                 JSONValidator(content, schema_type).validate
             operator = Operator(db_client)
 
-        accession_id = await operator.create_metadata_object(collection, content)
-
-        body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False)
+        # Add a new metadata object or multiple objects if multiple were extracted
         url = f"{req.scheme}://{req.host}{req.path}"
-        location_headers = CIMultiDict(Location=f"{url}/{accession_id}")
-        LOG.info(f"POST object with accesssion ID {accession_id} in schema {collection} was successful.")
+        data: Union[List[Dict[str, str]], Dict[str, str]]
+        if isinstance(content, List):
+            LOG.debug(f"Inserting multiple objects for {schema_type}.")
+            ids: List[Dict[str, str]] = []
+            for item in content:
+                accession_id = await operator.create_metadata_object(collection, item[0])
+                ids.append({"accessionId": accession_id})
+                LOG.info(f"POST object with accession ID {accession_id} in schema {collection} was successful.")
+            # we format like this to make it consistent with the response from the /submit endpoint
+            data = [dict(item, **{"schema": schema_type}) for item in ids]
+            # we take the first result if we get multiple
+            location_headers = CIMultiDict(Location=f"{url}/{data[0]['accessionId']}")
+        else:
+            accession_id = await operator.create_metadata_object(collection, content)
+            data = {"accessionId": accession_id}
+
+            location_headers = CIMultiDict(Location=f"{url}/{accession_id}")
+            LOG.info(f"POST object with accession ID {accession_id} in schema {collection} was successful.")
+
+        body = ujson.dumps(data, escape_forward_slashes=False)
+
         return web.Response(
             body=body,
             status=201,
@@ -180,6 +207,7 @@ async def put_object(self, req: Request) -> Response:
         """Replace metadata object in database.
 
         For JSON request we don't allow replacing in the DB.
+        For CSV upload we don't allow replacing either, as identifying the fields to replace is problematic.
 
         :param req: PUT request
         :raises: HTTPUnsupportedMediaType if JSON replace is attempted
@@ -194,7 +222,7 @@ async def put_object(self, req: Request) -> Response:
         content: Union[Dict, str]
         operator: Union[Operator, XMLOperator]
         if req.content_type == "multipart/form-data":
-            files = await extract_xml_upload(req, extract_one=True)
+            files, _ = await multipart_content(req, extract_one=True, expect_xml=True)
             content, _ = files[0]
             operator = XMLOperator(db_client)
         else:
diff --git a/metadata_backend/api/handlers/submission.py b/metadata_backend/api/handlers/submission.py
index 5a95fa804..8d36b9a73 100644
--- a/metadata_backend/api/handlers/submission.py
+++ b/metadata_backend/api/handlers/submission.py
@@ -14,7 +14,7 @@
 from ...helpers.schema_loader import SchemaNotFoundException, XMLSchemaLoader
 from ...helpers.validator import XMLValidator
 from ..operators import Operator, XMLOperator
-from .common import extract_xml_upload
+from .common import multipart_content
 
 
 class SubmissionAPIHandler:
@@ -31,7 +31,7 @@ async def submit(self, req: Request) -> Response:
         :raises: HTTPBadRequest if request is missing some parameters or cannot be processed
         :returns: XML-based receipt from submission
         """
-        files = await extract_xml_upload(req)
+        files, _ = await multipart_content(req, expect_xml=True)
         schema_types = Counter(file[1] for file in files)
         if "submission" not in schema_types:
             reason = "There must be a submission.xml file in submission."
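For reference, the CSV upload path added above can be exercised end to end with a small client script. The following is a minimal sketch rather than part of the patch: the service URL and the local ``EGAformat.csv`` file are assumptions, while the multipart field name carrying the schema type and the ``text/csv`` content type mirror the checks in ``multipart_content()``:

```python
import asyncio

import aiohttp


async def post_sample_csv(csv_path: str) -> list:
    """POST one CSV file to /objects/sample and return the created IDs."""
    data = aiohttp.FormData()
    with open(csv_path) as f:
        # the field name selects the schema; text/csv routes the part
        # through _check_csv() and CSVToJSONParser on the server side
        data.add_field("sample", f.read(), content_type="text/csv")
    async with aiohttp.ClientSession() as sess:
        async with sess.post("http://localhost:5430/objects/sample", data=data) as resp:
            assert resp.status == 201, f"HTTP Status code error, got {resp.status}"
            # one {"accessionId": ..., "schema": "sample"} entry per CSV row
            return await resp.json()


if __name__ == "__main__":
    print(asyncio.run(post_sample_csv("EGAformat.csv")))
```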
@@ -92,7 +92,7 @@ async def validate(self, req: Request) -> Response:
         :param req: Multipart POST request with submission.xml and files
         :returns: JSON response indicating if validation was successful or not
         """
-        files = await extract_xml_upload(req, extract_one=True)
+        files, _ = await multipart_content(req, extract_one=True, expect_xml=True)
         xml_content, schema_type = files[0]
         validator = await self._perform_validation(schema_type, xml_content)
         return web.Response(body=validator.resp_body, content_type="application/json")
diff --git a/metadata_backend/helpers/parser.py b/metadata_backend/helpers/parser.py
index 88492c098..1228bf379 100644
--- a/metadata_backend/helpers/parser.py
+++ b/metadata_backend/helpers/parser.py
@@ -2,7 +2,7 @@
 
 import re
 import csv
-import io
+from io import StringIO
 from typing import Any, Dict, List, Optional, Type, Union
 
 from aiohttp import web
@@ -290,7 +290,7 @@ def parse(self, schema_type: str, content: str) -> Dict:
             raise web.HTTPBadRequest(reason=reason)
         # result is of type:
         # Union[Any, List[Any], Tuple[None, List[XMLSchemaValidationError]],
-        # Tuple[Any, List[XMLSchemaValidationError]], Tuple[List[Any], List[XMLSchemaValidationError]]]
+        #       Tuple[Any, List[XMLSchemaValidationError]], Tuple[List[Any], List[XMLSchemaValidationError]]]
         # however we expect any type as it is easier to work with
         result: Any = schema.to_dict(content, converter=MetadataXMLConverter, decimal_type=float, dict_class=dict)
         _schema_type: str = schema_type.lower()
@@ -327,31 +327,54 @@ def parse(self, schema_type: str, content: str) -> List:
         :returns: CSV parsed to JSON
         :raises: HTTPBadRequest if error was raised during parsing or validation
         """
-        # Write content string into text stream for easy parsing into an object
-        with io.StringIO() as file:
-            file.write(content)
-            file.seek(0)
-            csv_reader = csv.DictReader(file)
-            rows = [row for row in csv_reader]
-
-        # CSV files should contain precisely one object
+        csv_reader = csv.DictReader(StringIO(content), delimiter=",", quoting=csv.QUOTE_NONE)
+
+        _sample_list = [
+            "title",
+            "alias",
+            "description",
+            "subjectId",
+            "bioSampleId",
+            "caseOrControl",
+            "gender",
+            "organismPart",
+            "cellLine",
+            "region",
+            "phenotype",
+        ]
+
+        if (
+            csv_reader.fieldnames
+            and schema_type == "sample"
+            and all(elem in _sample_list for elem in csv_reader.fieldnames)
+        ):
+            LOG.debug("sample CSV file has the correct header")
+        else:
+            reason = f"CSV file for schema {schema_type} does not contain the correct header fields: {_sample_list}"
+            LOG.error(reason)
+            raise web.HTTPBadRequest(reason=reason)
+
+        rows = [row for row in csv_reader]
+
         if not rows:
             reason = "CSV file appears to be incomplete. No rows of data were parsed."
             LOG.error(reason)
             raise web.HTTPBadRequest(reason=reason)
 
+        _parsed = []
         for row in rows:
-            LOG.info(row)
+            LOG.debug(f"current row: {row}")
+            _tmp: Dict[str, Any] = row
             # This is required to pass validation against current sample schema
             if schema_type == "sample" and "sampleName" not in row:
                 # Without TaxonID provided we assume the sample relates to
                 # Homo sapiens, which has the default TaxonID 9606
-                row["sampleName"] = '{"taxonId": 9606}'
-            row["sampleName"] = row["sampleName"]
-            JSONValidator(row, schema_type.lower()).validate
+                _tmp["sampleName"] = {"taxonId": 9606}
+            JSONValidator(_tmp, schema_type.lower()).validate
+            _parsed.append(_tmp)
 
-        LOG.info(f"CSV was successfully converted to {len(rows)} JSON object(s).")
-        return rows
+        LOG.info(f"CSV was successfully converted to {len(_parsed)} JSON object(s).")
+        return _parsed
 
 
 def jsonpatch_mongo(identifier: Dict, json_patch: List[Dict[str, Any]]) -> List:
diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py
index c3d815b62..ca8711ee4 100644
--- a/tests/integration/run_tests.py
+++ b/tests/integration/run_tests.py
@@ -150,11 +150,27 @@ async def post_object(sess, schema, filename):
     request_data = await create_request_data(schema, filename)
     async with sess.post(f"{objects_url}/{schema}", data=request_data) as resp:
         LOG.debug(f"Adding new object to {schema}, via XML/CSV file {filename}")
-        assert resp.status == 201, "HTTP Status code error"
+        assert resp.status == 201, f"HTTP Status code error, got {resp.status}"
        ans = await resp.json()
         return ans if isinstance(ans, list) else ans["accessionId"], schema
 
 
+async def post_object_expect_status(sess, schema, filename, status):
+    """Post one metadata object within session, returning the accessionId when an object is created.
+
+    :param sess: HTTP session in which request call is made
+    :param schema: name of the schema (folder) used for testing
+    :param filename: name of the file used for testing
+    :param status: HTTP status code expected in the response
+    """
+    request_data = await create_request_data(schema, filename)
+    async with sess.post(f"{objects_url}/{schema}", data=request_data) as resp:
+        LOG.debug(f"Adding new object to {schema}, via XML/CSV file {filename} and expecting status: {status}")
+        assert resp.status == status, f"HTTP Status code error, got {resp.status}"
+        if status < 400:
+            ans = await resp.json()
+            return ans if isinstance(ans, list) else ans["accessionId"], schema
+
+
 async def post_object_json(sess, schema, filename):
     """Post & put one metadata object within session, returns accessionId.
 
@@ -527,35 +543,52 @@ async def test_crud_works(sess, schema, filename, folder_id):
     assert expected_true, "draft object still exists"
 
 
-async def test_csv_post(sess, schema, filename, folder_id):
+async def test_csv(sess, folder_id):
     """Test CRUD for a submitted CSV file.
 
-    Test case is basically the same as test_crud_works() but without the XML checks.
+    The test first tries a good CSV file for the sample object, after which we try an empty file.
+    After this we try a study object, which is not allowed.
 
    :param sess: HTTP session in which request call is made
-    :param schema: name of the schema (folder) used for testing
-    :param filename: name of the file used for testing
     :param folder_id: id of the folder used to group submission
     """
-    accession_id = await post_object(sess, schema, filename)
+    _schema = "sample"
+    _filename = "EGAformat.csv"
+    accession_id = await post_object(sess, _schema, _filename)
+    # there are 3 rows but only 2 are correct
+    print(accession_id)
+    assert len(accession_id[0]) == 3, f"expected nb of CSV entries does not match, we got: {len(accession_id)}"
+    _first_csv_row_id = accession_id[0][0]["accessionId"]
     patch_object = [
-        {"op": "add", "path": "/metadataObjects/-", "value": {"accessionId": accession_id[0], "schema": schema}}
+        {"op": "add", "path": "/metadataObjects/-", "value": {"accessionId": _first_csv_row_id, "schema": _schema}}
     ]
+
     await patch_folder(sess, folder_id, patch_object)
-    async with sess.get(f"{objects_url}/{schema}/{accession_id[0]}") as resp:
-        LOG.debug(f"Checking that {accession_id[0]} JSON is in {schema}")
-        assert resp.status == 200, "HTTP Status code error"
+    async with sess.get(f"{objects_url}/{_schema}/{_first_csv_row_id}") as resp:
+        LOG.debug(f"Checking that {_first_csv_row_id} JSON is in {_schema}")
+        assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
 
-    await delete_object(sess, schema, accession_id[0])
-    async with sess.get(f"{objects_url}/{schema}/{accession_id[0]}") as resp:
-        LOG.debug(f"Checking that JSON object {accession_id[0]} was deleted")
-        assert resp.status == 404, "HTTP Status code error"
+    await delete_object(sess, _schema, _first_csv_row_id)
+    async with sess.get(f"{objects_url}/{_schema}/{_first_csv_row_id}") as resp:
+        LOG.debug(f"Checking that JSON object {_first_csv_row_id} was deleted")
+        assert resp.status == 404, f"HTTP Status code error, got {resp.status}"
 
     async with sess.get(f"{folders_url}/{folder_id}") as resp:
-        LOG.debug(f"Checking that object {accession_id} was deleted from folder {folder_id}")
+        LOG.debug(f"Checking that object {_first_csv_row_id} was deleted from folder {folder_id}")
         res = await resp.json()
-        expected_true = not any(d["accessionId"] == accession_id for d in res["metadataObjects"])
-        assert expected_true, "draft object still exists"
+        expected_true = not any(d["accessionId"] == _first_csv_row_id for d in res["metadataObjects"])
+        assert expected_true, f"object {_first_csv_row_id} still exists"
+
+    _filename = "empty.csv"
+    # status should be 400
+    await post_object_expect_status(sess, _schema, _filename, 400)
+
+    _filename = "EGA_sample_w_issue.csv"
+    # status should be 201 but we expect 3 rows, as the CSV has 4 rows, one of which is empty
+    accession_id = await post_object_expect_status(sess, _schema, _filename, 201)
+    assert len(accession_id[0]) == 3, f"expected nb of CSV entries does not match, we got: {len(accession_id)}"
 
 
 async def test_put_objects(sess, folder_id):
@@ -1450,8 +1483,11 @@ async def main():
     }
     basic_folder_id = await post_folder(sess, basic_folder)
 
+    # test XML files
     await asyncio.gather(*[test_crud_works(sess, schema, file, basic_folder_id) for schema, file in test_xml_files])
-    await test_csv_post(sess, "sample", "EGAformat.csv", basic_folder_id)
+
+    # test CSV files
+    await test_csv(sess, basic_folder_id)
 
     put_object_folder = {
         "name": "test put object",
diff --git a/tests/test_handlers.py b/tests/test_handlers.py
index 1d1902e98..2156a586b 100644
--- a/tests/test_handlers.py
+++ b/tests/test_handlers.py
@@ -1,7 +1,7 @@
 """Test API endpoints from handlers module."""
 from pathlib import Path
-from unittest.mock import patch
+from unittest.mock import patch, call
 
 from aiohttp import FormData
 from aiohttp.test_utils import AioHTTPTestCase, make_mocked_coro
@@ -132,6 +132,13 @@ def create_submission_data(self, files):
         )
         return data
 
+    def get_file_data(self, schema, filename):
+        """Read the contents of a test file and return them as plain text."""
+        path_to_file = self.TESTFILES_ROOT / schema / filename
+        with open(path_to_file.as_posix(), mode="r") as csv_file:
+            _reader = csv_file.read()
+        return _reader
+
     async def fake_operator_read_metadata_object(self, schema_type, accession_id):
         """Fake read operation to return mocked JSON."""
         return (self.metadata_json, "application/json")
@@ -360,6 +367,10 @@ async def setUpAsync(self):
         self.patch_operator = patch(class_operator, **self.operator_config, spec=True)
         self.MockedOperator = self.patch_operator.start()
 
+        class_csv_parser = "metadata_backend.api.handlers.common.CSVToJSONParser"
+        self.patch_csv_parser = patch(class_csv_parser, spec=True)
+        self.MockedCSVParser = self.patch_csv_parser.start()
+
         class_folderoperator = "metadata_backend.api.handlers.object.FolderOperator"
         self.patch_folderoperator = patch(class_folderoperator, **self.folderoperator_config, spec=True)
         self.MockedFolderOperator = self.patch_folderoperator.start()
@@ -368,6 +379,7 @@ async def tearDownAsync(self):
         """Cleanup mocked stuff."""
         await super().tearDownAsync()
         self.patch_xmloperator.stop()
+        self.patch_csv_parser.stop()
         self.patch_folderoperator.stop()
         self.patch_operator.stop()
 
@@ -428,13 +440,21 @@ async def test_post_object_works_with_csv(self):
         """Test that CSV file is parsed and submitted as json."""
         files = [("sample", "EGAformat.csv")]
         data = self.create_submission_data(files)
+        file_content = self.get_file_data("sample", "EGAformat.csv")
         self.MockedCSVParser().parse.return_value = [{}, {}, {}]
         response = await self.client.post("/objects/sample", data=data)
         json_resp = await response.json()
         self.assertEqual(response.status, 201)
-        self.assertEqual(self.test_ega_string, json_resp["accessionId"])
-        self.MockedCSVParser().parse.assert_called_once()
-        self.MockedOperator().create_metadata_object.assert_called_once()
+        self.assertEqual(self.test_ega_string, json_resp[0]["accessionId"])
+        parse_calls = [
+            call(
+                "sample",
+                file_content,
+            )
+        ]
+        op_calls = [call("sample", {}), call("sample", {}), call("sample", {})]
+        self.MockedCSVParser().parse.assert_has_calls(parse_calls, any_order=True)
+        self.MockedOperator().create_metadata_object.assert_has_calls(op_calls, any_order=True)
 
     async def test_post_objet_error_with_empty(self):
         """Test multipart request post fails when no objects are parsed."""

From 0e05481070582f1cd5fb681d30cf033f3d0546bd Mon Sep 17 00:00:00 2001
From: Stefan Negru
Date: Thu, 30 Dec 2021 12:28:43 +0200
Subject: [PATCH 136/336] small fixes to integration tests: add more meaningful messages and make the tests runnable locally

---
 tests/integration/clean_db.py  |   2 +-
 tests/integration/run_tests.py | 123 ++++++++++++++++-----------------
 2 files changed, 62 insertions(+), 63 deletions(-)

diff --git a/tests/integration/clean_db.py b/tests/integration/clean_db.py
index 31b9c8dc6..7ba2e82f1 100644
--- a/tests/integration/clean_db.py
+++ b/tests/integration/clean_db.py
@@ -16,7 +16,7 @@
 # === Global vars ===
 DATABASE = os.getenv("MONGO_DATABASE", "default")
 AUTHDB = os.getenv("MONGO_AUTHDB", "admin")
-HOST = os.getenv("MONGO_HOST", "admin")
+HOST = os.getenv("MONGO_HOST", "localhost")
 
 FORMAT = "[%(asctime)s][%(name)s][%(process)d
%(processName)s][%(levelname)-8s](L:%(lineno)s) %(funcName)s: %(message)s" logging.basicConfig(format=FORMAT, datefmt="%Y-%m-%d %H:%M:%S") LOG = logging.getLogger(__name__) diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index ca8711ee4..74b4c6b4c 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -63,7 +63,7 @@ # to form direct contact to db with create_folder() DATABASE = os.getenv("MONGO_DATABASE", "default") AUTHDB = os.getenv("MONGO_AUTHDB", "admin") -HOST = os.getenv("MONGO_HOST", "database:27017") +HOST = os.getenv("MONGO_HOST", "localhost:27017") TLS = os.getenv("MONGO_SSL", False) user_id = "current" @@ -181,7 +181,7 @@ async def post_object_json(sess, schema, filename): request_data = await create_request_json_data(schema, filename) async with sess.post(f"{objects_url}/{schema}", data=request_data) as resp: LOG.debug(f"Adding new object to {schema}, via JSON file {filename}") - assert resp.status == 201, "HTTP Status code error" + assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() return ans["accessionId"] @@ -195,7 +195,7 @@ async def delete_object(sess, schema, accession_id): """ async with sess.delete(f"{objects_url}/{schema}/{accession_id}") as resp: LOG.debug(f"Deleting object {accession_id} from {schema}") - assert resp.status == 204, "HTTP Status code error" + assert resp.status == 204, f"HTTP Status code error, got {resp.status}" async def post_draft(sess, schema, filename): @@ -208,7 +208,7 @@ async def post_draft(sess, schema, filename): request_data = await create_request_data(schema, filename) async with sess.post(f"{drafts_url}/{schema}", data=request_data) as resp: LOG.debug(f"Adding new draft object to {schema}, via XML file {filename}") - assert resp.status == 201, "HTTP Status code error" + assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() return ans["accessionId"] @@ -223,7 +223,7 @@ async def post_draft_json(sess, schema, filename): request_data = await create_request_json_data(schema, filename) async with sess.post(f"{drafts_url}/{schema}", data=request_data) as resp: LOG.debug(f"Adding new draft object to {schema}, via JSON file {filename}") - assert resp.status == 201, "HTTP Status code error" + assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() return ans["accessionId"] @@ -237,7 +237,7 @@ async def get_draft(sess, schema, draft_id, expected_status=200): """ async with sess.get(f"{drafts_url}/{schema}/{draft_id}") as resp: LOG.debug(f"Checking that {draft_id} JSON exists") - assert resp.status == expected_status, "HTTP Status code error" + assert resp.status == expected_status, f"HTTP Status code error, got {resp.status}" ans = await resp.json() return json.dumps(ans) @@ -253,7 +253,7 @@ async def put_draft(sess, schema, draft_id, update_filename): request_data = await create_request_json_data(schema, update_filename) async with sess.put(f"{drafts_url}/{schema}/{draft_id}", data=request_data) as resp: LOG.debug(f"Replace draft object in {schema}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" ans_put = await resp.json() assert ans_put["accessionId"] == draft_id, "accession ID error" return ans_put["accessionId"] @@ -270,7 +270,7 @@ async def put_object_json(sess, schema, accession_id, update_filename): request_data = await create_request_json_data(schema, update_filename) async with 
sess.put(f"{objects_url}/{schema}/{accession_id}", data=request_data) as resp: LOG.debug(f"Try to replace object in {schema}") - assert resp.status == 415, "HTTP Status code error" + assert resp.status == 415, f"HTTP Status code error, got {resp.status}" async def put_object_xml(sess, schema, accession_id, update_filename): @@ -284,7 +284,7 @@ async def put_object_xml(sess, schema, accession_id, update_filename): request_data = await create_request_data(schema, update_filename) async with sess.put(f"{objects_url}/{schema}/{accession_id}", data=request_data) as resp: LOG.debug(f"Replace object with XML data in {schema}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" ans_put = await resp.json() assert ans_put["accessionId"] == accession_id, "accession ID error" return ans_put["accessionId"] @@ -301,7 +301,7 @@ async def patch_draft(sess, schema, draft_id, update_filename): request_data = await create_request_json_data(schema, update_filename) async with sess.patch(f"{drafts_url}/{schema}/{draft_id}", data=request_data) as resp: LOG.debug(f"Update draft object in {schema}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" ans_put = await resp.json() assert ans_put["accessionId"] == draft_id, "accession ID error" return ans_put["accessionId"] @@ -316,7 +316,7 @@ async def delete_draft(sess, schema, draft_id): """ async with sess.delete(f"{drafts_url}/{schema}/{draft_id}") as resp: LOG.debug(f"Deleting draft object {draft_id} from {schema}") - assert resp.status == 204, "HTTP Status code error" + assert resp.status == 204, f"HTTP Status code error, got {resp.status}" async def post_template_json(sess, schema, filename): @@ -329,7 +329,7 @@ async def post_template_json(sess, schema, filename): request_data = await create_request_json_data(schema, filename) async with sess.post(f"{templates_url}/{schema}", data=request_data) as resp: LOG.debug(f"Adding new template object to {schema}, via JSON file {filename}") - assert resp.status == 201, "HTTP Status code error" + assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() if isinstance(ans, list): return ans @@ -346,7 +346,7 @@ async def get_template(sess, schema, template_id): """ async with sess.get(f"{templates_url}/{schema}/{template_id}") as resp: LOG.debug(f"Checking that {template_id} JSON exists") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" ans = await resp.json() return json.dumps(ans) @@ -362,7 +362,7 @@ async def patch_template(sess, schema, template_id, update_filename): request_data = await create_request_json_data(schema, update_filename) async with sess.patch(f"{templates_url}/{schema}/{template_id}", data=request_data) as resp: LOG.debug(f"Update draft object in {schema}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" ans_put = await resp.json() assert ans_put["accessionId"] == template_id, "accession ID error" return ans_put["accessionId"] @@ -377,7 +377,7 @@ async def delete_template(sess, schema, template_id): """ async with sess.delete(f"{templates_url}/{schema}/{template_id}") as resp: LOG.debug(f"Deleting template object {template_id} from {schema}") - assert resp.status == 204, "HTTP Status code error" + assert resp.status == 204, f"HTTP Status code error, got 
{resp.status}" async def post_folder(sess, data): @@ -402,7 +402,7 @@ async def patch_folder(sess, folder_id, json_patch): """ async with sess.patch(f"{folders_url}/{folder_id}", data=json.dumps(json_patch)) as resp: LOG.debug(f"Updating folder {folder_id}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" ans_patch = await resp.json() assert ans_patch["folderId"] == folder_id, "folder ID error" return ans_patch["folderId"] @@ -416,7 +416,7 @@ async def publish_folder(sess, folder_id): """ async with sess.patch(f"{publish_url}/{folder_id}") as resp: LOG.debug(f"Publishing folder {folder_id}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" ans = await resp.json() assert ans["folderId"] == folder_id, "folder ID error" return ans["folderId"] @@ -430,7 +430,7 @@ async def delete_folder(sess, folder_id): """ async with sess.delete(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Deleting folder {folder_id}") - assert resp.status == 204, "HTTP Status code error" + assert resp.status == 204, f"HTTP Status code error, got {resp.status}" async def delete_folder_publish(sess, folder_id): @@ -441,7 +441,7 @@ async def delete_folder_publish(sess, folder_id): """ async with sess.delete(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Deleting folder {folder_id}") - assert resp.status == 401, "HTTP Status code error" + assert resp.status == 401, f"HTTP Status code error, got {resp.status}" async def create_folder(data, user): @@ -484,7 +484,7 @@ async def patch_user(sess, user_id, real_user_id, json_patch): """ async with sess.patch(f"{users_url}/current", data=json.dumps(json_patch)) as resp: LOG.debug(f"Updating user {real_user_id}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" ans_patch = await resp.json() assert ans_patch["userId"] == real_user_id, "user ID error" return ans_patch["userId"] @@ -500,7 +500,7 @@ async def delete_user(sess, user_id): LOG.debug(f"Deleting user {user_id}") # we expect 404 as there is no frontend assert str(resp.url) == f"{base_url}/", "redirect url user delete differs" - assert resp.status == 404, "HTTP Status code error" + assert resp.status == 404, f"HTTP Status code error, got {resp.status}" # === Integration tests === @@ -523,24 +523,24 @@ async def test_crud_works(sess, schema, filename, folder_id): await patch_folder(sess, folder_id, patch_object) async with sess.get(f"{objects_url}/{schema}/{accession_id[0]}") as resp: LOG.debug(f"Checking that {accession_id[0]} JSON is in {schema}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" async with sess.get(f"{objects_url}/{schema}/{accession_id[0]}?format=xml") as resp: LOG.debug(f"Checking that {accession_id[0]} XML is in {schema}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" await delete_object(sess, schema, accession_id[0]) async with sess.get(f"{objects_url}/{schema}/{accession_id[0]}") as resp: LOG.debug(f"Checking that JSON object {accession_id[0]} was deleted") - assert resp.status == 404, "HTTP Status code error" + assert resp.status == 404, f"HTTP Status code error, got {resp.status}" async with sess.get(f"{objects_url}/{schema}/{accession_id[0]}?format=xml") as resp: LOG.debug(f"Checking that XML object 
{accession_id[0]} was deleted") - assert resp.status == 404, "HTTP Status code error" + assert resp.status == 404, f"HTTP Status code error, got {resp.status}" async with sess.get(f"{folders_url}/{folder_id}") as resp: - LOG.debug(f"Checking that object {accession_id} was deleted from folder {folder_id}") + LOG.debug(f"Checking that object {accession_id[0]} was deleted from folder {folder_id}") res = await resp.json() - expected_true = not any(d["accessionId"] == accession_id for d in res["metadataObjects"]) - assert expected_true, "draft object still exists" + expected_true = not any(d["accessionId"] == accession_id[0] for d in res["metadataObjects"]) + assert expected_true, f"object {accession_id[0]} still exists" async def test_csv(sess, folder_id): @@ -558,7 +558,6 @@ async def test_csv(sess, folder_id): _filename = "EGAformat.csv" accession_id = await post_object(sess, _schema, _filename) # there are 3 rows but only 2 are correct - print(accession_id) assert len(accession_id[0]) == 3, f"expected nb of CSV entries does not match, we got: {len(accession_id)}" _first_csv_row_id = accession_id[0][0]["accessionId"] patch_object = [ @@ -631,18 +630,18 @@ async def test_crud_drafts_works(sess, schema, orginal_file, update_file, folder accession_id = await put_draft(sess, schema, draft_id, update_file) async with sess.get(f"{drafts_url}/{schema}/{accession_id}") as resp: LOG.debug(f"Checking that {accession_id} JSON is in {schema}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" await delete_draft(sess, schema, accession_id) async with sess.get(f"{drafts_url}/{schema}/{accession_id}") as resp: LOG.debug(f"Checking that JSON object {accession_id} was deleted") - assert resp.status == 404, "HTTP Status code error" + assert resp.status == 404, f"HTTP Status code error, got {resp.status}" async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that JSON object {accession_id} was deleted from folder {folder_id}") res = await resp.json() expected_true = not any(d["accessionId"] == accession_id for d in res["drafts"]) - assert expected_true, "draft object still exists" + assert expected_true, f"draft object {accession_id} still exists" async def test_patch_drafts_works(sess, schema, orginal_file, update_file, folder_id): @@ -669,12 +668,12 @@ async def test_patch_drafts_works(sess, schema, orginal_file, update_file, folde res = await resp.json() assert res["centerName"] == "GEOM", "object centerName content mismatch" assert res["alias"] == "GSE10968", "object alias content mismatch" - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" await delete_draft(sess, schema, accession_id) async with sess.get(f"{drafts_url}/{schema}/{accession_id}") as resp: LOG.debug(f"Checking that JSON object {accession_id} was deleted") - assert resp.status == 404, "HTTP Status code error" + assert resp.status == 404, f"HTTP Status code error, got {resp.status}" async def test_querying_works(sess, folder_id): @@ -722,7 +721,7 @@ async def test_querying_works(sess, folder_id): async def do_one_query(schema, key, value, expected_status): async with sess.get(f"{objects_url}/{schema}?{key}={value}") as resp: - assert resp.status == expected_status, "HTTP Status code error" + assert resp.status == expected_status, f"HTTP Status code error, got {resp.status}" for schema, schema_queries in queries.items(): LOG.debug(f"Querying {schema} collection with 
working params") @@ -789,7 +788,7 @@ async def test_crud_folders_works(sess): folder_id = await post_folder(sess, folder_data) async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was created") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" # Create draft from test XML file and patch the draft into the newly created folder draft_id = await post_draft(sess, "sample", "SRS001433.xml") @@ -811,7 +810,7 @@ async def test_crud_folders_works(sess): draft_data = await get_draft(sess, "sample", draft_id) async with sess.post(f"{objects_url}/sample", data=draft_data) as resp: LOG.debug("Adding draft to actual objects") - assert resp.status == 201, "HTTP Status code error" + assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() assert ans["accessionId"] != draft_id, "draft id does not match expected" accession_id = ans["accessionId"] @@ -854,7 +853,7 @@ async def test_crud_folders_works(sess): async with sess.get(f"{drafts_url}/sample/{draft_id}") as resp: LOG.debug(f"Checking that JSON object {accession_id} was deleted") - assert resp.status == 404, "HTTP Status code error" + assert resp.status == 404, f"HTTP Status code error, got {resp.status}" async def test_crud_folders_works_no_publish(sess): @@ -867,7 +866,7 @@ async def test_crud_folders_works_no_publish(sess): folder_id = await post_folder(sess, folder_data) async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was created") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" # Create draft from test XML file and patch the draft into the newly created folder draft_id = await post_draft(sess, "sample", "SRS001433.xml") @@ -889,7 +888,7 @@ async def test_crud_folders_works_no_publish(sess): draft = await get_draft(sess, "sample", draft_id) async with sess.post(f"{objects_url}/sample", data=draft) as resp: LOG.debug("Adding draft to actual objects") - assert resp.status == 201, "HTTP Status code error" + assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() assert ans["accessionId"] != draft_id, "draft id does not match expected" accession_id = ans["accessionId"] @@ -913,7 +912,7 @@ async def test_crud_folders_works_no_publish(sess): await delete_folder(sess, folder_id) async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was deleted") - assert resp.status == 404, "HTTP Status code error" + assert resp.status == 404, f"HTTP Status code error, got {resp.status}" async with sess.get(f"{users_url}/current") as resp: LOG.debug(f"Checking that folder {folder_id} was deleted from current user") @@ -932,7 +931,7 @@ async def test_adding_doi_info_to_folder_works(sess): folder_id = await post_folder(sess, folder_data) async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was created") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" # Get correctly formatted DOI info and patch it into the new folder successfully doi_data_raw = await create_request_json_data("doi", "test_doi.json") @@ -953,7 +952,7 @@ async def test_adding_doi_info_to_folder_works(sess): patch_add_bad_doi = [{"op": "add", "path": "/doiInfo", "value": {"identifier": {}}}] 
async with sess.patch(f"{folders_url}/{folder_id}", data=json.dumps(patch_add_bad_doi)) as resp: LOG.debug(f"Tried updating folder {folder_id}") - assert resp.status == 400, "HTTP Status code error" + assert resp.status == 400, f"HTTP Status code error, got {resp.status}" res = await resp.json() assert res["detail"] == "Provided input does not seem correct for field: 'doiInfo'", "expected error mismatch" @@ -967,7 +966,7 @@ async def test_adding_doi_info_to_folder_works(sess): patch_add_bad_doi = [{"op": "add", "path": "/extraInfo", "value": {"publisher": "something"}}] async with sess.patch(f"{folders_url}/{folder_id}", data=json.dumps(patch_add_bad_doi)) as resp: LOG.debug(f"Tried updating folder {folder_id}") - assert resp.status == 400, "HTTP Status code error" + assert resp.status == 400, f"HTTP Status code error, got {resp.status}" res = await resp.json() assert res["detail"] == "Request contains '/extraInfo' key that cannot be updated to folders.", "error mismatch" @@ -975,7 +974,7 @@ async def test_adding_doi_info_to_folder_works(sess): await delete_folder(sess, folder_id) async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was deleted") - assert resp.status == 404, "HTTP Status code error" + assert resp.status == 404, f"HTTP Status code error, got {resp.status}" async def test_getting_paginated_folders(sess): @@ -1152,7 +1151,7 @@ async def test_getting_user_items(sess): # Get real user ID async with sess.get(f"{users_url}/{user_id}") as resp: LOG.debug(f"Reading user {user_id}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" # Add template to user template_id = await post_template_json(sess, "study", "SRP000539_template.json") @@ -1160,7 +1159,7 @@ async def test_getting_user_items(sess): # Test querying for list of user draft templates async with sess.get(f"{users_url}/{user_id}?items=templates") as resp: LOG.debug(f"Reading user {user_id} templates") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" ans = await resp.json() assert ans["page"]["page"] == 1 assert ans["page"]["size"] == 5 @@ -1170,7 +1169,7 @@ async def test_getting_user_items(sess): async with sess.get(f"{users_url}/{user_id}?items=templates&per_page=3") as resp: LOG.debug(f"Reading user {user_id} templates") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" ans = await resp.json() assert ans["page"]["page"] == 1 assert ans["page"]["size"] == 3 @@ -1181,7 +1180,7 @@ async def test_getting_user_items(sess): # Test querying for the list of folder IDs async with sess.get(f"{users_url}/{user_id}?items=folders") as resp: LOG.debug(f"Reading user {user_id} folder list") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" ans = await resp.json() assert ans["page"]["page"] == 1 assert ans["page"]["size"] == 5 @@ -1192,7 +1191,7 @@ async def test_getting_user_items(sess): # Test the same with a bad query param async with sess.get(f"{users_url}/{user_id}?items=bad") as resp: LOG.debug(f"Reading user {user_id} but with faulty item descriptor") - assert resp.status == 400, "HTTP Status code error" + assert resp.status == 400, f"HTTP Status code error, got {resp.status}" async def test_crud_users_works(sess): @@ -1203,7 +1202,7 @@ async def 
test_crud_users_works(sess): # Check user exists in database (requires an user object to be mocked) async with sess.get(f"{users_url}/{user_id}") as resp: LOG.debug(f"Reading user {user_id}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" response = await resp.json() real_user_id = response["userId"] @@ -1286,7 +1285,7 @@ async def test_crud_users_works(sess): # this check is not needed but good to do async with sess.get(f"{users_url}/{user_id}") as resp: LOG.debug(f"Checking that user {user_id} was deleted") - assert resp.status == 401, "HTTP Status code error" + assert resp.status == 401, f"HTTP Status code error, got {resp.status}" async def test_get_folders(sess, folder_id: str): @@ -1297,7 +1296,7 @@ async def test_get_folders(sess, folder_id: str): """ async with sess.get(f"{folders_url}") as resp: LOG.debug(f"Reading folder {folder_id}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" response = await resp.json() assert len(response["folders"]) == 1 assert response["page"] == {"page": 1, "size": 5, "totalPages": 1, "totalFolders": 1} @@ -1317,7 +1316,7 @@ async def test_get_folders_objects(sess, folder_id: str): await patch_folder(sess, folder_id, patch_add_object) async with sess.get(f"{folders_url}") as resp: LOG.debug(f"Reading folder {folder_id}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" response = await resp.json() assert len(response["folders"]) == 1 assert response["folders"][0]["metadataObjects"][0]["accessionId"] == accession_id @@ -1332,7 +1331,7 @@ async def test_get_folders_objects(sess, folder_id: str): await patch_folder(sess, folder_id, patch_add_more_object) async with sess.get(f"{folders_url}") as resp: LOG.debug(f"Reading folder {folder_id}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" response = await resp.json() assert len(response["folders"]) == 1 assert response["folders"][0]["metadataObjects"][0]["accessionId"] == accession_id @@ -1348,7 +1347,7 @@ async def test_get_folders_objects(sess, folder_id: str): await patch_folder(sess, folder_id, patch_change_tags_object) async with sess.get(f"{folders_url}") as resp: LOG.debug(f"Reading folder {folder_id}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" response = await resp.json() assert len(response["folders"]) == 1 assert response["folders"][0]["metadataObjects"][0]["accessionId"] == accession_id @@ -1366,7 +1365,7 @@ async def test_submissions_work(sess, folder_id): submission_data = await create_multi_file_request_data(sub_files) async with sess.post(f"{submit_url}", data=submission_data) as resp: LOG.debug("Checking initial submission worked") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" res = await resp.json() assert len(res) == 2, "expected 2 objects" assert res[0]["schema"] == "study", "expected first element to be study" @@ -1389,7 +1388,7 @@ async def test_submissions_work(sess, folder_id): # Sanity check that the study object was inserted correctly before modifying it async with sess.get(f"{objects_url}/study/{study_access_id}") as resp: LOG.debug("Sanity checking that previous object was added correctly") - assert 
resp.status == 200, "HTTP Status code error"
+        assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
         res = await resp.json()
         assert res["accessionId"] == study_access_id, "study accession id does not match"
         assert res["alias"] == "GSE10966", "study alias does not match"
@@ -1411,7 +1410,7 @@ async def test_submissions_work(sess, folder_id):
     more_submission_data = await create_multi_file_request_data(sub_files)
     async with sess.post(f"{submit_url}", data=more_submission_data) as resp:
         LOG.debug("Checking object in initial submission was modified")
-        assert resp.status == 200, "HTTP Status code error"
+        assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
         res = await resp.json()
         assert len(res) == 2, "expected 2 objects"
         new_study_access_id = res[0]["accessionId"]
@@ -1420,7 +1419,7 @@ async def test_submissions_work(sess, folder_id):
     # Check the modified object was inserted correctly
     async with sess.get(f"{objects_url}/study/{new_study_access_id}") as resp:
         LOG.debug("Checking that previous object was modified correctly")
-        assert resp.status == 200, "HTTP Status code error"
+        assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
         res = await resp.json()
         assert res["accessionId"] == new_study_access_id, "study accession id does not match"
         assert res["alias"] == "GSE10966", "study alias does not match"
@@ -1445,7 +1444,7 @@ async def test_health_check(sess):
     """
     async with sess.get(f"{base_url}/health") as resp:
         LOG.debug("Checking that health status is ok")
-        assert resp.status == 200, "HTTP Status code error"
+        assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
         res = await resp.json()
         assert res["status"] == "Ok"
         assert res["services"]["database"]["status"] == "Ok"
@@ -1574,7 +1573,7 @@ async def main():
         await login(sess, other_test_user, other_test_user_given, other_test_user_family)
         async with sess.get(f"{users_url}/{user_id}") as resp:
             LOG.debug(f"Reading user {user_id}")
-            assert resp.status == 200, "HTTP Status code error"
+            assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
             response = await resp.json()
             real_user_id = response["userId"]
         await delete_user(sess, real_user_id)

From 0007484e7c620e804baf4d5e09a59fd4dfabbc5c Mon Sep 17 00:00:00 2001
From: Stefan Negru
Date: Thu, 30 Dec 2021 12:48:22 +0200
Subject: [PATCH 137/336] fail multipartreader with HTTPUnsupportedMediaType

---
 metadata_backend/api/handlers/common.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/metadata_backend/api/handlers/common.py b/metadata_backend/api/handlers/common.py
index b285b5f81..43ec14f70 100644
--- a/metadata_backend/api/handlers/common.py
+++ b/metadata_backend/api/handlers/common.py
@@ -39,11 +39,13 @@ async def multipart_content(
         part = await reader.next()
         # we expect a simple body part (BodyPartReader) instance here
         # otherwise, it will be another MultipartReader instance for the nested multipart.
-        # we don't need to cast the part BodyPartReader, we fail if we get anything else
+        # we don't need to cast the part to BodyPartReader, we fail if we get anything else.
+        # MultipartReader is aimed at ``multipart/mixed`` and ``multipart/related`` content;
+        # we will be working with ``multipart/form-data`` only.
         if isinstance(part, MultipartReader):
-            reason = "Only one file can be sent to this endpoint at a time."
+            reason = "We cannot process nested multipart content."
LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) + raise web.HTTPUnsupportedMediaType(reason=reason) if not part: break if extract_one and (xml_files or csv_files): From 6a4a88c92ee85b9dfeabb8bc1b86285a7ef4bd39 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 3 Jan 2022 09:04:19 +0000 Subject: [PATCH 138/336] Bump jsonschema from 4.3.2 to 4.3.3 Bumps [jsonschema](https://github.com/Julian/jsonschema) from 4.3.2 to 4.3.3. - [Release notes](https://github.com/Julian/jsonschema/releases) - [Changelog](https://github.com/Julian/jsonschema/blob/main/CHANGELOG.rst) - [Commits](https://github.com/Julian/jsonschema/compare/v4.3.2...v4.3.3) --- updated-dependencies: - dependency-name: jsonschema dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 506c37b11..2f295d731 100644 --- a/requirements.txt +++ b/requirements.txt @@ -40,7 +40,7 @@ idna==3.3 # via # requests # yarl -jsonschema==4.3.2 +jsonschema==4.3.3 # via -r requirements.in motor==2.5.1 # via -r requirements.in From 93f269072e19f76f2599e8adf13214926494c6fd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 3 Jan 2022 13:40:40 +0000 Subject: [PATCH 139/336] Bump tox from 3.24.4 to 3.24.5 Bumps [tox](https://github.com/tox-dev/tox) from 3.24.4 to 3.24.5. - [Release notes](https://github.com/tox-dev/tox/releases) - [Changelog](https://github.com/tox-dev/tox/blob/master/docs/changelog.rst) - [Commits](https://github.com/tox-dev/tox/compare/3.24.4...3.24.5) --- updated-dependencies: - dependency-name: tox dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot]
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 33e80effe..79fc88bde 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@
     packages=find_packages(exclude=["tests"]),
     install_requires=requirements,
     extras_require={
-        "test": ["coverage==6.2", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.4"],
+        "test": ["coverage==6.2", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.5"],
         "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"],
     },
     package_data={

From 2859d34030982741902726b04e188fa51b58af1f Mon Sep 17 00:00:00 2001
From: Stefan Negru
Date: Tue, 4 Jan 2022 14:46:56 +0200
Subject: [PATCH 140/336] correct comment in integration tests

---
 tests/integration/run_tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py
index 74b4c6b4c..6f5bb3e9c 100644
--- a/tests/integration/run_tests.py
+++ b/tests/integration/run_tests.py
@@ -557,7 +557,7 @@ async def test_csv(sess, folder_id):
     _schema = "sample"
     _filename = "EGAformat.csv"
     accession_id = await post_object(sess, _schema, _filename)
-    # there are 3 rows but only 2 are correct
+    # there are 3 rows and we expect to get all 3
     assert len(accession_id[0]) == 3, f"expected nb of CSV entries does not match, we got: {len(accession_id)}"
     _first_csv_row_id = accession_id[0][0]["accessionId"]
     patch_object = [

From c92752790488aa226c07921becdc6ac630785fad Mon Sep 17 00:00:00 2001
From: "teemu.kataja"
Date: Tue, 21 Dec 2021 11:16:41 +0200
Subject: [PATCH 141/336] refactor auth.py to use oidcrp #157

---
 .env.example                        |   4 +-
 .gitignore                          |   4 +
 docker-compose-tls.yml              |   2 -
 docker-compose.yml                  |   2 -
 docs/submitter.rst                  |  22 ++--
 metadata_backend/api/auth.py        | 191 ++++++++--------------------
 metadata_backend/api/middlewares.py |   4 +-
 metadata_backend/conf/conf.py       |  11 +-
 requirements.in                     |   2 +-
 requirements.txt                    |  43 ++++++-
 10 files changed, 108 insertions(+), 177 deletions(-)

diff --git a/.env.example b/.env.example
index 33b5ede87..81e90b774 100644
--- a/.env.example
+++ b/.env.example
@@ -2,10 +2,8 @@
 AAI_CLIENT_SECRET=secret_must_be_long
 AAI_CLIENT_ID=aud2
 ISS_URL=http://mockauth:8000
-AUTH_URL=http://localhost:8000/authorize
-OIDC_URL=http://mockauth:8000
+OIDC_URL=http://mockauth:8000/.well-known/openid-configuration
 AUTH_REFERER=http://mockauth:8000
-JWK_URL=http://mockauth:8000/keyset
 
 # app urls
 BASE_URL=http://localhost:5430
diff --git a/.gitignore b/.gitignore
index 5500c2d5d..9ecefa70c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -120,3 +120,7 @@ venv.bak/
 
 metadata_backend/frontend/*
 config/*
+
+# oidcrp generated directories that store JWKs
+private
+static
diff --git a/docker-compose-tls.yml b/docker-compose-tls.yml
index 8baa04cbc..9d913a392 100644
--- a/docker-compose-tls.yml
+++ b/docker-compose-tls.yml
@@ -24,10 +24,8 @@ services:
       - "AAI_CLIENT_SECRET=${AAI_CLIENT_SECRET}"
       - "AAI_CLIENT_ID=${AAI_CLIENT_ID}"
       - "ISS_URL=${ISS_URL}"
-      - "AUTH_URL=${AUTH_URL}"
       - "OIDC_URL=${OIDC_URL}"
       - "AUTH_REFERER=${AUTH_REFERER}"
"AUTH_REFERER=${AUTH_REFERER}" - - "JWK_URL=${JWK_URL}" - "BASE_URL=${BASE_URL}" # Enable this for working with front-end on localhost # - "REDIRECT_URL=${REDIRECT_URL}" diff --git a/docs/submitter.rst b/docs/submitter.rst index 14f70cd3e..fe1eec0f5 100644 --- a/docs/submitter.rst +++ b/docs/submitter.rst @@ -47,16 +47,15 @@ the table below. +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``ISS_URL`` | ``-`` | OIDC claim issuer URL. | Yes | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``AUTH_URL`` | ``-`` | Set if a special OIDC authorize URL is required, | No | -| | | otherwise use ``"OIDC_URL"/authorize``. | | +| ``AUTH_METHOD` | ``code` | OIDC Authentication method to use. | No | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``OIDC_URL`` | ``-`` | OIDC base URL for constructing OIDC provider endpoint calls. | Yes | +| ``OIDC_URL`` | ``-`` | OIDC URL that exposes /.well-known/openid-configuration values | Yes | ++--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ +| ``OIDC_SCOPE`` | ``openid profile email`` | Claims to request from AAI | No | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``REDIRECT_URL`` | ``-`` | Required only for testing with front-end on ``localhost`` or change to | No | | | | ``http://frontend:3000`` if started using ``docker-compose`` (see :ref:`deploy`). | | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``JWK_URL`` | ``-`` | JWK OIDC URL for retrieving key for validating ID token. | Yes | -+--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``LOG_LEVEL`` | ``INFO`` | Set logging level, uppercase. | No | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``SERVE_KEY`` | ``-`` | Keyfile used for TLS. | No | @@ -118,11 +117,9 @@ The Authentication follows the `OIDC Specification /.well-known/openid-configuration``. +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``ISS_URL`` | ``-`` | OIDC claim issuer URL. | Yes | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``AUTH_URL`` | ``-`` | Set if a special OIDC authorize URL is required, | No | -| | | otherwise use ``"OIDC_URL"/authorize``. | | +| ``AUTH_METHOD`` | ``code`` | OIDC Authentication method to use. | No | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``OIDC_URL`` | ``-`` | OIDC base URL for constructing OIDC provider endpoint calls. 
| Yes | +| ``OIDC_URL`` | ``-`` | OIDC URL that exposes /.well-known/openid-configuration values. | Yes | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``JWK_URL`` | ``-`` | JWK OIDC URL for retrieving key for validating ID token. | Yes | +| ``OIDC_SCOPE`` | ``openid profile email`` | Claims to request from AAI | No | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ REST API diff --git a/metadata_backend/api/auth.py b/metadata_backend/api/auth.py index 6be0e740d..4aeb594c5 100644 --- a/metadata_backend/api/auth.py +++ b/metadata_backend/api/auth.py @@ -1,18 +1,13 @@ """Handle Access for request and OIDC workflow.""" -import secrets -import urllib.parse import hashlib import ujson -from aiohttp import web, BasicAuth, ClientSession +from aiohttp import web from aiohttp.web import Request, Response from .middlewares import decrypt_cookie, generate_cookie -from authlib.jose import jwt -from authlib.oidc.core import CodeIDToken -from authlib.jose.errors import MissingClaimError, InvalidClaimError, ExpiredTokenError, InvalidTokenError, DecodeError -from multidict import CIMultiDict from .operators import UserOperator +from oidcrp.rp_handler import RPHandler from typing import Dict, Tuple @@ -32,40 +27,39 @@ def __init__(self, aai: Dict) -> None: self.client_id = aai["client_id"] self.client_secret = aai["client_secret"] self.callback_url = aai["callback_url"] - self.auth_url = aai["auth_url"] - self.token_url = aai["token_url"] - self.revoke_url = aai["revoke_url"] - self.scope = aai["scope"] - self.jwk = aai["jwk_server"] + self.oidc_url = aai["oidc_url"] self.iss = aai["iss"] - self.user_info = aai["user_info"] - self.nonce = secrets.token_hex() + self.scope = aai["scope"] + self.auth_method = aai["auth_method"] + + self.oidc_conf = { + "aai": { + "issuer": self.iss, + "client_id": self.client_id, + "client_secret": self.client_secret, + "redirect_uris": [self.redirect], + "behaviour": { + "response_types": self.auth_method.split(" "), + "scope": self.scope.split(" "), + }, + }, + } + self.rph = RPHandler(self.oidc_url, client_configs=self.oidc_conf) async def login(self, req: Request) -> Response: """Redirect user to AAI login. 
- :param req: A HTTP request instance + :param req: A HTTP request instance (unused) :raises: HTTPSeeOther redirect to login AAI """ - # Generate a state for callback and save it to session storage - state = secrets.token_hex() - req.app["OIDC_State"].add(state) - LOG.debug("Start login") - # Parameters for authorisation request - params = { - "client_id": self.client_id, - "response_type": "code", - "state": state, - "redirect_uri": self.callback_url, - "scope": self.scope, - "nonce": self.nonce, - } - # Prepare response - url = f"{self.auth_url}?{urllib.parse.urlencode(params)}" - response = web.HTTPSeeOther(url) - response.headers["Location"] = url + # Generate authentication payload + session = self.rph.begin("aai") + + # Redirect user to AAI + response = web.HTTPSeeOther(session["url"]) + response.headers["Location"] = session["url"] raise response async def callback(self, req: Request) -> Response: @@ -79,6 +73,7 @@ async def callback(self, req: Request) -> Response: :param req: A HTTP request instance with callback parameters :returns: HTTPSeeOther redirect to home page """ + # Response from AAI must have the query params `state` and `code` if "state" in req.query and "code" in req.query: LOG.debug("AAI response contained the correct params.") @@ -88,34 +83,16 @@ async def callback(self, req: Request) -> Response: LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - # Verify, that state is pending - if not params["state"] in req.app["OIDC_State"]: + # Verify oidc_state and retrieve auth session + session = self.rph.get_session_information(params["state"]) + if session is None: raise web.HTTPForbidden(reason="Bad user session.") - auth = BasicAuth(login=self.client_id, password=self.client_secret) - data = {"grant_type": "authorization_code", "code": params["code"], "redirect_uri": self.callback_url} - - # Set up client authentication for request - async with ClientSession(auth=auth) as sess: - # Send request to AAI - async with sess.post(f"{self.token_url}", data=data) as resp: - LOG.debug(f"AAI response status: {resp.status}.") - # Validate response from AAI - if resp.status == 200: - result = await resp.json() - if all(x in result for x in ["id_token", "access_token"]): - LOG.debug("Both ID and Access tokens received.") - access_token = result["access_token"] - id_token = result["id_token"] - await self._validate_jwt(id_token) - else: - reason = "AAI response did not contain access and id tokens." 
- LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - else: - reason = f"Token request to AAI failed: {resp}" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) + # Place authorization_code to session for finalize step + session["auth_request"]["code"] = params["code"] + + # finalize requests id_token and access_token with code, validates them and requests userinfo data + session = self.rph.finalize(session["iss"], session["auth_request"]) response = web.HTTPSeeOther(f"{self.redirect}/home") @@ -146,11 +123,21 @@ async def callback(self, req: Request) -> Response: session_id = cookie["id"] - req.app["Session"][session_id] = {"oidc_state": params["state"], "access_token": access_token} + req.app["Session"][session_id] = {"oidc_state": params["state"], "access_token": session["token"]} req.app["Cookies"].add(session_id) - req.app["OIDC_State"].remove(params["state"]) - await self._set_user(req, session_id, access_token) + user_data: Tuple[str, str] + if "eppn" in session["userinfo"]: + user_data = ( + session["userinfo"]["eppn"], + f"{session['userinfo']['given_name']} {session['userinfo']['family_name']}", + ) + elif "sub" in session["userinfo"]: + user_data = ( + session["userinfo"]["sub"], + f"{session['userinfo']['given_name']} {session['userinfo']['family_name']}", + ) + await self._set_user(req, session_id, user_data) # done like this otherwise it will not redirect properly response.headers["Location"] = "/home" if self.redirect == self.domain else f"{self.redirect}/home" @@ -166,6 +153,7 @@ async def logout(self, req: Request) -> Response: :returns: HTTPSeeOther redirect to login page """ # Revoke token at AAI + # Implement, when revocation_endpoint is supported by AAI try: cookie = decrypt_cookie(req) @@ -185,91 +173,16 @@ async def logout(self, req: Request) -> Response: raise response - async def _set_user(self, req: Request, session_id: str, token: str) -> None: + async def _set_user(self, req: Request, session_id: str, user_data: Tuple[str, str]) -> None: """Set user in current session and return user id based on result of create_user. :raises: HTTPBadRequest in could not get user info from AAI OIDC :param req: A HTTP request instance - :param token: access token from AAI + :param user_data: user id and given name """ - user_data: Tuple[str, str] - try: - headers = CIMultiDict({"Authorization": f"Bearer {token}"}) - async with ClientSession(headers=headers) as sess: - async with sess.get(f"{self.user_info}") as resp: - result = await resp.json() - if "eppn" in result: - user_data = result["eppn"], f"{result['given_name']} {result['family_name']}" - elif "sub" in result: - user_data = result["sub"], f"{result['given_name']} {result['family_name']}" - else: - LOG.error("Could not set user, missing claim eppn or sub.") - raise web.HTTPBadRequest(reason="Could not set user, missing claim eppn or sub.") - except Exception as e: - LOG.error(f"Could not get information from AAI UserInfo endpoint because of: {e}") - raise web.HTTPBadRequest(reason="Could not get information from AAI UserInfo endpoint.") + LOG.debug("Create and set user to database") db_client = req.app["db_client"] operator = UserOperator(db_client) user_id = await operator.create_user(user_data) req.app["Session"][session_id]["user_info"] = user_id - - async def _get_key(self) -> dict: - """Get OAuth2 public key and transform it to usable pem key. 
- - :raises: HTTPUnauthorized in case JWK could not be retrieved - :returns: dictionary with JWK (JSON Web Keys) - """ - try: - async with ClientSession() as session: - async with session.get(self.jwk) as r: - # This can be a single key or a list of JWK - return await r.json() - except Exception: - raise web.HTTPUnauthorized(reason="JWK cannot be retrieved") - - async def _validate_jwt(self, token: str) -> None: - """Validate id token from AAI according to OIDC specs. - - :raises: HTTPUnauthorized in case token is missing claim, has expired signature or invalid - :raises: HTTPForbidden does not provide access to the token received - :param token: id token received from AAI - """ - key = await self._get_key() # JWK used to decode token with - claims_options = { - "iss": { - "essential": True, - "values": self.iss, - }, - "aud": {"essential": True, "value": self.client_id}, - "exp": {"essential": True}, - "iat": {"essential": True}, - } - claims_params = { - "auth_time": {"essential": True}, - "acr": { - "essential": True, - "values": f"{self.iss}/LoginHaka,{self.iss}/LoginCSC", - }, - "nonce": self.nonce, - } - try: - LOG.debug("Validate ID Token") - - decoded_data = jwt.decode( - token, key, claims_options=claims_options, claims_params=claims_params, claims_cls=CodeIDToken - ) # decode the token - decoded_data.validate() # validate the token contents - # Testing the exceptions is done in integration tests - except MissingClaimError as e: - raise web.HTTPUnauthorized(reason=f"Missing claim(s): {e}") - except ExpiredTokenError as e: - raise web.HTTPUnauthorized(reason=f"Expired signature: {e}") - except InvalidClaimError as e: - raise web.HTTPForbidden(reason=f"Token info not corresponding with claim: {e}") - except InvalidTokenError as e: - raise web.HTTPUnauthorized(reason=f"Invalid authorization token: {e}") - except DecodeError as e: - raise web.HTTPUnauthorized(reason=f"Invalid JWT format: {e}") - except Exception: - raise web.HTTPForbidden(reason="No access") diff --git a/metadata_backend/api/middlewares.py b/metadata_backend/api/middlewares.py index beaae4127..6efc4f158 100644 --- a/metadata_backend/api/middlewares.py +++ b/metadata_backend/api/middlewares.py @@ -112,7 +112,7 @@ async def check_login(request: Request, handler: Callable) -> StreamResponse: or (request.path.startswith("/") and request.path.endswith(tuple([".svg", ".jpg", ".ico", ".json"]))) ): return await handler(request) - if request.path.startswith(tuple(controlled_paths)) and "OIDC_URL" in os.environ and bool(os.getenv("OIDC_URL")): + if request.path.startswith(tuple(controlled_paths)) and "ISS_URL" in os.environ and bool(os.getenv("ISS_URL")): cookie = decrypt_cookie(request) session = request.app["Session"].setdefault(cookie["id"], {}) if not all(x in ["access_token", "user_info", "oidc_state"] for x in session): @@ -129,7 +129,7 @@ async def check_login(request: Request, handler: Callable) -> StreamResponse: raise web.HTTPUnauthorized(headers={"WWW-Authenticate": 'OAuth realm="/", charset="UTF-8"'}) return await handler(request) - elif "OIDC_URL" in os.environ and bool(os.getenv("OIDC_URL")): + elif "ISS_URL" in os.environ and bool(os.getenv("ISS_URL")): LOG.debug(f"not authorised to view this page {request.path}") raise web.HTTPUnauthorized(headers={"WWW-Authenticate": 'OAuth realm="/", charset="UTF-8"'}) else: diff --git a/metadata_backend/conf/conf.py b/metadata_backend/conf/conf.py index 1a997b516..7c4a67171 100644 --- a/metadata_backend/conf/conf.py +++ b/metadata_backend/conf/conf.py @@ -144,17 +144,12 @@ 
def create_db_client() -> AsyncIOMotorClient: "redirect": f'{os.getenv("REDIRECT_URL")}' if bool(os.getenv("REDIRECT_URL")) else os.getenv("BASE_URL", "http://localhost:5430"), - "scope": "openid profile email", + "scope": os.getenv("OIDC_SCOPE", "openid profile email"), "iss": os.getenv("ISS_URL", ""), "callback_url": f'{os.getenv("BASE_URL", "http://localhost:5430").rstrip("/")}/callback', - "auth_url": f'{os.getenv("AUTH_URL", "")}' - if bool(os.getenv("AUTH_URL")) - else f'{os.getenv("OIDC_URL", "").rstrip("/")}/authorize', - "token_url": f'{os.getenv("OIDC_URL", "").rstrip("/")}/token', - "user_info": f'{os.getenv("OIDC_URL", "").rstrip("/")}/userinfo', - "revoke_url": f'{os.getenv("OIDC_URL", "").rstrip("/")}/revoke', - "jwk_server": f'{os.getenv("JWK_URL", "")}', + "oidc_url": os.getenv("OIDC_URL", "http://auth.org/.well-known/openid-configuration"), "auth_referer": f'{os.getenv("AUTH_REFERER", "")}', + "auth_method": os.getenv("AUTH_METHOD", "code"), } diff --git a/requirements.in b/requirements.in index 691582bf4..8b6c2f4d0 100644 --- a/requirements.in +++ b/requirements.in @@ -8,4 +8,4 @@ requests uvloop xmlschema ujson -Authlib +oidcrp diff --git a/requirements.txt b/requirements.txt index 2f295d731..3ff5e5cbf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with python 3.8 # To update, run: # -# pip-compile requirements.in +# pip-compile # aiohttp==3.8.1 # via -r requirements.in @@ -14,8 +14,6 @@ attrs==21.2.0 # via # aiohttp # jsonschema -authlib==0.15.5 - # via -r requirements.in certifi==2021.10.8 # via requests cffi==1.15.0 @@ -27,9 +25,14 @@ charset-normalizer==2.0.7 cryptography==36.0.1 # via # -r requirements.in - # authlib + # cryptojwt + # pyopenssl +cryptojwt==1.6.1 + # via oidcmsg elementpath==2.4.0 # via xmlschema +filelock==3.4.0 + # via oidcmsg frozenlist==1.2.0 # via # aiohttp @@ -40,6 +43,8 @@ idna==3.3 # via # requests # yarl +importlib-resources==5.4.0 + # via jsonschema jsonschema==4.3.3 # via -r requirements.in motor==2.5.1 @@ -48,30 +53,54 @@ multidict==5.2.0 # via # aiohttp # yarl +oidcmsg==1.5.4 + # via oidcrp +oidcrp==2.1.3 + # via -r requirements.in pycparser==2.21 # via cffi pymongo==3.12.1 # via motor +pyopenssl==21.0.0 + # via oidcmsg pyrsistent==0.18.0 # via jsonschema python-dateutil==2.8.2 # via -r requirements.in +pyyaml==6.0 + # via + # oidcmsg + # oidcrp +readerwriterlock==1.0.9 + # via cryptojwt requests==2.26.0 - # via -r requirements.in + # via + # -r requirements.in + # cryptojwt + # responses +responses==0.16.0 + # via oidcrp six==1.16.0 - # via python-dateutil + # via + # pyopenssl + # python-dateutil + # responses typing-extensions==4.0.0 # via async-timeout ujson==5.1.0 # via -r requirements.in urllib3==1.26.7 - # via requests + # via + # requests + # responses uvloop==0.16.0 # via -r requirements.in xmlschema==1.9.2 # via -r requirements.in yarl==1.7.2 # via aiohttp +zipp==3.6.0 + # via importlib-resources # The following packages are considered to be unsafe in a requirements file: # setuptools From 2ac5cae624e2b369b7f3eea0fa7e11fc57a1d98d Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Tue, 21 Dec 2021 13:31:03 +0200 Subject: [PATCH 142/336] remove deprecated envs --- .env.example | 4 +--- .github/workflows/int.yml | 4 ++-- docker-compose-tls.yml | 1 - docker-compose.yml | 2 -- docs/submitter.rst | 16 +++++----------- metadata_backend/api/auth.py | 4 ++-- metadata_backend/api/middlewares.py | 6 +++--- metadata_backend/conf/conf.py | 4 +--- 
tests/integration/run_tests.py | 2 +- 9 files changed, 15 insertions(+), 28 deletions(-) diff --git a/.env.example b/.env.example index 81e90b774..3da080cc5 100644 --- a/.env.example +++ b/.env.example @@ -1,9 +1,7 @@ # authentication AAI_CLIENT_SECRET=secret_must_be_long AAI_CLIENT_ID=aud2 -ISS_URL=http://mockauth:8000 -OIDC_URL=http://mockauth:8000/.well-known/openid-configuration -AUTH_REFERER=http://mockauth:8000 +OIDC_URL=http://mockauth:8000 # app urls BASE_URL=http://localhost:5430 diff --git a/.github/workflows/int.yml b/.github/workflows/int.yml index fb90168c4..bd2d36633 100644 --- a/.github/workflows/int.yml +++ b/.github/workflows/int.yml @@ -42,7 +42,7 @@ jobs: python tests/integration/run_tests.py env: BASE_URL: http://localhost:5430 - ISS_URL: http://localhost:8000 + OIDC_URL: http://localhost:8000 MONGO_HOST: localhost:27017 MONGO_DATABASE: default MONGO_AUTHDB: admin @@ -84,7 +84,7 @@ jobs: python tests/integration/run_tests.py env: BASE_URL: http://localhost:5430 - ISS_URL: http://localhost:8000 + OIDC_URL: http://localhost:8000 MONGO_SSL: True - name: Collect logs from docker diff --git a/docker-compose-tls.yml b/docker-compose-tls.yml index 9d913a392..0ce8d5c97 100644 --- a/docker-compose-tls.yml +++ b/docker-compose-tls.yml @@ -25,7 +25,6 @@ services: - "AAI_CLIENT_ID=${AAI_CLIENT_ID}" - "ISS_URL=${ISS_URL}" - "OIDC_URL=${OIDC_URL}" - - "AUTH_REFERER=${AUTH_REFERER}" - "BASE_URL=${BASE_URL}" # Enable this for working with front-end on localhost # - "REDIRECT_URL=${REDIRECT_URL}" diff --git a/docker-compose.yml b/docker-compose.yml index 3902fb587..27809132b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,9 +18,7 @@ services: - "MONGO_HOST=${MONGO_HOST}" - "AAI_CLIENT_SECRET=${AAI_CLIENT_SECRET}" - "AAI_CLIENT_ID=${AAI_CLIENT_ID}" - - "ISS_URL=${ISS_URL}" - "OIDC_URL=${OIDC_URL}" - - "AUTH_REFERER=${AUTH_REFERER}" - "BASE_URL=${BASE_URL}" # Enable this for working with front-end on localhost # - "REDIRECT_URL=${REDIRECT_URL}" diff --git a/docs/submitter.rst b/docs/submitter.rst index fe1eec0f5..8f9d93f9c 100644 --- a/docs/submitter.rst +++ b/docs/submitter.rst @@ -41,15 +41,12 @@ the table below. +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``AAI_CLIENT_ID`` | ``secret`` | OIDC client ID. | Yes | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``AUTH_REFERER`` | ``-`` | OIDC Provider url that redirects the request to the application. | Yes | -+--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``BASE_URL`` | ``http://localhost:5430`` | base URL of the metadata submitter. | Yes | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``ISS_URL`` | ``-`` | OIDC claim issuer URL. | Yes | -+--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``AUTH_METHOD` | ``code` | OIDC Authentication method to use. 
| No | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``OIDC_URL`` | ``-`` | OIDC URL that exposes /.well-known/openid-configuration values | Yes | +| ``OIDC_URL`` | ``-`` | OIDC URL base URL, MUST resolve to configuration endpoint when appended with | Yes | +| | | /.well-known/openid-configuration | | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``OIDC_SCOPE`` | ``openid profile email`` | Claims to request from AAI | No | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ @@ -117,7 +114,7 @@ The Authentication follows the `OIDC Specification /.well-known/openid-configuration``. +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``AAI_CLIENT_ID`` | ``secret`` | OIDC client ID. | Yes | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``AUTH_REFERER`` | ``-`` | OIDC Provider url that redirects the request to the application. | Yes | -+--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``BASE_URL`` | ``http://localhost:5430`` | base URL of the metadata submitter. | Yes | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``ISS_URL`` | ``-`` | OIDC claim issuer URL. | Yes | -+--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``AUTH_METHOD`` | ``code`` | OIDC Authentication method to use. | No | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``OIDC_URL`` | ``-`` | OIDC URL that exposes /.well-known/openid-configuration values. 
| Yes | +| ``OIDC_URL`` | ``-`` | OIDC URL base URL, MUST resolve to configuration endpoint when appended with | Yes | +| | | /.well-known/openid-configuration | | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``OIDC_SCOPE`` | ``openid profile email`` | Claims to request from AAI | No | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ diff --git a/metadata_backend/api/auth.py b/metadata_backend/api/auth.py index 4aeb594c5..32b9f6cb5 100644 --- a/metadata_backend/api/auth.py +++ b/metadata_backend/api/auth.py @@ -27,8 +27,8 @@ def __init__(self, aai: Dict) -> None: self.client_id = aai["client_id"] self.client_secret = aai["client_secret"] self.callback_url = aai["callback_url"] - self.oidc_url = aai["oidc_url"] - self.iss = aai["iss"] + self.oidc_url = aai["oidc_url"].rstrip("/") + "/.well-known/openid-configuration" + self.iss = aai["oidc_url"] self.scope = aai["scope"] self.auth_method = aai["auth_method"] diff --git a/metadata_backend/api/middlewares.py b/metadata_backend/api/middlewares.py index 6efc4f158..edab55bc5 100644 --- a/metadata_backend/api/middlewares.py +++ b/metadata_backend/api/middlewares.py @@ -112,7 +112,7 @@ async def check_login(request: Request, handler: Callable) -> StreamResponse: or (request.path.startswith("/") and request.path.endswith(tuple([".svg", ".jpg", ".ico", ".json"]))) ): return await handler(request) - if request.path.startswith(tuple(controlled_paths)) and "ISS_URL" in os.environ and bool(os.getenv("ISS_URL")): + if request.path.startswith(tuple(controlled_paths)) and "OIDC_URL" in os.environ and bool(os.getenv("OIDC_URL")): cookie = decrypt_cookie(request) session = request.app["Session"].setdefault(cookie["id"], {}) if not all(x in ["access_token", "user_info", "oidc_state"] for x in session): @@ -129,7 +129,7 @@ async def check_login(request: Request, handler: Callable) -> StreamResponse: raise web.HTTPUnauthorized(headers={"WWW-Authenticate": 'OAuth realm="/", charset="UTF-8"'}) return await handler(request) - elif "ISS_URL" in os.environ and bool(os.getenv("ISS_URL")): + elif "OIDC_URL" in os.environ and bool(os.getenv("OIDC_URL")): LOG.debug(f"not authorised to view this page {request.path}") raise web.HTTPUnauthorized(headers={"WWW-Authenticate": 'OAuth realm="/", charset="UTF-8"'}) else: @@ -199,7 +199,7 @@ def _check_csrf(request: web.Request) -> bool: if "redirect" in aai_config and request.headers["Referer"].startswith(aai_config["redirect"]): LOG.info("Skipping Referer check due to request coming from frontend.") return True - if "auth_referer" in aai_config and request.headers["Referer"].startswith(aai_config["auth_referer"]): + if "oidc_url" in aai_config and request.headers["Referer"].startswith(aai_config["oidc_url"]): LOG.info("Skipping Referer check due to request coming from OIDC.") return True if cookie["referer"] not in request.headers["Referer"]: diff --git a/metadata_backend/conf/conf.py b/metadata_backend/conf/conf.py index 7c4a67171..d5e00525d 100644 --- a/metadata_backend/conf/conf.py +++ b/metadata_backend/conf/conf.py @@ -145,10 +145,8 @@ def create_db_client() -> AsyncIOMotorClient: if bool(os.getenv("REDIRECT_URL")) else os.getenv("BASE_URL", "http://localhost:5430"), "scope": os.getenv("OIDC_SCOPE", "openid profile email"), - "iss": os.getenv("ISS_URL", ""), "callback_url": 
f'{os.getenv("BASE_URL", "http://localhost:5430").rstrip("/")}/callback', - "oidc_url": os.getenv("OIDC_URL", "http://auth.org/.well-known/openid-configuration"), - "auth_referer": f'{os.getenv("AUTH_REFERER", "")}', + "oidc_url": os.getenv("OIDC_URL", ""), "auth_method": os.getenv("AUTH_METHOD", "code"), } diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index 6f5bb3e9c..55e2a4b19 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -52,7 +52,7 @@ ("analysis", "ERZ266973.json", "ERZ266973.json"), ] base_url = os.getenv("BASE_URL", "http://localhost:5430") -mock_auth_url = os.getenv("ISS_URL", "http://localhost:8000") +mock_auth_url = os.getenv("OIDC_URL", "http://localhost:8000") objects_url = f"{base_url}/objects" drafts_url = f"{base_url}/drafts" templates_url = f"{base_url}/templates" From 88c9a5866f9b8ae5d8474f10a91ec026b686a10c Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Tue, 21 Dec 2021 14:53:19 +0200 Subject: [PATCH 143/336] fix callback url, used wrong conf value --- metadata_backend/api/auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata_backend/api/auth.py b/metadata_backend/api/auth.py index 32b9f6cb5..124e77c4a 100644 --- a/metadata_backend/api/auth.py +++ b/metadata_backend/api/auth.py @@ -37,7 +37,7 @@ def __init__(self, aai: Dict) -> None: "issuer": self.iss, "client_id": self.client_id, "client_secret": self.client_secret, - "redirect_uris": [self.redirect], + "redirect_uris": [self.callback_url], "behaviour": { "response_types": self.auth_method.split(" "), "scope": self.scope.split(" "), From 48e08d55f2764da7a403e398bc7486bf6a148f36 Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Tue, 21 Dec 2021 15:10:23 +0200 Subject: [PATCH 144/336] state checking raises key error on unknown state key --- metadata_backend/api/auth.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/metadata_backend/api/auth.py b/metadata_backend/api/auth.py index 124e77c4a..b7fa4535b 100644 --- a/metadata_backend/api/auth.py +++ b/metadata_backend/api/auth.py @@ -84,7 +84,11 @@ async def callback(self, req: Request) -> Response: raise web.HTTPBadRequest(reason=reason) # Verify oidc_state and retrieve auth session - session = self.rph.get_session_information(params["state"]) + session = None + try: + session = self.rph.get_session_information(params["state"]) + except KeyError as e: + LOG.error(f"Session not initialised: {e}") if session is None: raise web.HTTPForbidden(reason="Bad user session.") From a7b8205086b813f754dec23283ee80f5e0f6aa1e Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Wed, 22 Dec 2021 11:07:26 +0200 Subject: [PATCH 145/336] finalize can raise an exception when requesting and validating tokens, userinfo --- metadata_backend/api/auth.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/metadata_backend/api/auth.py b/metadata_backend/api/auth.py index b7fa4535b..81fbc48d6 100644 --- a/metadata_backend/api/auth.py +++ b/metadata_backend/api/auth.py @@ -8,6 +8,7 @@ from .middlewares import decrypt_cookie, generate_cookie from .operators import UserOperator from oidcrp.rp_handler import RPHandler +from oidcrp.exception import OidcServiceError from typing import Dict, Tuple @@ -89,14 +90,17 @@ async def callback(self, req: Request) -> Response: session = self.rph.get_session_information(params["state"]) except KeyError as e: LOG.error(f"Session not initialised: {e}") - if session is None: raise web.HTTPForbidden(reason="Bad user 
session.") # Place authorization_code to session for finalize step session["auth_request"]["code"] = params["code"] # finalize requests id_token and access_token with code, validates them and requests userinfo data - session = self.rph.finalize(session["iss"], session["auth_request"]) + try: + session = self.rph.finalize(session["iss"], session["auth_request"]) + except OidcServiceError as e: + LOG.error(f"OIDC Callback failed with: {e}") + raise web.HTTPBadRequest(reason="Invalid OIDC callback.") response = web.HTTPSeeOther(f"{self.redirect}/home") From 40bce54b35f80603cdaa49f55079904937df4580 Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Wed, 22 Dec 2021 12:32:57 +0200 Subject: [PATCH 146/336] update auth.py tests, add more exceptions --- metadata_backend/api/auth.py | 20 ++++- tests/test_auth.py | 154 +++++++++++++---------------------- tests/test_middlewares.py | 2 +- 3 files changed, 77 insertions(+), 99 deletions(-) diff --git a/metadata_backend/api/auth.py b/metadata_backend/api/auth.py index 81fbc48d6..c01233337 100644 --- a/metadata_backend/api/auth.py +++ b/metadata_backend/api/auth.py @@ -56,7 +56,14 @@ async def login(self, req: Request) -> Response: LOG.debug("Start login") # Generate authentication payload - session = self.rph.begin("aai") + session = None + try: + session = self.rph.begin("aai") + except Exception as e: + # This can be caused if config is improperly configured, and + # oidcrp is unable to fetch oidc configuration from the given URL + LOG.error(f"OIDC authorization request failed: {e}") + raise web.HTTPInternalServerError(reason="OIDC authorization request failed.") # Redirect user to AAI response = web.HTTPSeeOther(session["url"]) @@ -89,6 +96,7 @@ async def callback(self, req: Request) -> Response: try: session = self.rph.get_session_information(params["state"]) except KeyError as e: + # This exception is raised if the RPHandler doesn't have the supplied "state" LOG.error(f"Session not initialised: {e}") raise web.HTTPForbidden(reason="Bad user session.") @@ -98,7 +106,14 @@ async def callback(self, req: Request) -> Response: # finalize requests id_token and access_token with code, validates them and requests userinfo data try: session = self.rph.finalize(session["iss"], session["auth_request"]) + except KeyError as e: + LOG.error(f"Issuer {session['iss']} not found: {e}.") + raise web.HTTPBadRequest(reason="Token issuer not found.") except OidcServiceError as e: + # This exception is raised if RPHandler encounters an error due to: + # 1. "code" is wrong, so token request failed + # 2. token validation failed + # 3. 
userinfo request failed LOG.error(f"OIDC Callback failed with: {e}") raise web.HTTPBadRequest(reason="Invalid OIDC callback.") @@ -145,6 +160,9 @@ async def callback(self, req: Request) -> Response: session["userinfo"]["sub"], f"{session['userinfo']['given_name']} {session['userinfo']['family_name']}", ) + else: + LOG.error("User was authenticated, but they are missing mandatory claim eppn or sub.") + raise web.HTTPBadRequest(reason="Could not set user, missing claim eppn or sub.") await self._set_user(req, session_id, user_data) # done like this otherwise it will not redirect properly diff --git a/tests/test_auth.py b/tests/test_auth.py index a17f03c75..46cf2003e 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -1,5 +1,5 @@ """Test API auth endpoints.""" -from aiohttp.web_exceptions import HTTPForbidden, HTTPUnauthorized, HTTPBadRequest +from aiohttp.web_exceptions import HTTPForbidden, HTTPInternalServerError, HTTPSeeOther, HTTPBadRequest from metadata_backend.api.auth import AccessHandler from unittest.mock import MagicMock, patch from aiohttp.test_utils import AioHTTPTestCase @@ -7,16 +7,9 @@ from metadata_backend.server import init from .mockups import ( - Mock_Request, - MockResponse, get_request_with_fernet, - jwt_data, - jwk_data, - jwt_data_claim_miss, - jwt_data_bad_nonce, ) from unittest import IsolatedAsyncioTestCase -import ujson class AccessHandlerFailTestCase(AioHTTPTestCase): @@ -97,52 +90,16 @@ def setUp(self): "domain": "http://domain.com:5430", "redirect": "http://domain.com:5430", "scope": "openid profile email", - "iss": "http://iss.domain.com:5430", "callback_url": "http://domain.com:5430/callback", - "auth_url": "http://auth.domain.com:5430/authorize", - "token_url": "http://auth.domain.com:5430/token", - "user_info": "http://auth.domain.com:5430/userinfo", - "revoke_url": "http://auth.domain.com:5430/revoke", - "jwk_server": "http://auth.domain.com:5430/jwk", - "auth_referer": "http://auth.domain.com:5430", + "oidc_url": "http://auth.domain.com:5430", + "auth_method": "code", } self.AccessHandler = AccessHandler(access_config) - self.AccessHandler.nonce = "nonce" def tearDown(self): """Cleanup mocked stuff.""" pass - async def test_get_jwk_fail(self): - """Test retrieving JWK exception.""" - with patch("aiohttp.ClientSession.get", side_effect=HTTPUnauthorized): - with self.assertRaises(HTTPUnauthorized): - await self.AccessHandler._get_key() - - async def test_jwk_key(self): - """Test get jwk key.""" - data = { - "kty": "oct", - "kid": "018c0ae5-4d9b-471b-bfd6-eef314bc7037", - "use": "sig", - "alg": "HS256", - "k": "hJtXIZ2uSN5kbQfbtTNWbpdmhkV8FJG-Onbc6mxCcYg", - } - resp = MockResponse(ujson.dumps(data), 200) - - with patch("aiohttp.ClientSession.get", return_value=resp): - result = await self.AccessHandler._get_key() - self.assertEqual(result, ujson.dumps(data)) - - async def test_set_user_fail(self): - """Test set user raises exception.""" - request = Mock_Request() - tk = ("something",) - session_id = "session_id" - with patch("aiohttp.ClientSession.get", side_effect=HTTPUnauthorized): - with self.assertRaises(HTTPBadRequest): - await self.AccessHandler._set_user(request, session_id, tk) - async def test_set_user(self): """Test set user success.""" request = get_request_with_fernet() @@ -151,53 +108,43 @@ async def test_set_user(self): request.app["db_client"] = MagicMock() request.app["Session"] = {session_id: {}} - tk = "something" - data = { + user_data = { "eppn": "eppn@test.fi", "given_name": "User", "family_name": "Test", } - resp = 
MockResponse(data, 200) - with patch("aiohttp.ClientSession.get", return_value=resp): - with patch("metadata_backend.api.operators.UserOperator.create_user", return_value=new_user_id): - await self.AccessHandler._set_user(request, session_id, tk) + with patch("metadata_backend.api.operators.UserOperator.create_user", return_value=new_user_id): + await self.AccessHandler._set_user(request, session_id, user_data) self.assertIn("user_info", request.app["Session"][session_id]) self.assertEqual(new_user_id, request.app["Session"][session_id]["user_info"]) - async def test_callback_fail(self): - """Test callback fails.""" + async def test_login_fail(self): + """Test login fails due to bad OIDCRP config.""" + # OIDCRP init fails, because AAI config endpoint request fails request = get_request_with_fernet() - request.query["state"] = "state" - request.query["code"] = "code" - request.app["Session"] = {} - request.app["OIDC_State"] = set(("state",)) - resp_no_token = MockResponse({}, 200) - resp_400 = MockResponse({}, 400) + with self.assertRaises(HTTPInternalServerError): + await self.AccessHandler.login(request) - with patch("aiohttp.ClientSession.post", return_value=resp_no_token): - with self.assertRaises(HTTPBadRequest): - await self.AccessHandler.callback(request) - - with patch("aiohttp.ClientSession.post", return_value=resp_400): - with self.assertRaises(HTTPBadRequest): - await self.AccessHandler.callback(request) + async def test_login_pass(self): + """Test login redirects user.""" + response = {"url": "some url"} + request = get_request_with_fernet() + with patch("oidcrp.rp_handler.RPHandler.begin", return_value=response): + with self.assertRaises(HTTPSeeOther): + await self.AccessHandler.login(request) async def test_callback_pass(self): """Test callback correct validation.""" request = get_request_with_fernet() request.query["state"] = "state" request.query["code"] = "code" - request.app["Session"] = {} - request.app["Cookies"] = set({}) - request.app["OIDC_State"] = set(("state",)) - resp_token = MockResponse(jwt_data, 200) - resp_jwk = MockResponse(jwk_data, 200) - - with patch("aiohttp.ClientSession.post", return_value=resp_token): - with patch("aiohttp.ClientSession.get", return_value=resp_jwk): + session = {"iss": "http://auth.domain.com:5430", "auth_request": {}} + finalize = {"token": "token", "userinfo": {"eppn": "eppn", "given_name": "name", "family_name": "name"}} + with patch("oidcrp.rp_handler.RPHandler.get_session_information", return_value=session): + with patch("oidcrp.rp_handler.RPHandler.finalize", return_value=finalize): with patch("metadata_backend.api.auth.AccessHandler._set_user", return_value=None): await self.AccessHandler.callback(request) @@ -206,33 +153,46 @@ async def test_callback_missing_claim(self): request = get_request_with_fernet() request.query["state"] = "state" request.query["code"] = "code" - request.app["Session"] = {} - request.app["Cookies"] = set({}) - request.app["OIDC_State"] = set(("state",)) - resp_token = MockResponse(jwt_data_claim_miss, 200) - resp_jwk = MockResponse(jwk_data, 200) + session = {"iss": "http://auth.domain.com:5430", "auth_request": {}} + finalize = {"token": "token", "userinfo": {}} + with patch("oidcrp.rp_handler.RPHandler.get_session_information", return_value=session): + with patch("oidcrp.rp_handler.RPHandler.finalize", return_value=finalize): + with self.assertRaises(HTTPBadRequest): + await self.AccessHandler.callback(request) - with patch("aiohttp.ClientSession.post", return_value=resp_token): - with 
patch("aiohttp.ClientSession.get", return_value=resp_jwk): - with patch("metadata_backend.api.auth.AccessHandler._set_user", return_value=None): - with self.assertRaises(HTTPUnauthorized): - await self.AccessHandler.callback(request) + async def test_callback_fail_finalize(self): + """Test callback fail finalize.""" + request = get_request_with_fernet() + request.query["state"] = "state" + request.query["code"] = "code" - async def test_callback_bad_claim(self): - """Test callback bad nonce validation.""" + session = {"iss": "http://auth.domain.com:5430", "auth_request": {}} + with patch("oidcrp.rp_handler.RPHandler.get_session_information", return_value=session): + with self.assertRaises(HTTPBadRequest): + await self.AccessHandler.callback(request) + + async def test_callback_bad_state(self): + """Test callback bad state validation.""" request = get_request_with_fernet() request.query["state"] = "state" request.query["code"] = "code" - request.app["OIDC_State"] = set() - request.app["Session"] = {} - request.app["Cookies"] = set({}) - resp_token = MockResponse(jwt_data_bad_nonce, 200) - resp_jwk = MockResponse(jwk_data, 200) + with self.assertRaises(HTTPForbidden): + await self.AccessHandler.callback(request) - with patch("aiohttp.ClientSession.post", return_value=resp_token): - with patch("aiohttp.ClientSession.get", return_value=resp_jwk): - with patch("metadata_backend.api.auth.AccessHandler._set_user", return_value=None): - with self.assertRaises(HTTPForbidden): - await self.AccessHandler.callback(request) + async def test_callback_missing_state(self): + """Test callback bad state validation.""" + request = get_request_with_fernet() + request.query["code"] = "code" + + with self.assertRaises(HTTPBadRequest): + await self.AccessHandler.callback(request) + + async def test_callback_missing_code(self): + """Test callback bad state validation.""" + request = get_request_with_fernet() + request.query["state"] = "state" + + with self.assertRaises(HTTPBadRequest): + await self.AccessHandler.callback(request) diff --git a/tests/test_middlewares.py b/tests/test_middlewares.py index e54ea8bf3..25c9542f7 100644 --- a/tests/test_middlewares.py +++ b/tests/test_middlewares.py @@ -97,7 +97,7 @@ def test_check_csrf_idp_skip(self): """Test check_csrf when skipping referer from auth endpoint.""" with unittest.mock.patch( "metadata_backend.api.middlewares.aai_config", - new={"auth_referer": "http://idp:3000"}, + new={"oidc_url": "http://idp:3000"}, ): testreq = get_request_with_fernet() cookie, _ = generate_cookie(testreq) From 087e601b443409822322b420e1fb100451508e6f Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Wed, 22 Dec 2021 12:33:14 +0200 Subject: [PATCH 147/336] remove deprecated env --- docker-compose-tls.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/docker-compose-tls.yml b/docker-compose-tls.yml index 0ce8d5c97..8deaddbeb 100644 --- a/docker-compose-tls.yml +++ b/docker-compose-tls.yml @@ -23,7 +23,6 @@ services: - "MONGO_SSL_CLIENT_CERT=${MONGO_SSL_CLIENT_CERT}" - "AAI_CLIENT_SECRET=${AAI_CLIENT_SECRET}" - "AAI_CLIENT_ID=${AAI_CLIENT_ID}" - - "ISS_URL=${ISS_URL}" - "OIDC_URL=${OIDC_URL}" - "BASE_URL=${BASE_URL}" # Enable this for working with front-end on localhost From 97b728433e05d05097dfbc6f8bb0f179ee15ba34 Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Wed, 22 Dec 2021 14:24:36 +0200 Subject: [PATCH 148/336] fix bug in docs --- docs/submitter.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/submitter.rst b/docs/submitter.rst index 
8f9d93f9c..c646d8f7c 100644 --- a/docs/submitter.rst +++ b/docs/submitter.rst @@ -43,7 +43,7 @@ the table below. +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``BASE_URL`` | ``http://localhost:5430`` | base URL of the metadata submitter. | Yes | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``AUTH_METHOD` | ``code` | OIDC Authentication method to use. | No | +| ``AUTH_METHOD`` | ``code`` | OIDC Authentication method to use. | No | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``OIDC_URL`` | ``-`` | OIDC URL base URL, MUST resolve to configuration endpoint when appended with | Yes | | | | /.well-known/openid-configuration | | From 5c869e5cfe308eb619c08f6b17ed6eef3301a65f Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Wed, 22 Dec 2021 16:10:04 +0200 Subject: [PATCH 149/336] remove deprecated mockups, update deprecated unit tests --- metadata_backend/api/auth.py | 1 + tests/mockups.py | 132 ----------------------------------- tests/test_auth.py | 31 +++----- 3 files changed, 12 insertions(+), 152 deletions(-) diff --git a/metadata_backend/api/auth.py b/metadata_backend/api/auth.py index c01233337..29dd2fcee 100644 --- a/metadata_backend/api/auth.py +++ b/metadata_backend/api/auth.py @@ -52,6 +52,7 @@ async def login(self, req: Request) -> Response: :param req: A HTTP request instance (unused) :raises: HTTPSeeOther redirect to login AAI + :raises: HTTPInternalServerError if OIDC configuration init failed """ LOG.debug("Start login") diff --git a/tests/mockups.py b/tests/mockups.py index 905a4653d..baad36809 100644 --- a/tests/mockups.py +++ b/tests/mockups.py @@ -6,36 +6,6 @@ import ujson import cryptography.fernet -from cryptography.hazmat.primitives import serialization -from cryptography.hazmat.primitives.asymmetric import rsa -from cryptography.hazmat.backends import default_backend -from authlib.jose import jwt, jwk -from typing import Tuple - - -class MockResponse: - """Mock-up class for HTTP response.""" - - def __init__(self, text, status): - """Initialize Mock Response.""" - self._text = text - self.status = status - - async def text(self): - """Get Mock Response body.""" - return self._text - - async def json(self): - """Get Mock Response body.""" - return self._text - - async def __aexit__(self, exc_type, exc, tb): - """Return async exit.""" - pass - - async def __aenter__(self): - """Return async enter.""" - return self class Mock_Request: @@ -114,105 +84,3 @@ def encrypt_cookie(cookie, req): """Add encrypted cookie to request.""" cookie_crypted = req.app["Crypt"].encrypt(ujson.dumps(cookie).encode("utf-8")).decode("utf-8") req.cookies["MTD_SESSION"] = cookie_crypted - - -def generate_token() -> Tuple: - """Generate RSA Key pair to be used to sign token and the JWT Token itself.""" - private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048, backend=default_backend()) - public_key = private_key.public_key().public_bytes( - encoding=serialization.Encoding.PEM, format=serialization.PublicFormat.SubjectPublicKeyInfo - ) - pem = private_key.private_bytes( - encoding=serialization.Encoding.PEM, - format=serialization.PrivateFormat.TraditionalOpenSSL, - encryption_algorithm=serialization.NoEncryption(), - ) - # we 
set no `exp` and other claims as they are optional in a real scenario these should bde set - # See available claims here: https://www.iana.org/assignments/jwt/jwt.xhtml - # the important claim is the "authorities" - public_jwk = jwk.dumps(public_key, kty="RSA") - private_jwk = jwk.dumps(pem, kty="RSA") - - return (public_jwk, private_jwk) - - -jwk_pair = generate_token() - -keys = [jwk_pair[0]] -keys[0]["kid"] = "rsa1" -jwk_data = {"keys": keys} -header = {"jku": "http://mockauth:8000/jwk", "kid": "rsa1", "alg": "RS256", "typ": "JWT"} -id_token = { - "at_hash": "fSi3VUa5i2o2SgY5gPJZgg", - "sub": "smth", - "eduPersonAffiliation": "member;staff", - "eppn": "eppn@test.fi", - "displayName": "test user", - "iss": "http://iss.domain.com:5430", - "schacHomeOrganizationType": "urn:schac:homeOrganizationType:test:other", - "given_name": "user", - "nonce": "nonce", - "aud": "aud2", - "acr": "http://iss.domain.com:5430/LoginHaka", - "nsAccountLock": "false", - "eduPersonScopedAffiliation": "staff@test.what;member@test.what", - "auth_time": 1606579533, - "name": "test user", - "schacHomeOrganization": "test.what", - "exp": 9999999999, - "iat": 1561621913, - "family_name": "test", - "email": "eppn@test.fi", -} -id_token_no_sub = { - "at_hash": "fSi3VUa5i2o2SgY5gPJZgg", - "eduPersonAffiliation": "member;staff", - "eppn": "eppn@test.fi", - "displayName": "test user", - "iss": "http://iss.domain.com:5430", - "schacHomeOrganizationType": "urn:schac:homeOrganizationType:test:other", - "given_name": "user", - "nonce": "nonce", - "aud": "aud2", - "acr": "http://iss.domain.com:5430/LoginHaka", - "nsAccountLock": "false", - "eduPersonScopedAffiliation": "staff@test.what;member@test.what", - "auth_time": 1606579533, - "name": "test user", - "schacHomeOrganization": "test.what", - "exp": 9999999999, - "iat": 1561621913, - "family_name": "test", - "email": "eppn@test.fi", -} -id_token_bad_nonce = { - "at_hash": "fSi3VUa5i2o2SgY5gPJZgg", - "eduPersonAffiliation": "member;staff", - "eppn": "eppn@test.fi", - "sub": "smth", - "displayName": "test user", - "iss": "http://iss.domain.com:5430", - "schacHomeOrganizationType": "urn:schac:homeOrganizationType:test:other", - "given_name": "user", - "nonce": "", - "aud": "aud2", - "acr": "http://iss.domain.com:5430/LoginHaka", - "nsAccountLock": "false", - "eduPersonScopedAffiliation": "staff@test.what;member@test.what", - "auth_time": 1606579533, - "name": "test user", - "schacHomeOrganization": "test.what", - "exp": 9999999999, - "iat": 1561621913, - "family_name": "test", - "email": "eppn@test.fi", -} -jwt_data = {"access_token": "test", "id_token": jwt.encode(header, id_token, jwk_pair[1]).decode("utf-8")} -jwt_data_claim_miss = { - "access_token": "test", - "id_token": jwt.encode(header, id_token_no_sub, jwk_pair[1]).decode("utf-8"), -} -jwt_data_bad_nonce = { - "access_token": "test", - "id_token": jwt.encode(header, id_token_bad_nonce, jwk_pair[1]).decode("utf-8"), -} diff --git a/tests/test_auth.py b/tests/test_auth.py index 46cf2003e..774085c63 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -36,14 +36,12 @@ async def tearDownAsync(self): await self.client.close() async def test_login_with_default_config_values(self): - """Test that login raises 404 when the AUTH_URL env variable is not a proper endpoint.""" - self.client.app["OIDC_State"] = set() - response = await self.client.get("/aai") - self.assertEqual(response.status, 404) - resp_json = await response.json() - self.assertEqual(resp_json["instance"], "/authorize") - # Also check that we have 
regisitered oidc state - self.assertEqual(1, len(self.client.app["OIDC_State"])) + """Test that login raises 500 when OIDC is improperly configured.""" + with patch("oidcrp.rp_handler.RPHandler.begin", side_effect=Exception): + response = await self.client.get("/aai") + self.assertEqual(response.status, 500) + resp_json = await response.json() + self.assertEqual("OIDC authorization request failed.", resp_json["details"]) async def test_callback_fails_without_query_params(self): """Test that callback endpoint raises 400 if no params provided in the request.""" @@ -54,18 +52,11 @@ async def test_callback_fails_without_query_params(self): async def test_callback_fails_with_wrong_oidc_state(self): """Test that callback endpoint raises 403 when state in the query is not the same as specified in session.""" - self.client.app["Session"] = {} - self.client.app["OIDC_State"] = set() - response = await self.client.get("/callback?state=wrong_value&code=code") - self.assertEqual(response.status, 403) - resp_json = await response.json() - self.assertEqual(resp_json["detail"], "Bad user session.") - - async def test_callback_(self): - """Test that callback.""" - self.client.app["OIDC_State"] = set(("mo_state_value",)) - response = await self.client.get("/callback?state=mo_state_value&code=code") - self.assertIn(response.status, (403, 500)) + with patch("oidcrp.rp_handler.RPHandler.get_session_information", side_effect=KeyError): + response = await self.client.get("/callback?state=wrong_value&code=code") + self.assertEqual(response.status, 403) + resp_json = await response.json() + self.assertEqual(resp_json["detail"], "Bad user session.") async def test_logout_works(self): """Test that logout revokes all tokens.""" From 2d7f618d86c926bb34cb21a3675e1f40fc203f32 Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Thu, 23 Dec 2021 09:09:01 +0200 Subject: [PATCH 150/336] make http 500 middleware behave similarly to other errors --- metadata_backend/api/middlewares.py | 2 +- tests/test_auth.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata_backend/api/middlewares.py b/metadata_backend/api/middlewares.py index edab55bc5..69ce65773 100644 --- a/metadata_backend/api/middlewares.py +++ b/metadata_backend/api/middlewares.py @@ -68,7 +68,7 @@ async def http_error_handler(req: Request, handler: Callable) -> Response: raise web.HTTPUnprocessableEntity(text=details, content_type=c_type) else: _check_error_page_requested(req, 500) - raise web.HTTPServerError() + raise web.HTTPInternalServerError(text=details, content_type=c_type) @middleware diff --git a/tests/test_auth.py b/tests/test_auth.py index 774085c63..80c640a38 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -41,7 +41,7 @@ async def test_login_with_default_config_values(self): response = await self.client.get("/aai") self.assertEqual(response.status, 500) resp_json = await response.json() - self.assertEqual("OIDC authorization request failed.", resp_json["details"]) + self.assertEqual("OIDC authorization request failed.", resp_json["detail"]) async def test_callback_fails_without_query_params(self): """Test that callback endpoint raises 400 if no params provided in the request.""" From b14565e7498970280cd7e0ceac5b74a83dd3a7a7 Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Wed, 5 Jan 2022 12:19:17 +0200 Subject: [PATCH 151/336] update mandatory userinfo keys --- metadata_backend/api/auth.py | 13 ++++++++++--- tests/test_auth.py | 4 ++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git 
a/metadata_backend/api/auth.py b/metadata_backend/api/auth.py
index 29dd2fcee..cbf5cbb0d 100644
--- a/metadata_backend/api/auth.py
+++ b/metadata_backend/api/auth.py
@@ -151,9 +151,14 @@ async def callback(self, req: Request) -> Response:
         req.app["Cookies"].add(session_id)
 
         user_data: Tuple[str, str]
-        if "eppn" in session["userinfo"]:
+        if "CSCUserName" in session["userinfo"]:
             user_data = (
-                session["userinfo"]["eppn"],
+                session["userinfo"]["CSCUserName"],
+                f"{session['userinfo']['given_name']} {session['userinfo']['family_name']}",
+            )
+        elif "remoteUserIdentifier" in session["userinfo"]:
+            user_data = (
+                session["userinfo"]["remoteUserIdentifier"],
                 f"{session['userinfo']['given_name']} {session['userinfo']['family_name']}",
             )
         elif "sub" in session["userinfo"]:
@@ -162,7 +167,9 @@ async def callback(self, req: Request) -> Response:
                 f"{session['userinfo']['given_name']} {session['userinfo']['family_name']}",
             )
         else:
-            LOG.error("User was authenticated, but they are missing mandatory claim eppn or sub.")
+            LOG.error(
+                "User was authenticated, but they are missing mandatory claim CSCUserName, remoteUserIdentifier or sub."
+            )
             raise web.HTTPBadRequest(reason="Could not set user, missing claim eppn or sub.")
         await self._set_user(req, session_id, user_data)
 
diff --git a/tests/test_auth.py b/tests/test_auth.py
index 80c640a38..2ba1e846f 100644
--- a/tests/test_auth.py
+++ b/tests/test_auth.py
@@ -100,7 +100,7 @@ async def test_set_user(self):
         request.app["db_client"] = MagicMock()
         request.app["Session"] = {session_id: {}}
         user_data = {
-            "eppn": "eppn@test.fi",
+            "sub": "user@test.fi",
             "given_name": "User",
             "family_name": "Test",
         }
@@ -133,7 +133,7 @@ async def test_callback_pass(self):
         request.query["code"] = "code"
 
         session = {"iss": "http://auth.domain.com:5430", "auth_request": {}}
-        finalize = {"token": "token", "userinfo": {"eppn": "eppn", "given_name": "name", "family_name": "name"}}
+        finalize = {"token": "token", "userinfo": {"sub": "user", "given_name": "name", "family_name": "name"}}
         with patch("oidcrp.rp_handler.RPHandler.get_session_information", return_value=session):
             with patch("oidcrp.rp_handler.RPHandler.finalize", return_value=finalize):
                 with patch("metadata_backend.api.auth.AccessHandler._set_user", return_value=None):

From d91bbad6318d20379b27d85c2cae73bbb49dea54 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 10 Jan 2022 09:05:46 +0000
Subject: [PATCH 152/336] Bump tox from 3.24.4 to 3.24.5

Bumps [tox](https://github.com/tox-dev/tox) from 3.24.4 to 3.24.5.
- [Release notes](https://github.com/tox-dev/tox/releases)
- [Changelog](https://github.com/tox-dev/tox/blob/master/docs/changelog.rst)
- [Commits](https://github.com/tox-dev/tox/compare/3.24.4...3.24.5)

---
updated-dependencies:
- dependency-name: tox
  dependency-type: direct:development
  update-type: version-update:semver-patch
...
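
The claim precedence used in the callback above (``CSCUserName``, then ``remoteUserIdentifier``, then ``sub``) can be expressed compactly; a minimal sketch, assuming the finalized userinfo always carries ``given_name`` and ``family_name`` as the mock server and tests do:

    from typing import Dict, Tuple

    def resolve_user(userinfo: Dict) -> Tuple[str, str]:
        """Resolve (user id, display name), preferring CSCUserName over remoteUserIdentifier over sub."""
        name = f"{userinfo['given_name']} {userinfo['family_name']}"
        for claim in ("CSCUserName", "remoteUserIdentifier", "sub"):
            if claim in userinfo:
                return userinfo[claim], name
        raise KeyError("missing mandatory claim CSCUserName, remoteUserIdentifier or sub")
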
Signed-off-by: dependabot[bot] --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index b1c7301bb..f352430c2 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -72,7 +72,7 @@ tomli==1.2.2 # via # black # pep517 -tox==3.24.4 +tox==3.24.5 # via -r requirements-dev.in typing-extensions==4.0.0 # via black From 4690ae046378ee25cc80b6471970187eb7a39c9a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Jan 2022 09:06:46 +0000 Subject: [PATCH 153/336] Bump requests from 2.26.0 to 2.27.1 Bumps [requests](https://github.com/psf/requests) from 2.26.0 to 2.27.1. - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.26.0...v2.27.1) --- updated-dependencies: - dependency-name: requests dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 2f295d731..779d1a4fc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -56,7 +56,7 @@ pyrsistent==0.18.0 # via jsonschema python-dateutil==2.8.2 # via -r requirements.in -requests==2.26.0 +requests==2.27.1 # via -r requirements.in six==1.16.0 # via python-dateutil From 1f98db976d54d264ab1a98ad651d5989cd11889a Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Tue, 11 Jan 2022 10:37:32 +0200 Subject: [PATCH 154/336] update integration tests --- Dockerfile-dev | 1 + docs/submitter.rst | 2 +- metadata_backend/api/auth.py | 4 +- tests/integration/mock_auth.py | 132 +++++++++++++++++++++++++++++---- tests/integration/run_tests.py | 7 +- 5 files changed, 125 insertions(+), 21 deletions(-) diff --git a/Dockerfile-dev b/Dockerfile-dev index fefef7fb5..748a2a0f6 100644 --- a/Dockerfile-dev +++ b/Dockerfile-dev @@ -13,6 +13,7 @@ COPY requirements.txt . COPY metadata_backend/ ./metadata_backend RUN pip install . +RUN pip install authlib==0.15.5 # required for mockauth (integration test) EXPOSE 5430 diff --git a/docs/submitter.rst b/docs/submitter.rst index c646d8f7c..566b16792 100644 --- a/docs/submitter.rst +++ b/docs/submitter.rst @@ -118,7 +118,7 @@ We follow the steps of the OpenID Connect protocol. - The OP authenticates the End-User and obtains authorization. - The OP responds with an ID Token and usually an Access Token, which are validated with configuration provided by ``OIDC_URL``. - The RP can send a request with the Access Token to the UserInfo Endpoint. -- The UserInfo Endpoint returns Claims about the End-User, use use some claims ``sub`` and ``eppn`` to identify the user and start a session. +- The UserInfo Endpoint returns Claims about the End-User, use claims ``sub``, ``CSCUserName`` or ``remoteUserIdentifier`` to identify the user and start a session. Information related to the OpenID Provider (OP) that needs to be configured is displayed in the table below. Most of the information can be retrieved from `OIDC Provider `_ metadata diff --git a/metadata_backend/api/auth.py b/metadata_backend/api/auth.py index cbf5cbb0d..5879545d8 100644 --- a/metadata_backend/api/auth.py +++ b/metadata_backend/api/auth.py @@ -170,7 +170,9 @@ async def callback(self, req: Request) -> Response: LOG.error( "User was authenticated, but they are missing mandatory claim CSCUserName, remoteUserIdentifier or sub." 
) - raise web.HTTPBadRequest(reason="Could not set user, missing claim eppn or sub.") + raise web.HTTPBadRequest( + reason="Could not set user, missing claim CSCUserName, remoteUserIdentifier or sub." + ) await self._set_user(req, session_id, user_data) # done like this otherwise it will not redirect properly diff --git a/tests/integration/mock_auth.py b/tests/integration/mock_auth.py index 1acb9897f..9ba42cba3 100644 --- a/tests/integration/mock_auth.py +++ b/tests/integration/mock_auth.py @@ -1,5 +1,6 @@ """Mock OAUTH2 aiohttp.web server.""" +from time import time from aiohttp import web from cryptography.hazmat.primitives import serialization from cryptography.hazmat.primitives.asymmetric import rsa @@ -30,10 +31,15 @@ def generate_token() -> Tuple: return (public_jwk, private_jwk) +# oidcrp is strict about iat, exp, ttl, so we can't hard code them +iat = int(time()) +ttl = 3600 +exp = iat + ttl + nonce = "" jwk_pair = generate_token() -user_eppn = "" +user_sub = "" user_given_name = "" user_family_name = "" @@ -42,12 +48,12 @@ def generate_token() -> Tuple: async def setmock(req: web.Request) -> web.Response: """Auth endpoint.""" - global user_eppn, user_family_name, user_given_name - user_eppn = req.query["eppn"] + global user_sub, user_family_name, user_given_name + user_sub = req.query["sub"] user_family_name = req.query["family"] user_given_name = req.query["given"] - logging.info(user_eppn, user_family_name, user_given_name) + logging.info(user_sub, user_family_name, user_given_name) return web.HTTPOk() @@ -71,12 +77,12 @@ async def auth(req: web.Request) -> web.Response: async def token(req: web.Request) -> web.Response: """Auth endpoint.""" - global nonce, user_eppn, user_family_name, user_given_name + global nonce, user_sub, user_family_name, user_given_name id_token = { "at_hash": "fSi3VUa5i2o2SgY5gPJZgg", "sub": "smth", "eduPersonAffiliation": "member;staff", - "eppn": user_eppn, + "sub": user_sub, "displayName": f"{user_given_name} {user_family_name}", "iss": "http://mockauth:8000", "schacHomeOrganizationType": "urn:schac:homeOrganizationType:test:other", @@ -86,15 +92,20 @@ async def token(req: web.Request) -> web.Response: "acr": "http://mockauth:8000/LoginHaka", "nsAccountLock": "false", "eduPersonScopedAffiliation": "staff@test.what;member@test.what", - "auth_time": 1606579533, + "auth_time": iat, "name": f"{user_given_name} {user_family_name}", "schacHomeOrganization": "test.what", - "exp": 9999999999, - "iat": 1561621913, + "exp": exp, + "iat": iat, "family_name": user_family_name, - "email": user_eppn, + "email": user_sub, + } + data = { + "access_token": "test", + "id_token": jwt.encode(header, id_token, jwk_pair[1]).decode("utf-8"), + "token_type": "Bearer", + "expires_in": ttl, } - data = {"access_token": "test", "id_token": jwt.encode(header, id_token, jwk_pair[1]).decode("utf-8")} logging.info(data) @@ -114,21 +125,20 @@ async def jwk_response(request: web.Request) -> web.Response: async def userinfo(request: web.Request) -> web.Response: """Mock an authentication to ELIXIR AAI for GA4GH claims.""" - global nonce, user_eppn, user_family_name, user_given_name + global nonce, user_sub, user_family_name, user_given_name user_info = { - "sub": "smth", "eduPersonAffiliation": "member;staff", - "eppn": user_eppn, + "sub": user_sub, "displayName": f"{user_given_name} {user_family_name}", "schacHomeOrganizationType": "urn:schac:homeOrganizationType:test:other", "given_name": user_given_name, - "uid": user_eppn, + "uid": user_sub, "nsAccountLock": "false", 
"eduPersonScopedAffiliation": "staff@test.what;member@test.what", "name": f"{user_given_name} {user_family_name}", "schacHomeOrganization": "test.what", "family_name": user_family_name, - "email": user_eppn, + "email": user_sub, } logging.info(user_info) @@ -136,6 +146,95 @@ async def userinfo(request: web.Request) -> web.Response: return web.json_response(user_info) +async def oidc_config(request: web.Request) -> web.Response: + """Return standard OIDC configuration.""" + oidc_config_json = { + "issuer": "http://mockauth:8000", + "authorization_endpoint": "http://localhost:8000/authorize", # must be localhost to be accessible outside of docker-network + "token_endpoint": "http://mockauth:8000/token", + "userinfo_endpoint": "http://mockauth:8000/userinfo", + "jwks_uri": "http://mockauth:8000/keyset", + "response_types_supported": [ + "code", + "id_token", + "token id_token", + "code id_token", + "code token", + "code token id_token", + ], + "subject_types_supported": ["public", "pairwise"], + "grant_types_supported": [ + "authorization_code", + "implicit", + "refresh_token", + "urn:ietf:params:oauth:grant-type:device_code", + ], + "id_token_encryption_alg_values_supported": [ + "RSA1_5", + "RSA-OAEP", + "RSA-OAEP-256", + "A128KW", + "A192KW", + "A256KW", + "A128GCMKW", + "A192GCMKW", + "A256GCMKW", + ], + "id_token_encryption_enc_values_supported": ["A128CBC-HS256"], + "id_token_signing_alg_values_supported": ["RS256", "RS384", "RS512", "HS256", "HS384", "HS512", "ES256"], + "userinfo_encryption_alg_values_supported": [ + "RSA1_5", + "RSA-OAEP", + "RSA-OAEP-256", + "A128KW", + "A192KW", + "A256KW", + "A128GCMKW", + "A192GCMKW", + "A256GCMKW", + ], + "userinfo_encryption_enc_values_supported": ["A128CBC-HS256"], + "userinfo_signing_alg_values_supported": ["RS256", "RS384", "RS512", "HS256", "HS384", "HS512", "ES256"], + "request_object_signing_alg_values_supported": [ + "none", + "RS256", + "RS384", + "RS512", + "HS256", + "HS384", + "HS512", + "ES256", + "ES384", + "ES512", + ], + "token_endpoint_auth_methods_supported": [ + "client_secret_basic", + "client_secret_post", + "client_secret_jwt", + "private_key_jwt", + ], + "claims_parameter_supported": True, + "request_parameter_supported": True, + "request_uri_parameter_supported": False, + "require_request_uri_registration": False, + "display_values_supported": ["page"], + "scopes_supported": ["openid"], + "response_modes_supported": ["query", "fragment", "form_post"], + "claims_supported": [ + "aud", + "iss", + "sub", + "iat", + "exp", + "acr", + "auth_time", + "ga4gh_passport_v1", + "remoteUserIdentifier", + ], + } + return web.json_response(oidc_config_json) + + def init() -> web.Application: """Start server.""" app = web.Application() @@ -144,6 +243,7 @@ def init() -> web.Application: app.router.add_post("/token", token) app.router.add_get("/keyset", jwk_response) app.router.add_get("/userinfo", userinfo) + app.router.add_get("/.well-known/openid-configuration", oidc_config) return app diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index 55e2a4b19..d6a40453c 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -52,7 +52,8 @@ ("analysis", "ERZ266973.json", "ERZ266973.json"), ] base_url = os.getenv("BASE_URL", "http://localhost:5430") -mock_auth_url = os.getenv("OIDC_URL", "http://localhost:8000") +# mock_auth_url = os.getenv("OIDC_URL", "http://localhost:8000") +mock_auth_url = "http://localhost:8000" # must be localhost, accessible from outside of docker-network objects_url = 
f"{base_url}/objects" drafts_url = f"{base_url}/drafts" templates_url = f"{base_url}/templates" @@ -77,10 +78,10 @@ # === Helper functions === -async def login(sess, eppn, given, family): +async def login(sess, sub, given, family): """Mock login.""" params = { - "eppn": eppn, + "sub": sub, "family": family, "given": given, } From e07b64141a28d4c60a0ef78131940ecd4a971899 Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Tue, 11 Jan 2022 10:43:04 +0200 Subject: [PATCH 155/336] fix flake8 and spellcheck --- .wordlist.txt | 2 ++ tests/integration/mock_auth.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.wordlist.txt b/.wordlist.txt index 69e52ff6f..63d834985 100644 --- a/.wordlist.txt +++ b/.wordlist.txt @@ -24,6 +24,7 @@ createFromXML createNewDraftFolder createSlice CSC +CSCUserName CSCfi cscfi css @@ -115,6 +116,7 @@ readthedocs redux REFERER reqs +remoteUserIdentifier rootReducer rst schemas diff --git a/tests/integration/mock_auth.py b/tests/integration/mock_auth.py index 9ba42cba3..5114893d9 100644 --- a/tests/integration/mock_auth.py +++ b/tests/integration/mock_auth.py @@ -80,7 +80,6 @@ async def token(req: web.Request) -> web.Response: global nonce, user_sub, user_family_name, user_given_name id_token = { "at_hash": "fSi3VUa5i2o2SgY5gPJZgg", - "sub": "smth", "eduPersonAffiliation": "member;staff", "sub": user_sub, "displayName": f"{user_given_name} {user_family_name}", @@ -150,7 +149,8 @@ async def oidc_config(request: web.Request) -> web.Response: """Return standard OIDC configuration.""" oidc_config_json = { "issuer": "http://mockauth:8000", - "authorization_endpoint": "http://localhost:8000/authorize", # must be localhost to be accessible outside of docker-network + # must be localhost to be accessible outside of docker-network + "authorization_endpoint": "http://localhost:8000/authorize", "token_endpoint": "http://mockauth:8000/token", "userinfo_endpoint": "http://mockauth:8000/userinfo", "jwks_uri": "http://mockauth:8000/keyset", From 8d81026c4e5704653aff390da3abba8050781225 Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Tue, 11 Jan 2022 13:52:24 +0200 Subject: [PATCH 156/336] differentiate docker and local urls of mockauth, remove authlib version --- Dockerfile-dev | 2 +- tests/integration/mock_auth.py | 21 ++++++++++++--------- tests/integration/run_tests.py | 3 +-- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/Dockerfile-dev b/Dockerfile-dev index 748a2a0f6..40dcefcf7 100644 --- a/Dockerfile-dev +++ b/Dockerfile-dev @@ -13,7 +13,7 @@ COPY requirements.txt . COPY metadata_backend/ ./metadata_backend RUN pip install . 
-RUN pip install authlib==0.15.5 # required for mockauth (integration test) +RUN pip install authlib # required for mockauth (integration test) EXPOSE 5430 diff --git a/tests/integration/mock_auth.py b/tests/integration/mock_auth.py index 5114893d9..ede29bf25 100644 --- a/tests/integration/mock_auth.py +++ b/tests/integration/mock_auth.py @@ -1,5 +1,6 @@ """Mock OAUTH2 aiohttp.web server.""" +from os import getenv from time import time from aiohttp import web from cryptography.hazmat.primitives import serialization @@ -43,7 +44,10 @@ def generate_token() -> Tuple: user_given_name = "" user_family_name = "" -header = {"jku": "http://mockauth:8000/jwk", "kid": "rsa1", "alg": "RS256", "typ": "JWT"} +mock_auth_url_docker = getenv("OIDC_URL", "http://mockauth:8000") # called from inside docker-network +mock_auth_url_local = getenv("OIDC_URL_TEST", "http://localhost:8000") # called from local machine + +header = {"jku": f"{mock_auth_url_docker}/jwk", "kid": "rsa1", "alg": "RS256", "typ": "JWT"} async def setmock(req: web.Request) -> web.Response: @@ -83,12 +87,12 @@ async def token(req: web.Request) -> web.Response: "eduPersonAffiliation": "member;staff", "sub": user_sub, "displayName": f"{user_given_name} {user_family_name}", - "iss": "http://mockauth:8000", + "iss": mock_auth_url_docker, "schacHomeOrganizationType": "urn:schac:homeOrganizationType:test:other", "given_name": user_given_name, "nonce": nonce, "aud": "aud2", - "acr": "http://mockauth:8000/LoginHaka", + "acr": f"{mock_auth_url_docker}/LoginHaka", "nsAccountLock": "false", "eduPersonScopedAffiliation": "staff@test.what;member@test.what", "auth_time": iat, @@ -148,12 +152,11 @@ async def userinfo(request: web.Request) -> web.Response: async def oidc_config(request: web.Request) -> web.Response: """Return standard OIDC configuration.""" oidc_config_json = { - "issuer": "http://mockauth:8000", - # must be localhost to be accessible outside of docker-network - "authorization_endpoint": "http://localhost:8000/authorize", - "token_endpoint": "http://mockauth:8000/token", - "userinfo_endpoint": "http://mockauth:8000/userinfo", - "jwks_uri": "http://mockauth:8000/keyset", + "issuer": mock_auth_url_docker, + "authorization_endpoint": f"{mock_auth_url_local}/authorize", + "token_endpoint": f"{mock_auth_url_docker}/token", + "userinfo_endpoint": f"{mock_auth_url_docker}/userinfo", + "jwks_uri": f"{mock_auth_url_docker}/keyset", "response_types_supported": [ "code", "id_token", diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index d6a40453c..edc404b95 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -52,8 +52,7 @@ ("analysis", "ERZ266973.json", "ERZ266973.json"), ] base_url = os.getenv("BASE_URL", "http://localhost:5430") -# mock_auth_url = os.getenv("OIDC_URL", "http://localhost:8000") -mock_auth_url = "http://localhost:8000" # must be localhost, accessible from outside of docker-network +mock_auth_url = os.getenv("OIDC_URL_TEST", "http://localhost:8000") objects_url = f"{base_url}/objects" drafts_url = f"{base_url}/drafts" templates_url = f"{base_url}/templates" From ea9111ffc67853ce5fd2edaf4519470d1e806404 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 30 Dec 2021 14:05:50 +0200 Subject: [PATCH 157/336] add spellchecks for python json schemas improve pyspelling filters and ignored things. 
optimise wordlist --- .gitignore | 2 + .spellcheck.yml | 33 +++ .wordlist.txt | 617 +++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 536 insertions(+), 116 deletions(-) diff --git a/.gitignore b/.gitignore index 9ecefa70c..e482d5ce5 100644 --- a/.gitignore +++ b/.gitignore @@ -124,3 +124,5 @@ config/* # oidcrp generated directories that store JWKs private static +# ignore pyspelling dictionary +*.dic diff --git a/.spellcheck.yml b/.spellcheck.yml index 2a4b87d4c..c03c5d023 100644 --- a/.spellcheck.yml +++ b/.spellcheck.yml @@ -8,6 +8,39 @@ matrix: encoding: utf-8 pipeline: - pyspelling.filters.markdown: + - pyspelling.filters.context: + context_visible_first: true + escapes: '\\[\\`~]' + delimiters: + # Ignore text between inline back ticks as this is code or hightlight words + - open: '(?P`+)' + close: '(?P=open)' + # Ignore surrounded in <> as in RST it is link + - open: '<([A-Za-z0-9-_:.]+)|(https?://[^\\s/$.?#].[^\\s]+|[A-Za-z0-9-_:.]+)' + close: '>' sources: - 'docs/*.rst' default_encoding: utf-8 + +- name: JSON schemas + aspell: + lang: en + dictionary: + wordlists: + - .wordlist.txt + pipeline: + - pyspelling.filters.javascript: + jsdocs: true + line_comments: false + block_comments: false + strings: true + - pyspelling.filters.context: + context_visible_first: true + escapes: '\\[\\`~]' + delimiters: + # Ignore Pāli word as it cannot be added to dictionary + - open: '(Pāli)' + close: '-' + sources: + - metadata_backend/helpers/schemas/*.json + default_encoding: utf-8 diff --git a/.wordlist.txt b/.wordlist.txt index 63d834985..f864c0c0f 100644 --- a/.wordlist.txt +++ b/.wordlist.txt @@ -1,161 +1,546 @@ -AAI -accessionId -addObjectToDrafts -addObjectToFolder -Ajv +abkhaz +accessionid +additionalproperties +addobjecttodrafts +addobjecttofolder +affiliationidentifier +affiliationidentifierscheme +agp +akan +allof +alternateidentifier +alternateidentifiers +alternateidentifiertype +amplicon +amr +amrantibiogram +analysisattribute +analysisattributes +analysiscenter +analysisdate +analysislinks +analysisref +analysistype +annotinfo +antibiogram api apisauce -asciicast -asciinema -Async +aragonese +arxiv +assemblyannotation async +atac auth -AUTHDB +authdb automodule -autosummary +avaric +avestan +awardnumber +awardtitle +awarduri backend -baseURL -CERTREQS -CLI +bai +bam +bambara +barcode +basecall +basecoord +baseurl +bashkir +bcf +bgiseq +bibcode +bihari +biocollections +biome +biomes +bionano +bioproject +biosample +biosystems +bislama +bisulfite +blastdbinfo +bokmål +bookchapter +boolean +cdd +cdna +centername +centerprojectname +chamorro +checksummethod +chia +chichewa +chip +clinvar +cloneend +commonname +computationalnotebook conf +conferencepaper +conferenceproceeding const -createFromJSON -createFromXML -createNewDraftFolder -createSlice -CSC -CSCUserName -CSCfi +contributortype +covid +cpg +crai +createfromjson +createfromxml +createnewdraftfolder +createslice +crna +crossref +csc cscfi -css -currentSubmissionType -DAC -Dataset +cscusername +csi +cts +curation +currentsubmissiontype +customfields +dac +daclinks +dacref +datacite +datadescription +datapaper +dataset +datasetattribute +datasetattributes +datasetlinks +datasets +datasettype +datauses +datausetype +datecreated +dateinformation +datepublished +datetype +dbprobe +dbvar +de +decodings +defaultlength +defaultmember +demultiplexed +demultiplexing +demux +descriptiontype +designdescription destructure -ebi -EGA +divehi +dnase +doi +doiinfo +dt +dzongkha +ean +eastboundlongitude ega -ENA +eissn ena 
-enasequence -ENV +entrez +entrezdb +entrezid +entrezlink +enum env -eppn -eslint -eslintrc -FhJ -fileName -folderID -followingly -FormData -formData +epigenetics +exome +expectedbasecalltable +experimentattribute +experimentattributes +experimentlinks +experimentref +experimenttype +externalid +extrainfo +faire +familyname +faroese +fasta +fastq +filename +filetype +flatfile +flx +folderid +followsreadindex +formdata +fos +fractionation frontend -FWYs -genindex -Github +fula +funder +funderidentifier +funderidentifiertype +fundername +fundingreferences +galician +ganda +gapplus +gds +genbank +genestudio +genexus +genomemap +genomic +genotyping +geolocation +geolocationbox +geolocationplace +geolocationpoint +geolocationpolygon +geolocations +geoprofiles +gff github -githubusercontent -GroupedBySchema +givenname +gridion +groupedbyschema +gtr +guaraní gunicorn +helicos +heliscope +hiri +hiscansq +hiseq +histone +hmpr +homologene hostname -href +hq html http https -INITDB -io -javascript -js -JSON +identifiertype +ido +igbo +igsn +iix +illumina +insdc +interactiveresource +interlingua +interlingue +inupiaq +ipg +isni +issn +istc +journalarticle json -JSONContent -JWK -Keyfile -Kubernetes -li -localhost +jsoncontent +jwk +kalaallisut +kallisto +kanuri +kashmiri +katanga +keyfile +kinyarwanda +kirundi +komi +kubernetes +kwanyama +kyrgyz +lang +leaveaspool +libraryconstructionprotocol +librarydescriptor +librarylayout +libraryname +libraryselection +librarysource +librarystrategy +librarytype +limburgish +lims +lingala +lissn +locusname +lsid lt -makeStyles +luba +luxembourgish +maincontact +marshallese +matchedge +matk +maxcontains maxdepth -md -metadataObjects +maxitems +maxmismatch +mbd +mda +medgen +medip +membername +metadataobjects +metagenome +metagenomic +metagenomics +metatranscriptome +metatranscriptomic +methylation +methylcytidine +mf +mgiseq middlewares -modindex +mingaplength +minion +miniseq +minitems +minlength +minmatch +mirna +miseq +mnase +mol +moltype mongo -Mongodb mongodb -mTFEFsWsNUbP -Nav -NeIC -neic -newdraft +motu +mpeg +mre +msll +nameidentifier +nameidentifiers +nameidentifierscheme +namespace +nametype +nano +ncbi +ncbisearch +ncrna +ndebele +ndonga +nextseq +nlmcatalog noindex +nominallength +nominalsdev +northboundlatitude +novaseq npm -npx -objectDetails -ObjectInsideFolder -ObjectInsideFolderWithTags -ObjectStatus -ObjectSubmissionTypes -ObjectTags -objectType -ObjectTypes -OIDC -ol -OpenID +nuccore +nuosu +nynorsk +objectdetails +objectinsidefolder +objectinsidefolderwithtags +objectsubmissiontypes +objecttags +objecttype +occitan +oecd +oidc +ojibwe +oligo +omim +oneof +ontologies openid -phenome +orcid +orgtrack +oromo +ossetian +outputmanagementplan +pacbio +paleo +panjabi +pashto +pathogenanalysis +pcassay +pccompound +pcr +pcsubstance +pdf +peerreview +pgm +physicalobject +pipesection +pmc +pmid +pointlatitude +pointlongitude +policyattribute +policyattributes +policylinks +policyref +policytext +policyurl +polya +poolclone +poolingstrategy +poolmembertype +popset pre -prettierrc -ProviderMetadata +precedesreadindex +preprint +prevstepindex +primaryid +probeset +processedreads +processingtype +promethion +proteinclusters +protfam +publicationyear +pubmed py -PyCQA -pycqa quickstart -readthedocs +randompriming +rbcl +readclass +readgrouptag +readindex +readlabel +readme +readspec +readtype redux -REFERER +refcenter +referencealignment +referencesequence +referer +refname +relatedidentifier +relatedidentifiers +relatedidentifiertype +relatedmetadatascheme 
+relationtype +relativeorder +remoteuseridentifier reqs -remoteUserIdentifier -rootReducer +resequencing +resourcetypegeneral +rnaseq +rootreducer +ror +rrna rst +runattribute +runattributes +runcenter +rundate +runlinks +runref +runtime +runtype +sami +sampleattribute +sampleattributes +sampledata +sampledemuxdirective +sampledescriptor +samplelinks +samplename +samplephenotype +sampleref +sango +sardinian +schemaorg schemas -SDA -sda -SLLVERSION +schemetype +schemeuri +scientificname +sdev +se +secondaryid +sectionname +selex +seqannot +sequenceannotation +sequenceassembly +sequenceflatfile +sequencetype +sequencevariation +sff +sha +shona +sinhala +snp +solid +sotho +southboundlatitude +spotdescriptor +spotlength sra -src -SSL +srf ssl -submissionFolder -submissionFolderSlice -submissionType +ssrna +stepindex +studyabstract +studyattribute +studyattributes +studydescription +studylinks +studyref +studytitle +studytype +subjectscheme +submissionfolder +submissionfolderslice +submissiontype +submitter's +submitterdemultiplexed +submitterid +submitters svg -TLS +swati +tabix +tajik +targetloci +taxonid +taxonomicreferenceset +taxonomysystem +taxonomysystemversion +telephonenumber +tigrinya tls toctree tox -UI -ui -uk -ul -un +tpa +transcriptome +transcriptomeassembly +transcriptomic +transcriptomics +tsonga +turkmen +twi +umi +uniqueitems +unlocalised +uri url -useDispatch -UserInfo -useSelector +urllink +usedispatch +userid +useselector +uuid +uyghur validator -withStyles -WizardComponents -wizardObject -WizardObjectIndex -WizardSavedObjectsList -WizardSavedObjectsListProps -WizardShowSummaryStep -WizardSteps -wizardSubmissionFolderSlice +vcf +venda +volapük +wcs +westboundlongitude +wga +wgs +wizardobject +wizardsavedobjectslistprops +wolof +wxs +xl xml -XMLFile -XSD -yaml -yml +xmlfile +xref +xrefdb +xrefid +xreflink +xsd +za +zhuang \ No newline at end of file From b973aeb1f6567f03284e2eb8816a69c341f60ec8 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 30 Dec 2021 14:06:49 +0200 Subject: [PATCH 158/336] uniform spelling of MongoDB --- docs/submitter.rst | 2 +- docs/test.rst | 4 ++-- metadata_backend/api/operators.py | 2 +- metadata_backend/conf/conf.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/submitter.rst b/docs/submitter.rst index 566b16792..f4cf2ad1e 100644 --- a/docs/submitter.rst +++ b/docs/submitter.rst @@ -29,7 +29,7 @@ the table below. +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``MONGO_PASSWORD`` | ``admin`` | Admin password for MongoDB. | Yes | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``MONGO_SSL`` | ``-`` | Set to True to enable MONGO TLS connection url. | No | +| ``MONGO_SSL`` | ``-`` | Set to True to enable MongoDB TLS connection url. | No | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``MONGO_SSL_CA`` | ``-`` | Path to CA file, required if ``MONGO_SSL`` enabled. 
| No | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ diff --git a/docs/test.rst b/docs/test.rst index 54fa5b393..cf1692901 100644 --- a/docs/test.rst +++ b/docs/test.rst @@ -43,9 +43,9 @@ After the backend has been successfully set up, run the following in the backend This command will run a series of integration tests. To clean db before or after each integration tests run: ``python tests/integration/clean_db.py`` (``--tls`` argument -can be added if Mongodb is started via ``docker-compose-tls.yml``). Script clean_db.py will delete all documents in all collections in the database. +can be added if MongoDB is started via ``docker-compose-tls.yml``). Script clean_db.py will delete all documents in all collections in the database. To erase the database run: ``python tests/integration/clean_db.py --purge``. After that indexes need to be recreated. -To do that run: ``python tests/integration/mongo_indexes.py`` (``--tls`` argument can be added if Mongodb is started via ``docker-compose-tls.yml``). +To do that run: ``python tests/integration/mongo_indexes.py`` (``--tls`` argument can be added if MongoDB is started via ``docker-compose-tls.yml``). Performance Testing diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 5eacaea80..669e8551c 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -468,7 +468,7 @@ async def _format_read_data( :param schema_type: Schema type of the object to read. :param data_raw: Data from mongodb query, can contain multiple results - :returns: Mongodb query result, formatted to readable dicts + :returns: MongoDB query result, formatted to readable dicts """ if isinstance(data_raw, dict): return self._format_single_dict(schema_type, data_raw) diff --git a/metadata_backend/conf/conf.py b/metadata_backend/conf/conf.py index d5e00525d..2b33d245f 100644 --- a/metadata_backend/conf/conf.py +++ b/metadata_backend/conf/conf.py @@ -7,7 +7,7 @@ - ``MONGO_USERNAME`` - Username for mongodb - ``MONGO_PASSWORD`` - Password for mongodb -- ``MONGO_HOST`` - Mongodb server hostname, with port specified +- ``MONGO_HOST`` - MongoDB server hostname, with port specified Admin access is needed in order to create new databases during runtime. Default values are the same that are used in docker-compose file @@ -22,7 +22,7 @@ Schema types (such as ``"submission"``, ``"study"``, ``"sample"``) are needed in different parts of the application. -3) Mongodb query mappings +3) MongoDB query mappings Mappings are needed to turn incoming REST api queries into mongodb queries. Change these if database structure changes. 
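The query mappings referenced in the conf.py docstring above are what turn incoming REST API query parameters into MongoDB filter documents. A minimal sketch of that idea, assuming a hypothetical query_map and a case-insensitive regex match; the actual mappings and matching rules live in metadata_backend/conf/conf.py and may differ:

# A minimal sketch of the query-mapping idea described in the conf.py
# docstring above. The mapping and matching rule below are illustrative
# assumptions, not the repository's actual configuration.
query_map = {
    "studyTitle": "descriptor.studyTitle",  # assumed REST parameter -> nested Mongo field
    "name": "name",
}


def build_mongo_query(params: dict) -> dict:
    """Translate REST query parameters into a MongoDB filter document."""
    mongo_query = {}
    for param, value in params.items():
        field = query_map.get(param)
        if field is None:
            continue  # unknown parameters are ignored in this sketch
        # Case-insensitive substring match on the mapped field.
        mongo_query[field] = {"$regex": f".*{value}.*", "$options": "i"}
    return mongo_query


# Example: ?studyTitle=genome would become
# {"descriptor.studyTitle": {"$regex": ".*genome.*", "$options": "i"}}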
From e2915ce4966463de716ef4ae827dc78c97797203 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 30 Dec 2021 14:07:02 +0200 Subject: [PATCH 159/336] fix spelling mistake in current json schemas --- metadata_backend/helpers/schemas/datacite.json | 4 ++-- .../helpers/schemas/ena_experiment.json | 14 ++++++++------ metadata_backend/helpers/schemas/folders.json | 6 +++--- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/metadata_backend/helpers/schemas/datacite.json b/metadata_backend/helpers/schemas/datacite.json index 60a4a9818..b4d58c11d 100644 --- a/metadata_backend/helpers/schemas/datacite.json +++ b/metadata_backend/helpers/schemas/datacite.json @@ -583,7 +583,7 @@ }, "relatedIdentifiers": { "type": "array", - "title": "Related Indetifiers", + "title": "Related Identifiers", "description": "Must be a globally unique identifier", "items": { "type": "object", @@ -773,7 +773,7 @@ "formats": { "type": "array", "title": "Formats", - "description": "Use file extension or MIME type where possible, e.g., PDF, XML, MPG or application/pdf, text/xml, video/mpeg. We will prefill some of them based on what was filled in metadata.", + "description": "Use file extension or MIME type where possible, e.g., PDF, XML, MPG or application/pdf, text/xml, video/mpeg. We will pre-fill some of them based on what was filled in metadata.", "items": { "type": "string", "title": "Format" diff --git a/metadata_backend/helpers/schemas/ena_experiment.json b/metadata_backend/helpers/schemas/ena_experiment.json index bd758bc61..eeb7d11ad 100644 --- a/metadata_backend/helpers/schemas/ena_experiment.json +++ b/metadata_backend/helpers/schemas/ena_experiment.json @@ -168,7 +168,7 @@ "pool": { "type": "object", "title": "Sample Members", - "description": "Identifies a list of group/pool/multiplex sample members. This implies that this sample record is a group, pool, or multiplex, but it continues to receive its own accession and can be referenced by an experiment. By default ifno match to any of the listed members can be determined, then the default sample reference is used.", + "description": "Identifies a list of group/pool/multiplex sample members. This implies that this sample record is a group, pool, or multiplex, but it continues to receive its own accession and can be referenced by an experiment. By default if no match to any of the listed members can be determined, then the default sample reference is used.", "properties": { "defaultMember": { "type": "object", @@ -180,7 +180,6 @@ "title": "Member", "$ref": "#/definitions/poolMemberType" } - } } } @@ -208,7 +207,11 @@ "$id": "#/definitions/libraryType", "type": "object", "title": "Library used for experiment design", - "required": ["designDescription", "sampleDescriptor", "libraryDescriptor"], + "required": [ + "designDescription", + "sampleDescriptor", + "libraryDescriptor" + ], "properties": { "designDescription": { "title": "Design Description", @@ -227,7 +230,6 @@ "$ref": "#/definitions/reference" } ] - }, "libraryDescriptor": { "description": "The LIBRARY_DESCRIPTOR specifies the origin of the material being sequenced and any treatments that the material might have undergone that affect the sequencing result. 
This specification is needed even if the platform does not require a library construction step per se.", @@ -585,7 +587,7 @@ "properties": { "leaveAsPool": { "type": "string", - "description": "There shall be no sample de-multiplexing at the level of assiging individual reads to sample pool members.", + "description": "There shall be no sample de-multiplexing at the level of assigning individual reads to sample pool members.", "title": "Leave as pool" }, "submitterDemultiplexed": { @@ -802,4 +804,4 @@ } } } -} +} \ No newline at end of file diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index cda2eab7c..555c66d66 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -652,7 +652,7 @@ }, "relatedIdentifiers": { "type": "array", - "title": "Related Indetifiers", + "title": "Related Identifiers", "description": "Must be a globally unique identifier", "items": { "type": "object", @@ -842,7 +842,7 @@ "formats": { "type": "array", "title": "Formats", - "description": "Use file extension or MIME type where possible, e.g., PDF, XML, MPG or application/pdf, text/xml, video/mpeg. We will prefill some of them based on what was filled in metadata.", + "description": "Use file extension or MIME type where possible, e.g., PDF, XML, MPG or application/pdf, text/xml, video/mpeg. We will pre-fill some of them based on what was filled in metadata.", "items": { "type": "string", "title": "Format" @@ -1027,4 +1027,4 @@ } }, "additionalProperties": false -} +} \ No newline at end of file From a52318034d572d117a4c0b29be3665db69d4763b Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 31 Dec 2021 10:17:40 +0200 Subject: [PATCH 160/336] include readme in spellchecks --- .spellcheck.yml | 1 + .wordlist.txt | 11 +++++++++++ CONTRIBUTING.md | 6 +++--- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.spellcheck.yml b/.spellcheck.yml index c03c5d023..44296edb0 100644 --- a/.spellcheck.yml +++ b/.spellcheck.yml @@ -20,6 +20,7 @@ matrix: close: '>' sources: - 'docs/*.rst' + - '**/*.md' default_encoding: utf-8 - name: JSON schemas diff --git a/.wordlist.txt b/.wordlist.txt index f864c0c0f..ac13bae79 100644 --- a/.wordlist.txt +++ b/.wordlist.txt @@ -11,6 +11,7 @@ allof alternateidentifier alternateidentifiers alternateidentifiertype +amd amplicon amr amrantibiogram @@ -64,6 +65,7 @@ blastdbinfo bokmål bookchapter boolean +buildx cdd cdna centername @@ -73,6 +75,7 @@ checksummethod chia chichewa chip +cli clinvar cloneend commonname @@ -83,6 +86,7 @@ conferenceproceeding const contributortype covid +cp cpg crai createfromjson @@ -95,6 +99,7 @@ csc cscfi cscusername csi +ctrl cts curation currentsubmissiontype @@ -129,8 +134,10 @@ demux descriptiontype designdescription destructure +devcontainer divehi dnase +docstrings doi doiinfo dt @@ -259,6 +266,7 @@ librarytype limburgish lims lingala +linux lissn locusname lsid @@ -297,6 +305,7 @@ minlength minmatch mirna miseq +mkdir mnase mol moltype @@ -306,6 +315,7 @@ motu mpeg mre msll +mypy nameidentifier nameidentifiers nameidentifierscheme @@ -529,6 +539,7 @@ volapük wcs westboundlongitude wga +wget wgs wizardobject wizardsavedobjectslistprops diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d3ca9a1b7..c7b51dc2d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -45,9 +45,9 @@ We do optimize for readability, and it would be awesome if you go through the co - We use [black](https://github.com/psf/black) code formatter and also check 
for [pep8](https://www.python.org/dev/peps/pep-0008/) and [pep257](https://www.python.org/dev/peps/pep-0257/) with some small exceptions. You can see the stated exceptions in `tox.ini` configuration file - We like to keep things simple, so when possible avoid importing any big libraries. - Tools to help you: - Tox is configured to run bunch of tests: black, flake8, docstrings, missing type hints, mypy. - Tox is also ran in our CI, so please run tox before each push to this repo - If you like things to happen automagically, you can add pre-commit hook to your git workflow! Hook can be found from [scripts-folder](scripts) and it includes settings for tox and [misspell](https://github.com/client9/misspell) (which is there just for, well, spelling errors). + - Tox is configured to run a bunch of tests: black, flake8, docstrings, missing type hints, mypy; + - Tox is also run in our CI, so please run tox before each push to this repository; + - If you like things to happen automagically, you can add a pre-commit hook to your git workflow! Hook can be found from [scripts-folder](scripts) and it includes settings for tox and [pyspelling](https://facelessuser.github.io/pyspelling/) (which is there just for, well, spelling errors). Thanks, CSC developers From 154b78bb12e10911f9cb6959543a1a18448782ae Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 31 Dec 2021 10:18:06 +0200 Subject: [PATCH 161/336] format shellscripts and optimise pre-commit script --- scripts/install-hooks.sh | 2 +- scripts/pre-commit.sh | 24 +++++++++++++----------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/scripts/install-hooks.sh b/scripts/install-hooks.sh index 76a54b267..9cdeebf23 100755 --- a/scripts/install-hooks.sh +++ b/scripts/install-hooks.sh @@ -1,5 +1,5 @@ #!/bin/sh # Install pre-commit hook by running ./install-hooks.sh git_root=$(git rev-parse --show-toplevel) -ln -s $git_root/scripts/pre-commit.sh $git_root/.git/hooks/pre-commit +ln -s "$git_root"/scripts/pre-commit.sh "$git_root/.git/hooks/pre-commit" echo "Symlinked pre-commit hook!" diff --git a/scripts/pre-commit.sh b/scripts/pre-commit.sh index 6c43bf590..c370884d1 100755 --- a/scripts/pre-commit.sh +++ b/scripts/pre-commit.sh @@ -3,9 +3,11 @@ # Comment out pre-commit hooks you don't want to use echo "Running tox as a pre-commit hook" -cd $(git rev-parse --show-toplevel) && rm -r .tox; tox -p auto +root_dir=$(git rev-parse --show-toplevel) +cd "$root_dir" || exit 1 -if [ $? -ne 0 ]; then +if ! tox -r -p auto ; then echo "==============================" echo "Tests must pass before commit!" echo "Note: Tox also checks non-staged changes, so you might need to stash @@ -13,17 +15,17 @@ if [ $? -ne 0 ]; then exit 1 fi -command -v misspell > /dev/null 2>&1 || echo "Misspell not installed, not running as pre-commit hook" && exit 0 -echo "Running misspell as a pre-commit hook" -# Checking misspell against files and folder not in .gitignore -files=$(git ls-tree HEAD | awk '{print $4}' | tr '\n' ' ') -output=$(cd $(git rev-parse --show-toplevel) && misspell $files) +if ! command -v pyspelling > /dev/null 2>&1; then + echo "pyspelling not installed, not running as pre-commit hook" + exit 0 +fi + +echo "Running pyspelling as a pre-commit hook" +# Checking pyspelling against files and folder not in .gitignore -if [[ $output ]]; then +if ! pyspelling -v -c "$root_dir/.spellcheck.yml"; then echo "==============================" echo "Check your spelling errors before commit!"
- echo "You had following errors:" - echo $output - echo "To fix errors with one command, run: misspell -w $files" + echo "To fix errors with one command, run: pyspelling -v -c $root_dir/.spellcheck.yml" exit 1 fi From 696ff4951302c08ed60b9028326cb54b0afc4c7a Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 3 Jan 2022 18:17:02 +0200 Subject: [PATCH 162/336] make dictionary lowercase to eliminate duplicates --- .spellcheck.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.spellcheck.yml b/.spellcheck.yml index 44296edb0..c782601cd 100644 --- a/.spellcheck.yml +++ b/.spellcheck.yml @@ -2,6 +2,7 @@ matrix: - name: Markdown aspell: lang: en + ignore-case: true dictionary: wordlists: - .wordlist.txt @@ -26,6 +27,7 @@ matrix: - name: JSON schemas aspell: lang: en + ignore-case: true dictionary: wordlists: - .wordlist.txt From e38f00692d8763f4b76058782cb907f785e8869f Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 5 Jan 2022 18:42:42 +0200 Subject: [PATCH 163/336] move spellcheck config under .github folder the configuration is used only in github action so it is not required in root folder --- .spellcheck.yml => .github/config/.spellcheck.yml | 6 +++--- .wordlist.txt => .github/config/.wordlist.txt | 2 ++ .github/workflows/docs.yml | 2 ++ 3 files changed, 7 insertions(+), 3 deletions(-) rename .spellcheck.yml => .github/config/.spellcheck.yml (87%) rename .wordlist.txt => .github/config/.wordlist.txt (99%) diff --git a/.spellcheck.yml b/.github/config/.spellcheck.yml similarity index 87% rename from .spellcheck.yml rename to .github/config/.spellcheck.yml index c782601cd..2cbf92b62 100644 --- a/.spellcheck.yml +++ b/.github/config/.spellcheck.yml @@ -5,7 +5,7 @@ matrix: ignore-case: true dictionary: wordlists: - - .wordlist.txt + - .github/config/.wordlist.txt encoding: utf-8 pipeline: - pyspelling.filters.markdown: @@ -30,7 +30,7 @@ matrix: ignore-case: true dictionary: wordlists: - - .wordlist.txt + - .github/config/.wordlist.txt pipeline: - pyspelling.filters.javascript: jsdocs: true diff --git a/.wordlist.txt b/.github/config/.wordlist.txt similarity index 99% rename from .wordlist.txt rename to .github/config/.wordlist.txt index ac13bae79..0c93c8efd 100644 --- a/.wordlist.txt +++ b/.github/config/.wordlist.txt @@ -113,6 +113,7 @@ datapaper dataset datasetattribute datasetattributes +datasetIdentifiers datasetlinks datasets datasettype @@ -487,6 +488,7 @@ studyabstract studyattribute studyattributes studydescription +studyIdentifier studylinks studyref studytitle diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 14704d969..ab9ae0031 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -15,6 +15,8 @@ jobs: - uses: actions/checkout@master - uses: rojopolis/spellcheck-github-actions@0.20.0 name: Spellcheck + with: + config_path: .github/config/.spellcheck.yml code_docs: strategy: From 2d95b9443662fc014159fbb6d14e99bc9236e56b Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 5 Jan 2022 19:11:28 +0200 Subject: [PATCH 164/336] correct information & add description to datacite --- .github/config/.wordlist.txt | 4 + .../helpers/schemas/datacite.json | 153 ++++++++------- metadata_backend/helpers/schemas/folders.json | 178
++++++++++---------- 3 files changed, 190 insertions(+), 145 deletions(-) diff --git a/.github/config/.wordlist.txt b/.github/config/.wordlist.txt index 0c93c8efd..bbe2f1b7c 100644 --- a/.github/config/.wordlist.txt +++ b/.github/config/.wordlist.txt @@ -125,6 +125,8 @@ datepublished datetype dbprobe dbvar +DDI +ddialliance de decodings defaultlength @@ -264,6 +266,7 @@ libraryselection librarysource librarystrategy librarytype +Lifecycle limburgish lims lingala @@ -550,6 +553,7 @@ wxs xl xml xmlfile +XMLSchema xref xrefdb xrefid
diff --git a/metadata_backend/helpers/schemas/datacite.json b/metadata_backend/helpers/schemas/datacite.json index b4d58c11d..5cb708ed7 100644 --- a/metadata_backend/helpers/schemas/datacite.json +++ b/metadata_backend/helpers/schemas/datacite.json @@ -12,7 +12,7 @@ "description": "The main researcher(s) involved in producing the data, or the author(s) of the publication.", "items": { "type": "object", - "title": "Main researcher(s) involved with data or the author(s) of the publication", + "title": "Main researcher(s) involved with data or the author(s) of the publication.", "properties": { "givenName": { "type": "string", @@ -24,14 +24,16 @@ }, "name": { "type": "string", - "title": "Full name (from Given Name and Family Name)." + "description": "This field will be constructed from Given Name and Family Name.", + "title": "Full name." }, "affiliation": { "type": "array", "title": "Affiliations", + "description": "The organizational or institutional affiliation of the creator. Start writing the name of the organization or institution and a suggestion will be made from Research Organization Registry (ROR) Community API.", "items": { "type": "object", - "title": "Affiliation details", + "title": "Affiliation Details", "properties": { "name": { "type": "string", @@ -39,14 +41,17 @@ }, "schemeUri": { "type": "string", + "description": "Name identifier scheme. This will default to https://ror.org/.", "title": "URI (location) of the affiliation scheme" }, "affiliationIdentifier": { "type": "string", + "description": "URI location based on the URI scheme of the name identifier; this will be pre-filled based on https://ror.org/name.", "title": "Location of affiliation identifier" }, "affiliationIdentifierScheme": { "type": "string", + "description": "Identifier scheme name. This will default to ROR.", "title": "Name of affiliation identifier scheme" } } @@ -56,23 +61,26 @@ "nameIdentifiers": { "type": "array", "title": "Creator Identifiers", - "description": "Uniquely identifies an individual or legal entity, according to various schemas, e.g. ORCID, ROR or ISNI. Use name identifier expressed as URL.", + "description": "Uniquely identifies the creator, according to various schemas, e.g. ORCID, ROR or ISNI. Use name identifier expressed as URL.", "items": { "type": "object", - "title": "Name identifier object", + "title": "Creator Name Identifier", "properties": { "schemeUri": { "type": "string", - "title": "URI (location) of the name identifier scheme", + "description": "Depending on the name identifier scheme, e.g. https://isni.org/ or https://orcid.org/ or https://ror.org/ or https://www.grid.ac/.", + "title": "Scheme of the URI (location) of the name identifier", "format": "uri" }, "nameIdentifier": { "type": "string", - "title": "URI (location) of name identifier" + "description": "URI location based on the URI scheme of the name identifier, e.g. https://orcid.org/nameIdentifier.", + "title": "URI (location) of name identifier." }, "nameIdentifierScheme": { "type": "string", - "title": "Name of name identifier scheme" + "description": "Identifier scheme name, e.g. ORCID, ROR or ISNI.", + "title": "Name of identifier scheme." } } }, @@ -86,7 +94,7 @@ "subjects": { "type": "array", "title": "Subjects", - "description": "Subject, keyword, classification code, or key phrase describing the resource specified by OECD Fields of Science and Technology (FOS)", + "description": "Subject, keyword, classification code, or key phrase describing the resources specified by OECD Fields of Science and Technology (FOS)", "items": { "type": "object", "title": "Subjects", @@ -174,7 +182,8 @@ }, "name": { "type": "string", - "title": "Full name (from Given Name and Family Name)." + "description": "This field will be constructed from Given Name and Family Name.", + "title": "Full name." }, "contributorType": { "type": "string", @@ -203,9 +212,10 @@ "affiliation": { "type": "array", "title": "Affiliations", + "description": "The organizational or institutional affiliation of the contributor. Start writing the name of the organization or institution and a suggestion will be made from Research Organization Registry (ROR) Community API.", "items": { "type": "object", - "title": "Affiliation details", + "title": "Affiliation Details", "properties": { "name": { "type": "string", @@ -213,14 +223,17 @@ }, "schemeUri": { "type": "string", + "description": "Name identifier scheme. This will default to https://ror.org/.", "title": "URI (location) of the affiliation scheme" }, "affiliationIdentifier": { "type": "string", + "description": "URI location based on the URI scheme of the name identifier; this will be pre-filled based on https://ror.org/name.", "title": "Location of affiliation identifier" }, "affiliationIdentifierScheme": { "type": "string", + "description": "Identifier scheme name. This will default to ROR.", "title": "Name of affiliation identifier scheme" } } @@ -229,27 +242,31 @@ }, "nameIdentifiers": { "type": "array", - "title": "Contributor identifiers", - "description": "Uniquely identifies an individual or legal entity, according to various schemas, e.g. ORCID, ROR or ISNI. Use name identifier expressed as URL.", + "title": "Contributor Identifiers", + "description": "Uniquely identifies the contributor, according to various schemas, e.g. ORCID, ROR or ISNI. Use name identifier expressed as URL.", "items": { "type": "object", - "title": "Identifier details", + "title": "Contributor Name Identifier", "properties": { "schemeUri": { "type": "string", - "title": "URI (location) of the name identifier scheme", + "description": "Depending on the name identifier scheme, e.g. https://isni.org/ or https://orcid.org/ or https://ror.org/ or https://www.grid.ac/.", + "title": "Scheme of the URI (location) of the name identifier", "format": "uri" }, "nameIdentifier": { "type": "string", - "title": "Location of name identifier" + "description": "URI location based on the URI scheme of the name identifier, e.g. https://orcid.org/nameIdentifier.", + "title": "URI (location) of name identifier." }, "nameIdentifierScheme": { "type": "string", - "title": "Name of name identifier scheme" + "description": "Identifier scheme name, e.g. ORCID, ROR or ISNI.", + "title": "Name of identifier scheme." } } - } + }, + "uniqueItems": true } }, "additionalProperties": false @@ -259,7 +276,7 @@ "dates": { "type": "array", "title": "Dates", - "description": "List of relevant dates to publication", + "description": "List of relevant dates to publication. The type of date can vary and it is recommended to add information if appropriate to distinguish between dates.", "items": { "type": "object", "title": "Date", @@ -271,12 +288,12 @@ "date": { "type": "string", "title": "Date", - "description": "A standard format for a date value" + "description": "A standard format for a date value." }, "dateType": { "type": "string", "title": "Date Type", - "description": "Relevance of the date", + "description": "Relevance of the date for the resources being submitted.", "enum": [ "Accepted", "Available", @@ -304,7 +321,7 @@ "descriptions": { "type": "array", "title": "Descriptions", - "description": "Additional information about the resource that does not fit in any of the other categories.", + "description": "Additional information about the resources that does not fit in any of the other categories, or general descriptions that are supplemental to the descriptions added for Study or Datasets.", "items": { "type": "object", "title": "description", @@ -337,7 +354,7 @@ "geoLocations": { "type": "array", "title": "GeoLocations", - "description": "Spatial region or named place where the data was gathered or about which the resource is focused.", + "description": "Spatial region or named place where the data was gathered or where the resources are focused.", "items": { "type": "object", "title": "GeoLocation", @@ -369,19 +386,19 @@ "properties": { "westBoundLongitude": { "type": "string", - "title": "Longitude coordinate of west bound" + "title": "Longitude coordinate of west bound." }, "eastBoundLongitude": { "type": "string", - "title": "Longitude coordinate of east bound" + "title": "Longitude coordinate of east bound." }, "southBoundLatitude": { "type": "string", - "title": "Latitude coordinate of south bound" + "title": "Latitude coordinate of south bound." }, "northBoundLatitude": { "type": "string", - "title": "Latitude coordinate of north bound" + "title": "Latitude coordinate of north bound." } } } @@ -393,7 +410,7 @@ "language": { "type": "string", "title": "Language", - "description": "Primary language of the Study/Datasets submitted.", + "description": "Primary language of the submitted resources.", "enum": [ "Afar", "Abkhaz", @@ -572,7 +589,7 @@ "Vietnamese", "Volapük", "Walloon", "Wolof", "Xhosa", "Yiddish", "Yoruba", @@ -584,7 +601,7 @@ "relatedIdentifiers": { "type": "array", "title": "Related Identifiers", - "description": "Must be a globally unique identifier", + "description": "Information about a resource related to the one being registered, primarily used to provide series information or a text citation where the related resource does not have an identifier. It is also optional to provide such an identifier.", "items": { "type": "object", "title": "Identifier of related resource", @@ -597,7 +614,7 @@ "relatedIdentifier": { "type": "string", "title": "Identifier", - "description": "These must be globally unique identifiers and correspond to the type selected" + "description": "These must be globally unique identifiers and correspond to the type selected, e.g. 10.2022/example.78m9865 for the DOI identifier type." }, "relatedIdentifierType": { "type": "string", @@ -629,56 +646,59 @@ "title": "Relationship Type", "enum": [ "Cites", - "Is cited by", + "Is Cited By", "Compiles", - "Is compiled by", + "Is Compiled By", "Continues", - "Is continued by", + "Is Continued By", "Describes", - "Is described by", + "Is Described By", "Documents", - "Is documented by", - "Is derived from", - "Is source of", - "Has metadata", - "Is metadata for", - "Has part", - "Is part of", - "Is supplemented by", - "Is supplement to", + "Is Documented By", + "Is Derived From", + "Is Source Of", + "Has Metadata", + "Is Metadata For", + "Has Part", + "Is Part Of", + "Is Supplemented By", + "Is Supplement To", "Obsoletes", - "Is obsoleted by", + "Is Obsoleted By", "References", - "Is referenced by", + "Is Referenced By", "Requires", - "Is required by", + "Is Required By", "Reviews", - "Is reviewed by", - "Has version", - "Is version of", - "Is new version of", - "Is previous version of", - "Is published in", - "Is variant form of", - "Is original form of", - "Is identical to" + "Is Reviewed By", + "Has Version", + "Is Version Of", + "Is New Version Of", + "Is Previous Version Of", + "Is Published In", + "Is Variant Form Of", + "Is Original Form Of", + "Is Identical To" ] }, "relatedMetadataScheme": { "type": "string", - "title": "Related Metadata Scheme" + "title": "Related Metadata Scheme", + "description": "To be used only with this relation pair: (Has Metadata/Is Metadata For). Example: DDI-L" }, "schemeUri": { "type": "string", - "title": "Related Metadata Scheme URI" + "title": "Related Metadata Scheme URI", + "description": "To be used only with this relation pair: (Has Metadata/Is Metadata For). Example: http://www.ddialliance.org/Specification/DDI-Lifecycle/3.1/XMLSchema/instance.xsd" }, "schemeType": { "type": "string", - "title": "Related Metadata Scheme Type" + "title": "Related Metadata Scheme Type", + "description": "To be used only with this relation pair: (Has Metadata/Is Metadata For). Examples: XSD, DDT, Turtle" }, "resourceTypeGeneral": { "type": "string", - "title": "Resource Type General", + "title": "Resource Type", "enum": [ "Audiovisual", "Book", @@ -718,10 +738,10 @@ "alternateIdentifiers": { "type": "array", "title": "Alternate Identifiers", - "description": "An identifier or identifiers other than the primary or related identifier applied to the resource being registered. EGA identifier obtained that as an alternative to the current resource.", + "description": "An identifier or identifiers other than the primary or related identifier applied to the resources being registered. EGA identifier(s) obtained that can be used as an alternative to the current resources.", "items": { "type": "object", - "title": "An identifier or identifiers other than the primary Identifier of the resource.", + "title": "Identifier(s) other than the primary identifier of the resources", "required": [ "alternateIdentifier", "alternateIdentifierType" ], @@ -764,7 +784,7 @@ "sizes": { "type": "array", "title": "Sizes", - "description": "Size (e.g., bytes, pages, inches, etc.) or duration (extent), e.g., hours, minutes, days, etc., of a resource. Examples: '15 pages', '6 MB', '45 minutes'", + "description": "Size (e.g., bytes, pages, inches, etc.) or duration (extent), e.g., hours, minutes, days, etc., of the resources.
Examples: '15 pages', '6 MB', '45 minutes'.", "items": { "type": "string", "title": "Size" @@ -782,9 +802,10 @@ "fundingReferences": { "type": "array", "title": "Funding References", + "description": "It is recommended to supply funding information when financial support has been received. The funder will be identified by their Research Organization Registry (ROR).", "items": { "type": "object", - "title": "Information about financial support for the resource", + "title": "Information about financial support for producing the resources", "required": [ "funderName", "funderIdentifier", @@ -815,12 +836,12 @@ "awardNumber": { "type": "string", "title": "Award Number", - "description": "The code assigned by the funder to a sponsored award" + "description": "The code assigned by the funder to a sponsored award." }, "awardTitle": { "type": "string", "title": "Award Title", - "description": "The human readable title of the award" + "description": "The human readable title of the award." }, "awardUri": { "type": "string", diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index 555c66d66..eadd9dbd5 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -36,7 +36,7 @@ }, "doiInfo": { "type": "object", - "title": "The DOI info schema", + "title": "Datacite DOI Registration Information", "required": [ "creators", "subjects" @@ -48,7 +48,7 @@ "description": "The main researcher(s) involved in producing the data, or the author(s) of the publication.", "items": { "type": "object", - "title": "Main researcher(s) involved with data or the author(s) of the publication", + "title": "Main researcher(s) involved with data or the author(s) of the publication.", "properties": { "givenName": { "type": "string", @@ -60,7 +60,8 @@ }, "name": { "type": "string", - "title": "Full name (from Given Name and Family Name)." + "description": "This field will be constructed based from Given Name and Family Name.", + "title": "Full name." }, "nameType": { "type": "string", @@ -70,9 +71,10 @@ "affiliation": { "type": "array", "title": "Affiliations", + "description": "The organizational or institutional affiliation of the creator. Start writing the name of the organization or institution and a suggestion will be made from Research Organization Registry (ROR) Community API.", "items": { "type": "object", - "title": "Affiliation details", + "title": "Affiliation Details", "properties": { "name": { "type": "string", @@ -80,17 +82,18 @@ }, "schemeUri": { "type": "string", - "title": "URI (location) of the affiliation scheme", - "const": "https://ror.org" + "description": "Name identifier scheme. This will default to https://ror.org/ .", + "title": "URI (location) of the affiliation scheme" }, "affiliationIdentifier": { "type": "string", + "description": "URI location based on the URI scheme of the name identifier this will be pre-filled based on https://ror.org/name.", "title": "Location of affiliation identifier" }, "affiliationIdentifierScheme": { "type": "string", - "title": "Name of affiliation identifier scheme", - "const": "ROR" + "description": "Identifier scheme name. This will default to ROR.", + "title": "Name of affiliation identifier scheme" } } }, @@ -99,23 +102,26 @@ "nameIdentifiers": { "type": "array", "title": "Creator Identifiers", - "description": "Uniquely identifies an individual or legal entity, according to various schemas, e.g. ORCID, ROR or ISNI. 
Use name identifier expressed as URL.", + "description": "Uniquely identifies the creator, according to various schemas, e.g. ORCID, ROR or ISNI. Use name identifier expressed as URL.", "items": { "type": "object", - "title": "Name identifier object", + "title": "Creator Name Identifier", "properties": { "schemeUri": { "type": "string", - "title": "URI (location) of the name identifier scheme", + "description": "Depending on the name identifier scheme e.g. https://isni.org/ or https://orcid.org/ or https://ror.org/ or https://www.grid.ac/ .", + "title": "Scheme of the URI (location) of the name identifier", "format": "uri" }, "nameIdentifier": { "type": "string", - "title": "URI (location) of name identifier" + "description": "URI location based on the URI scheme of the name identifier e.g. https://orcid.org/nameIdentifier .", + "title": "URI (location) of name identifier. " }, "nameIdentifierScheme": { "type": "string", - "title": "Name of name identifier scheme" + "description": "Identifier scheme name e.g. ORCID, ROR or ISNI .", + "title": "Name of identifier scheme." } } }, @@ -129,7 +135,7 @@ "subjects": { "type": "array", "title": "Subjects", - "description": "Subject, keyword, classification code, or key phrase describing the resource specified by OECD Fields of Science and Technology (FOS)", + "description": "Subject, keyword, classification code, or key phrase describing the resources specified by OECD Fields of Science and Technology (FOS)", "items": { "type": "object", "title": "Subjects", @@ -221,7 +227,8 @@ }, "name": { "type": "string", - "title": "Full name (from Given Name and Family Name)." + "description": "This field will be constructed based from Given Name and Family Name.", + "title": "Full name." }, "nameType": { "type": "string", @@ -255,9 +262,10 @@ "affiliation": { "type": "array", "title": "Affiliations", + "description": "The organizational or institutional affiliation of the creator. Start writing the name of the organization or institution and a suggestion will be made from Research Organization Registry (ROR) Community API.", "items": { "type": "object", - "title": "Affiliation details", + "title": "Affiliation Details", "properties": { "name": { "type": "string", @@ -265,17 +273,18 @@ }, "schemeUri": { "type": "string", - "title": "URI (location) of the affiliation scheme", - "const": "https://ror.org" + "description": "Name identifier scheme. This will default to https://ror.org/ .", + "title": "URI (location) of the affiliation scheme" }, "affiliationIdentifier": { "type": "string", + "description": "URI location based on the URI scheme of the name identifier this will be pre-filled based on https://ror.org/name.", "title": "Location of affiliation identifier" }, "affiliationIdentifierScheme": { "type": "string", - "title": "Name of affiliation identifier scheme", - "const": "ROR" + "description": "Identifier scheme name. This will default to ROR.", + "title": "Name of affiliation identifier scheme" } } }, @@ -283,27 +292,31 @@ }, "nameIdentifiers": { "type": "array", - "title": "Contributor identifiers", - "description": "Uniquely identifies an individual or legal entity, according to various schemas, e.g. ORCID, ROR or ISNI. Use name identifier expressed as URL.", + "title": "Contributor Identifiers", + "description": "Uniquely identifies the contributor, according to various schemas, e.g. ORCID, ROR or ISNI. 
Use a name identifier expressed as a URL.",
            "items": {
              "type": "object",
-             "title": "Identifier details",
+             "title": "Contributor Name Identifier",
              "properties": {
                "schemeUri": {
                  "type": "string",
+                 "description": "Depends on the name identifier scheme, e.g. https://isni.org/ , https://orcid.org/ , https://ror.org/ or https://www.grid.ac/ .",
-                 "title": "URI (location) of the name identifier scheme",
+                 "title": "Scheme of the URI (location) of the name identifier",
                  "format": "uri"
                },
                "nameIdentifier": {
                  "type": "string",
-                 "title": "Location of name identifier"
+                 "description": "URI location based on the URI scheme of the name identifier, e.g. https://orcid.org/nameIdentifier .",
+                 "title": "URI (location) of name identifier"
                },
                "nameIdentifierScheme": {
                  "type": "string",
-                 "title": "Name of name identifier scheme"
+                 "description": "Identifier scheme name, e.g. ORCID, ROR or ISNI.",
+                 "title": "Name of identifier scheme"
                }
              }
            }
-         }
+         },
+         "uniqueItems": true
        }
      },
      "additionalProperties": false
@@ -313,7 +326,7 @@
     "dates": {
       "type": "array",
       "title": "Dates",
-      "description": "List of relevant dates to publication",
+      "description": "List of dates relevant to the publication. Date types can vary, so it is recommended to add information to distinguish between the dates where appropriate.",
       "items": {
         "type": "object",
         "title": "Date",
@@ -325,12 +338,12 @@
         "date": {
           "type": "string",
           "title": "Date",
-          "description": "A standard format for a date value"
+          "description": "A standard format for a date value."
         },
         "dateType": {
           "type": "string",
           "title": "Date Type",
-          "description": "Relevance of the date",
+          "description": "Relevance of the date for the resources being submitted.",
           "enum": [
             "Accepted",
             "Available",
@@ -358,7 +371,7 @@
     "descriptions": {
       "type": "array",
       "title": "Descriptions",
-      "description": "Additional information about the resource that does not fit in any of the other categories.",
+      "description": "Additional information about the resources that does not fit in any of the other categories, or general descriptions that are supplemental to the descriptions added for Study or Datasets.",
       "items": {
         "type": "object",
         "title": "description",
@@ -390,18 +403,20 @@
     },
     "geoLocations": {
       "type": "array",
-      "title": "List of GeoLocations",
+      "title": "GeoLocations",
+      "description": "Spatial region or named place where the data was gathered or where the resources are focused.",
       "items": {
         "type": "object",
-        "title": "GeoLocation object",
+        "title": "GeoLocation",
         "properties": {
           "geoLocationPlace": {
             "type": "string",
-            "title": "Spatial region or named place where the data was gathered"
+            "title": "Geolocation Place"
           },
           "geoLocationPoint": {
             "type": "object",
-            "title": "A point containing a single latitude-longitude pair",
+            "title": "Geolocation Point",
+            "description": "A point location in space. A point contains a single longitude-latitude pair.",
             "properties": {
               "pointLongitude": {
                 "type": "string",
@@ -416,23 +431,24 @@
           },
           "geoLocationBox": {
             "type": "object",
-            "title": "A box determined by two longitude and two latitude borders",
+            "title": "Geolocation Box",
+            "description": "The spatial limits of a box. A box is defined by two geographic points: the lower left corner and the upper right corner. Each point is defined by its longitude and latitude.",
            "properties": {
              "westBoundLongitude": {
                "type": "string",
-               "title": "Longitude coordinate of west bound"
+               "title": "Longitude coordinate of west bound."
              },
              "eastBoundLongitude": {
                "type": "string",
-               "title": "Longitude coordinate of east bound"
+               "title": "Longitude coordinate of east bound."
              },
              "southBoundLatitude": {
                "type": "string",
-               "title": "Latitude coordinate of south bound"
+               "title": "Latitude coordinate of south bound."
              },
              "northBoundLatitude": {
                "type": "string",
-               "title": "Latitude coordinate of north bound"
+               "title": "Latitude coordinate of north bound."
              }
            }
          },
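For orientation, a geoLocations entry with the point and box shapes described above might look like the following sketch. The coordinates are invented rough values for Helsinki and, as the schema requires, are given as strings:

# Illustrative geoLocations entry; place name and coordinates are invented.
geo_location = {
    "geoLocationPlace": "Helsinki, Finland",
    "geoLocationPoint": {
        "pointLongitude": "24.94",
        "pointLatitude": "60.17",
    },
    # The box is defined by its lower left and upper right corners.
    "geoLocationBox": {
        "westBoundLongitude": "24.82",
        "eastBoundLongitude": "25.25",
        "southBoundLatitude": "60.10",
        "northBoundLatitude": "60.30",
    },
}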
@@ -462,7 +478,7 @@
     "language": {
       "type": "string",
       "title": "Language",
-      "description": "Primary language of the Study/Datasets submitted.",
+      "description": "Primary language of the submitted resources.",
       "enum": [
         "Afar",
         "Abkhaz",
@@ -641,7 +657,7 @@
       "Vietnamese",
       "Volapük",
       "Walloon",
       "Wolof",
       "Xhosa",
       "Yiddish",
       "Yoruba",
@@ -653,7 +669,7 @@
     "relatedIdentifiers": {
       "type": "array",
       "title": "Related Identifiers",
-      "description": "Must be a globally unique identifier",
+      "description": "Information about a resource related to the one being registered, primarily used to provide series information or a text citation where the related resource does not have an identifier. Providing such an identifier is optional.",
       "items": {
         "type": "object",
         "title": "Identifier of related resource",
@@ -666,7 +682,7 @@
         "relatedIdentifier": {
           "type": "string",
           "title": "Identifier",
-          "description": "These must be globally unique identifiers and correspond to the type selected"
+          "description": "These must be globally unique identifiers and correspond to the type selected, e.g. 10.2022/example.78m9865 for the DOI identifier type."
         },
         "relatedIdentifierType": {
           "type": "string",
@@ -698,56 +714,59 @@
           "title": "Relationship Type",
           "enum": [
             "Cites",
-            "Is cited by",
+            "Is Cited By",
             "Compiles",
-            "Is compiled by",
+            "Is Compiled By",
             "Continues",
-            "Is continued by",
+            "Is Continued By",
             "Describes",
-            "Is described by",
+            "Is Described By",
             "Documents",
-            "Is documented by",
-            "Is derived from",
-            "Is source of",
-            "Has metadata",
-            "Is metadata for",
-            "Has part",
-            "Is part of",
-            "Is supplemented by",
-            "Is supplement to",
+            "Is Documented By",
+            "Is Derived From",
+            "Is Source Of",
+            "Has Metadata",
+            "Is Metadata For",
+            "Has Part",
+            "Is Part Of",
+            "Is Supplemented By",
+            "Is Supplement To",
             "Obsoletes",
-            "Is obsoleted by",
+            "Is Obsoleted By",
             "References",
-            "Is referenced by",
+            "Is Referenced By",
             "Requires",
-            "Is required by",
+            "Is Required By",
             "Reviews",
-            "Is reviewed by",
-            "Has version",
-            "Is version of",
-            "Is new version of",
-            "Is previous version of",
-            "Is published in",
-            "Is variant form of",
-            "Is original form of",
-            "Is identical to"
+            "Is Reviewed By",
+            "Has Version",
+            "Is Version Of",
+            "Is New Version Of",
+            "Is Previous Version Of",
+            "Is Published In",
+            "Is Variant Form Of",
+            "Is Original Form Of",
+            "Is Identical To"
           ]
         },
         "relatedMetadataScheme": {
           "type": "string",
-          "title": "Related Metadata Scheme"
+          "title": "Related Metadata Scheme",
+          "description": "To be used only with the relation pair (Has Metadata/Is Metadata For). Example: DDI-L."
         },
         "schemeUri": {
           "type": "string",
-          "title": "Related Metadata Scheme URI"
+          "title": "Related Metadata Scheme URI",
+          "description": "To be used only with the relation pair (Has Metadata/Is Metadata For). Example: http://www.ddialliance.org/Specification/DDI-Lifecycle/3.1/XMLSchema/instance.xsd"
        },
        "schemeType": {
          "type": "string",
-         "title": "Related Metadata Scheme Type"
+         "title": "Related Metadata Scheme Type",
+         "description": "To be used only with the relation pair (Has Metadata/Is Metadata For). Examples: XSD, DDT, Turtle."
        },
        "resourceTypeGeneral": {
          "type": "string",
-         "title": "Resource Type General",
+         "title": "Resource Type",
          "enum": [
            "Audiovisual",
            "Book",
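For orientation, a relatedIdentifiers entry using the metadata-scheme fields might look like the following sketch. Per the descriptions above, relatedMetadataScheme, schemeUri and schemeType are meaningful only with the relation pair (Has Metadata/Is Metadata For); the DOI is invented, and "Dataset" is assumed to be among the allowed resource types:

# Illustrative relatedIdentifiers entry; identifier value is invented.
related_identifier = {
    "relatedIdentifier": "10.1234/example.78m9865",
    "relatedIdentifierType": "DOI",
    "relationType": "Has Metadata",
    "relatedMetadataScheme": "DDI-L",
    "schemeUri": "http://www.ddialliance.org/Specification/DDI-Lifecycle/3.1/XMLSchema/instance.xsd",
    "schemeType": "XSD",
    "resourceTypeGeneral": "Dataset",  # assumed to be in the full enum
}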
@@ -787,10 +806,10 @@
     "alternateIdentifiers": {
       "type": "array",
       "title": "Alternate Identifiers",
-      "description": "An identifier or identifiers other than the primary or related identifier applied to the resource being registered. EGA identifier obtained that as an alternative to the current resource.",
+      "description": "An identifier or identifiers other than the primary or related identifier applied to the resources being registered. EGA identifier(s) obtained that can be used as an alternative to the current resources.",
       "items": {
         "type": "object",
-        "title": "An identifier or identifiers other than the primary Identifier of the resource.",
+        "title": "Identifier(s) other than the primary identifier of the resources",
         "required": [
           "alternateIdentifier",
           "alternateIdentifierType"
@@ -833,7 +852,7 @@
     "sizes": {
       "type": "array",
       "title": "Sizes",
-      "description": "Size (e.g.,bytes, pages, inches, etc.) or duration (extent), e.g.,hours, minutes, days, etc., of a resource. Examples: '15 pages', '6 MB', '45 minutes'",
+      "description": "Size (e.g., bytes, pages, inches, etc.) or duration (extent), e.g., hours, minutes, days, etc., of a resource. Examples: '15 pages', '6 MB', '45 minutes'.",
       "items": {
         "type": "string",
         "title": "Size"
@@ -851,9 +870,10 @@
     "fundingReferences": {
       "type": "array",
       "title": "Funding References",
+      "description": "It is recommended to supply funding information when financial support has been received. The funder will be identified by their Research Organization Registry (ROR).",
       "items": {
         "type": "object",
-        "title": "Information about financial support for the resource",
+        "title": "Information about financial support for producing the resources",
         "required": [
           "funderName",
           "funderIdentifier",
@@ -884,12 +904,12 @@
         "awardNumber": {
           "type": "string",
           "title": "Award Number",
-          "description": "The code assigned by the funder to a sponsored award"
+          "description": "The code assigned by the funder to a sponsored award."
        },
        "awardTitle": {
          "type": "string",
          "title": "Award Title",
-         "description": "The human readable title of the award"
+         "description": "The human-readable title of the award."
        },
        "awardUri": {
          "type": "string",
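For orientation, entries like the funding references above can be checked with the third-party `jsonschema` package; below is a minimal runnable sketch against a hand-written schema fragment (the fragment and values are illustrative assumptions, not the real folders.json):

# Validate an invented funding reference against a small schema fragment.
from jsonschema import Draft7Validator

funding_reference_fragment = {
    "type": "object",
    "required": ["funderName", "funderIdentifier"],
    "properties": {
        "funderName": {"type": "string"},
        "funderIdentifier": {"type": "string"},
        "awardNumber": {"type": "string"},
        "awardTitle": {"type": "string"},
    },
}

funding_reference = {
    "funderName": "Example Funder",  # invented values for illustration
    "funderIdentifier": "https://ror.org/05k73zm37",
    "awardNumber": "123456",
    "awardTitle": "Example grant",
}

# iter_errors() yields nothing when the instance conforms to the fragment.
for error in Draft7Validator(funding_reference_fragment).iter_errors(funding_reference):
    print(error.message)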
From 130e28519140339600d3f4cca757ba56814b698 Mon Sep 17 00:00:00 2001
From: Stefan Negru
Date: Mon, 10 Jan 2022 13:26:45 +0200
Subject: [PATCH 165/336] refine links and attributes description

---
 .github/config/.wordlist.txt                  |  2 ++
 .../helpers/schemas/ena_analysis.json         | 18 +++++++-------
 metadata_backend/helpers/schemas/ena_dac.json | 14 ++++++-----
 .../helpers/schemas/ena_dataset.json          | 16 +++++++------
 .../helpers/schemas/ena_experiment.json       | 17 ++++++-------
 .../helpers/schemas/ena_policy.json           | 18 +++++++-------
 metadata_backend/helpers/schemas/ena_run.json | 18 +++++++-------
 .../helpers/schemas/ena_sample.json           | 24 ++++++++++++-------
 .../helpers/schemas/ena_study.json            | 18 +++++++-------
 9 files changed, 83 insertions(+), 62 deletions(-)

diff --git a/.github/config/.wordlist.txt b/.github/config/.wordlist.txt
index bbe2f1b7c..f62c4b02d 100644
--- a/.github/config/.wordlist.txt
+++ b/.github/config/.wordlist.txt
@@ -171,10 +171,12 @@ familyname
 faroese
 fasta
 fastq
+FBtr
 filename
 filetype
 flatfile
 flx
+FLYBASE
 folderid
 followsreadindex
 formdata
diff --git a/metadata_backend/helpers/schemas/ena_analysis.json b/metadata_backend/helpers/schemas/ena_analysis.json
index 6adddfd89..61fd77841 100644
--- a/metadata_backend/helpers/schemas/ena_analysis.json
+++ b/metadata_backend/helpers/schemas/ena_analysis.json
@@ -27,17 +27,18 @@
       "properties": {
         "xrefDb": {
           "type": "string",
-          "title": "Database"
+          "title": "Database",
+          "description": "Name of the database or a link where the resource is located. INSDC controlled vocabulary of permitted cross references. Please see http://www.insdc.org/db_xref.html . For example, FLYBASE."
         },
         "xrefId": {
           "type": "string",
           "title": "Database ID",
-          "description": "Accession in the referenced database."
+          "description": "Accession ID or stable identifier in the referenced database. For example, FBtr0080008 (in FLYBASE)."
         },
         "label": {
           "type": "string",
           "title": "Label",
-          "description": "Text label to display for the link."
+          "description": "A text label to identify the xrefLink by."
         }
       }
     },
@@ -53,7 +54,7 @@
         "label": {
           "type": "string",
           "title": "Label",
-          "description": "Text label to display for the link."
+          "description": "A text label to identify the urlLink by."
         },
         "url": {
           "type": "string",
@@ -73,7 +74,7 @@
       ],
       "properties": {
         "entrezDb": {
-          "description": "NCBI controlled vocabulary of permitted cross references",
+          "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references.",
           "title": "Database",
           "type": "string",
           "enum": [
@@ -129,7 +130,7 @@
           "pattern": "^[a-zA-Z0-9]+"
         },
         "label": {
-          "description": "Text label to display for the link.",
+          "description": "A text label to identify the URL Entrez link by.",
           "title": "Label",
           "type": "string"
         }
@@ -139,7 +140,6 @@
       "$id": "#/definitions/analysisAttribute",
       "type": "object",
       "title": "Analysis Attribute",
-      "description": "Properties and attributes of the data set. These can be entered as free-form tag-value pairs. Submitters may be asked to follow a community established ontology when describing the work.",
       "required": [
         "tag",
         "value"
@@ -962,6 +962,7 @@
       "analysisLinks": {
         "type": "array",
         "title": "Analysis Links",
+        "description": "Links to resources related to this analysis or analysis set (publication, datasets, online databases). Used to encode URL links, Entrez links, and xref DB links.
", "items": { "$ref": "#/definitions/Links" } @@ -969,9 +970,10 @@ "analysisAttributes": { "type": "array", "title": "Analysis Attributes", + "description": "Properties and attributes of the data set. These can be entered as free-form tag-value pairs. Submitters may be asked to follow a community established ontology when describing the work.", "items": { "$ref": "#/definitions/analysisAttribute" } } } -} +} \ No newline at end of file diff --git a/metadata_backend/helpers/schemas/ena_dac.json b/metadata_backend/helpers/schemas/ena_dac.json index d074ca729..3de5111cc 100644 --- a/metadata_backend/helpers/schemas/ena_dac.json +++ b/metadata_backend/helpers/schemas/ena_dac.json @@ -27,17 +27,18 @@ "properties": { "xrefDb": { "type": "string", - "title": "Database" + "title": "Database", + "description": "Name of the database or a link where the resource is located at. INSDC controlled vocabulary of permitted cross references.Please see http://www.insdc.org/db_xref.html . For example, FLYBASE." }, "xrefId": { "type": "string", "title": "Database ID", - "description": "Accession in the referenced database." + "description": "Accession ID or stable identifier, in the referenced database. For example, FBtr0080008 (in FLYBASE)" }, "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the xrefLink by." } } }, @@ -53,7 +54,7 @@ "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the urlLink by." }, "url": { "type": "string", @@ -73,7 +74,7 @@ ], "properties": { "entrezDb": { - "description": "NCBI controlled vocabulary of permitted cross references", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references.", "title": "Database", "type": "string", "enum": [ @@ -129,7 +130,7 @@ "pattern": "^[a-zA-Z0-9]+" }, "label": { - "description": "Text label to display for the link.", + "description": "A text label to identify the url Entrez link by.", "title": "Label", "type": "string" } @@ -213,6 +214,7 @@ "dacLinks": { "type": "array", "title": "DAC Links", + "description": "Links to resources related to this experiment or experiment set (publication, datasets, online databases). Used to encode URL links, Entrez links, and xref DB links. ", "items": { "$ref": "#/definitions/Links" } diff --git a/metadata_backend/helpers/schemas/ena_dataset.json b/metadata_backend/helpers/schemas/ena_dataset.json index 489e314a5..25dfaef01 100644 --- a/metadata_backend/helpers/schemas/ena_dataset.json +++ b/metadata_backend/helpers/schemas/ena_dataset.json @@ -27,17 +27,18 @@ "properties": { "xrefDb": { "type": "string", - "title": "Database" + "title": "Database", + "description": "Name of the database or a link where the resource is located at. INSDC controlled vocabulary of permitted cross references.Please see http://www.insdc.org/db_xref.html . For example, FLYBASE." }, "xrefId": { "type": "string", "title": "Database ID", - "description": "Accession in the referenced database." + "description": "Accession ID or stable identifier, in the referenced database. For example, FBtr0080008 (in FLYBASE)" }, "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the xrefLink by." 
} } }, @@ -53,7 +54,7 @@ "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the urlLink by." }, "url": { "type": "string", @@ -73,7 +74,7 @@ ], "properties": { "entrezDb": { - "description": "NCBI controlled vocabulary of permitted cross references", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references.", "title": "Database", "type": "string", "enum": [ @@ -129,7 +130,7 @@ "pattern": "^[a-zA-Z0-9]+" }, "label": { - "description": "Text label to display for the link.", + "description": "A text label to identify the url Entrez link by.", "title": "Label", "type": "string" } @@ -139,7 +140,6 @@ "$id": "#/definitions/datasetAttribute", "type": "object", "title": "Dataset Attribute", - "description": "Properties and attributes of the data set. These can be entered as free-form tag-value pairs. Submitters may be asked to follow a community established ontology when describing the work.", "required": [ "tag", "value" @@ -294,6 +294,7 @@ "datasetLinks": { "type": "array", "title": "Dataset Links", + "description": "Used to encode URL links, Entrez links, and xref DB links. These are links used to cross reference with other relevant resources.", "items": { "$ref": "#/definitions/Links" } @@ -301,6 +302,7 @@ "datasetAttributes": { "type": "array", "title": "Dataset Attributes", + "description": "Properties and attributes of the data set. These can be entered as free-form tag-value pairs. Submitters may be asked to follow a community established ontology when describing the work.", "items": { "$ref": "#/definitions/datasetAttribute" } diff --git a/metadata_backend/helpers/schemas/ena_experiment.json b/metadata_backend/helpers/schemas/ena_experiment.json index eeb7d11ad..ed148389c 100644 --- a/metadata_backend/helpers/schemas/ena_experiment.json +++ b/metadata_backend/helpers/schemas/ena_experiment.json @@ -27,17 +27,18 @@ "properties": { "xrefDb": { "type": "string", - "title": "Database" + "title": "Database", + "description": "Name of the database or a link where the resource is located at. INSDC controlled vocabulary of permitted cross references.Please see http://www.insdc.org/db_xref.html . For example, FLYBASE." }, "xrefId": { "type": "string", "title": "Database ID", - "description": "Accession in the referenced database." + "description": "Accession ID or stable identifier, in the referenced database. For example, FBtr0080008 (in FLYBASE)" }, "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the xrefLink by." } } }, @@ -53,7 +54,7 @@ "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the urlLink by." 
}, "url": { "type": "string", @@ -73,7 +74,7 @@ ], "properties": { "entrezDb": { - "description": "NCBI controlled vocabulary of permitted cross references", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references.", "title": "Database", "type": "string", "enum": [ @@ -129,7 +130,7 @@ "pattern": "^[a-zA-Z0-9]+" }, "label": { - "description": "Text label to display for the link.", + "description": "A text label to identify the url Entrez link by.", "title": "Label", "type": "string" } @@ -139,7 +140,6 @@ "$id": "#/definitions/experimentAttribute", "type": "object", "title": "Experiment Attribute", - "description": "Properties and attributes of the data set. These can be entered as free-form tag-value pairs. Submitters may be asked to follow a community established ontology when describing the work.", "required": [ "tag", "value" @@ -791,7 +791,7 @@ "experimentLinks": { "type": "array", "title": "Experiment Links", - "description": " Links to resources related to this experiment or experiment set (publication, datasets, online databases).", + "description": "Links to resources related to this experiment or experiment set (publication, datasets, online databases). Used to encode URL links, Entrez links, and xref DB links. ", "items": { "$ref": "#/definitions/Links" } @@ -799,6 +799,7 @@ "experimentAttributes": { "type": "array", "title": "Experiment Attributes", + "description": "Properties and attributes of the data set. These can be entered as free-form tag-value pairs. Submitters may be asked to follow a community established ontology when describing the work.", "items": { "$ref": "#/definitions/experimentAttribute" } diff --git a/metadata_backend/helpers/schemas/ena_policy.json b/metadata_backend/helpers/schemas/ena_policy.json index 34310b7c6..da3357a06 100644 --- a/metadata_backend/helpers/schemas/ena_policy.json +++ b/metadata_backend/helpers/schemas/ena_policy.json @@ -27,17 +27,18 @@ "properties": { "xrefDb": { "type": "string", - "title": "Database" + "title": "Database", + "description": "Name of the database or a link where the resource is located at. INSDC controlled vocabulary of permitted cross references.Please see http://www.insdc.org/db_xref.html . For example, FLYBASE." }, "xrefId": { "type": "string", "title": "Database ID", - "description": "Accession in the referenced database." + "description": "Accession ID or stable identifier, in the referenced database. For example, FBtr0080008 (in FLYBASE)" }, "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the xrefLink by." } } }, @@ -53,7 +54,7 @@ "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the urlLink by." 
}, "url": { "type": "string", @@ -73,7 +74,7 @@ ], "properties": { "entrezDb": { - "description": "NCBI controlled vocabulary of permitted cross references", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references.", "title": "Database", "type": "string", "enum": [ @@ -129,7 +130,7 @@ "pattern": "^[a-zA-Z0-9]+" }, "label": { - "description": "Text label to display for the link.", + "description": "A text label to identify the url Entrez link by.", "title": "Label", "type": "string" } @@ -187,7 +188,6 @@ "$id": "#/definitions/policyAttribute", "type": "object", "title": "Policy Attribute", - "description": "tag title and its associated value (description)", "required": [ "tag", "value" @@ -334,6 +334,7 @@ "policyLinks": { "type": "array", "title": "Policy Links", + "description": "Links to resources related to this experiment or experiment set (publication, datasets, online databases). Used to encode URL links, Entrez links, and xref DB links. ", "items": { "$ref": "#/definitions/Links" } @@ -349,9 +350,10 @@ "policyAttributes": { "type": "array", "title": "Policy Attributes", + "description": "Properties and attributes of the policy. These can be entered as free-form tag-value pairs. Submitters may be asked to follow a community established ontology when describing the work.", "items": { "$ref": "#/definitions/policyAttribute" } } } -} +} \ No newline at end of file diff --git a/metadata_backend/helpers/schemas/ena_run.json b/metadata_backend/helpers/schemas/ena_run.json index 2295ce226..77870da66 100644 --- a/metadata_backend/helpers/schemas/ena_run.json +++ b/metadata_backend/helpers/schemas/ena_run.json @@ -27,17 +27,18 @@ "properties": { "xrefDb": { "type": "string", - "title": "Database" + "title": "Database", + "description": "Name of the database or a link where the resource is located at. INSDC controlled vocabulary of permitted cross references.Please see http://www.insdc.org/db_xref.html . For example, FLYBASE." }, "xrefId": { "type": "string", "title": "Database ID", - "description": "Accession in the referenced database." + "description": "Accession ID or stable identifier, in the referenced database. For example, FBtr0080008 (in FLYBASE)" }, "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the xrefLink by." } } }, @@ -53,7 +54,7 @@ "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the urlLink by." }, "url": { "type": "string", @@ -73,7 +74,7 @@ ], "properties": { "entrezDb": { - "description": "NCBI controlled vocabulary of permitted cross references", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references.", "title": "Database", "type": "string", "enum": [ @@ -129,7 +130,7 @@ "pattern": "^[a-zA-Z0-9]+" }, "label": { - "description": "Text label to display for the link.", + "description": "A text label to identify the url Entrez link by.", "title": "Label", "type": "string" } @@ -139,7 +140,6 @@ "$id": "#/definitions/runAttribute", "type": "object", "title": "Run Attribute", - "description": "Properties and attributes of the data set. These can be entered as free-form tag-value pairs. 
Submitters may be asked to follow a community established ontology when describing the work.", "required": [ "tag", "value" @@ -711,6 +711,7 @@ "runLinks": { "type": "array", "title": "Run Links", + "description": "Links to resources related to this experiment or experiment set (publication, datasets, online databases). Used to encode URL links, Entrez links, and xref DB links. ", "items": { "$ref": "#/definitions/Links" } @@ -718,9 +719,10 @@ "runAttributes": { "type": "array", "title": "Run Attributes", + "description": "Properties and attributes of the data set. These can be entered as free-form tag-value pairs. Submitters may be asked to follow a community established ontology when describing the work.", "items": { "$ref": "#/definitions/runAttribute" } } } -} +} \ No newline at end of file diff --git a/metadata_backend/helpers/schemas/ena_sample.json b/metadata_backend/helpers/schemas/ena_sample.json index 2691b2ad1..8d171bf53 100644 --- a/metadata_backend/helpers/schemas/ena_sample.json +++ b/metadata_backend/helpers/schemas/ena_sample.json @@ -27,17 +27,18 @@ "properties": { "xrefDb": { "type": "string", - "title": "Database" + "title": "Database", + "description": "Name of the database or a link where the resource is located at. INSDC controlled vocabulary of permitted cross references.Please see http://www.insdc.org/db_xref.html . For example, FLYBASE." }, "xrefId": { "type": "string", "title": "Database ID", - "description": "Accession in the referenced database." + "description": "Accession ID or stable identifier, in the referenced database. For example, FBtr0080008 (in FLYBASE)" }, "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the xrefLink by." } } }, @@ -53,7 +54,7 @@ "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the urlLink by." }, "url": { "type": "string", @@ -73,7 +74,7 @@ ], "properties": { "entrezDb": { - "description": "NCBI controlled vocabulary of permitted cross references", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references.", "title": "Database", "type": "string", "enum": [ @@ -129,7 +130,7 @@ "pattern": "^[a-zA-Z0-9]+" }, "label": { - "description": "Text label to display for the link.", + "description": "A text label to identify the url Entrez link by.", "title": "Label", "type": "string" } @@ -139,7 +140,6 @@ "$id": "#/definitions/sampleAttribute", "type": "object", "title": "Sample Attribute", - "description": "tag title and its associated value (description)", "required": [ "tag", "value" @@ -210,7 +210,11 @@ "gender": { "type": "string", "title": "Gender", - "enum": ["male", "female", "unknown"] + "enum": [ + "male", + "female", + "unknown" + ] } }, "required": [ @@ -235,6 +239,7 @@ "sampleLinks": { "type": "array", "title": "Sample Links", + "description": "Links to resources related to this experiment or experiment set (publication, datasets, online databases). Used to encode URL links, Entrez links, and xref DB links. ", "items": { "$ref": "#/definitions/Links" } @@ -242,9 +247,10 @@ "sampleAttributes": { "type": "array", "title": "Sample Attributes", + "description": "Properties and attributes of a sample. These can be entered as free-form tag-value pairs. 
For certain studies, submitters may be asked to follow a community established ontology when describing the work.", "items": { "$ref": "#/definitions/sampleAttribute" } } } -} +} \ No newline at end of file diff --git a/metadata_backend/helpers/schemas/ena_study.json b/metadata_backend/helpers/schemas/ena_study.json index 1c5cfea70..89a3807f8 100644 --- a/metadata_backend/helpers/schemas/ena_study.json +++ b/metadata_backend/helpers/schemas/ena_study.json @@ -27,17 +27,18 @@ "properties": { "xrefDb": { "type": "string", - "title": "Database" + "title": "Database", + "description": "Name of the database or a link where the resource is located at. INSDC controlled vocabulary of permitted cross references.Please see http://www.insdc.org/db_xref.html . For example, FLYBASE." }, "xrefId": { "type": "string", "title": "Database ID", - "description": "Accession in the referenced database." + "description": "Accession ID or stable identifier, in the referenced database. For example, FBtr0080008 (in FLYBASE)" }, "label": { - "description": "Text label to display for the link.", + "type": "string", "title": "Label", - "type": "string" + "description": "A text label to identify the xrefLink by." } } }, @@ -53,7 +54,7 @@ "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the urlLink by." }, "url": { "type": "string", @@ -73,7 +74,7 @@ ], "properties": { "entrezDb": { - "description": "NCBI controlled vocabulary of permitted cross references", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references.", "title": "Database", "type": "string", "enum": [ @@ -129,7 +130,7 @@ "pattern": "^[a-zA-Z0-9]+" }, "label": { - "description": "Text label to display for the link.", + "description": "A text label to identify the url Entrez link by.", "title": "Label", "type": "string" } @@ -139,7 +140,6 @@ "$id": "#/definitions/studyAttribute", "type": "object", "title": "Study Attribute", - "description": "Tag title and its associated value", "required": [ "tag", "value" @@ -225,6 +225,7 @@ "studyLinks": { "type": "array", "title": "Study Links", + "description": "Links to resources related to this experiment or experiment set (publication, datasets, online databases). Used to encode URL links, Entrez links, and xref DB links. ", "items": { "$ref": "#/definitions/Links" } @@ -232,6 +233,7 @@ "studyAttributes": { "type": "array", "title": "Study Attributes", + "description": "Properties and attributes of the study. These can be entered as free-form tag-value pairs. 
For certain studies, submitters may be asked to follow a community established ontology when describing the work.", "items": { "$ref": "#/definitions/studyAttribute" } From 8762e189ba834b9b7aeb5c6eedbb8735961f4246 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 10 Jan 2022 13:27:00 +0200 Subject: [PATCH 166/336] DAC description changed to title to match schema --- metadata_backend/helpers/schemas/ena_dac.json | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/metadata_backend/helpers/schemas/ena_dac.json b/metadata_backend/helpers/schemas/ena_dac.json index 3de5111cc..ab5970710 100644 --- a/metadata_backend/helpers/schemas/ena_dac.json +++ b/metadata_backend/helpers/schemas/ena_dac.json @@ -194,7 +194,9 @@ "$ref": "#/definitions/contact" }, { - "required": ["mainContact"], + "required": [ + "mainContact" + ], "properties": { "mainContact": { "const": true @@ -206,10 +208,9 @@ "maxContains": 1 }, "title": { - "title": "DAC Description", + "title": "DAC Title", "description": "Short text that can be used to call out DAC records in searches or in displays.", - "type": "string", - "minLength": 10 + "type": "string" }, "dacLinks": { "type": "array", @@ -220,4 +221,4 @@ } } } -} +} \ No newline at end of file From afcdbbe7a0a2363213b166b6aa683904c8ccb695 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 10 Jan 2022 13:27:07 +0200 Subject: [PATCH 167/336] Dataset requires title --- metadata_backend/helpers/schemas/ena_dataset.json | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/metadata_backend/helpers/schemas/ena_dataset.json b/metadata_backend/helpers/schemas/ena_dataset.json index 25dfaef01..68edcd65a 100644 --- a/metadata_backend/helpers/schemas/ena_dataset.json +++ b/metadata_backend/helpers/schemas/ena_dataset.json @@ -235,6 +235,9 @@ }, "type": "object", "description": "Describes an object that contains data access policy information.", + "required": [ + "title" + ], "properties": { "title": { "title": "Dataset Title", @@ -281,7 +284,7 @@ "type": "array", "items": { "$ref": "#/definitions/reference" - } + } }, "analysisRef": { "title": "Analysis Reference", @@ -289,7 +292,7 @@ "type": "array", "items": { "$ref": "#/definitions/reference" - } + } }, "datasetLinks": { "type": "array", @@ -308,4 +311,4 @@ } } } -} +} \ No newline at end of file From 29b53f90e653aebb51c499e485ec147fddc6ca70 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 10 Jan 2022 13:37:44 +0200 Subject: [PATCH 168/336] enum sorted alphabetically --- .../helpers/schemas/datacite.json | 250 +++++++++--------- .../helpers/schemas/ena_analysis.json | 92 +++---- metadata_backend/helpers/schemas/ena_dac.json | 34 +-- .../helpers/schemas/ena_dataset.json | 50 ++-- .../helpers/schemas/ena_experiment.json | 142 +++++----- .../helpers/schemas/ena_policy.json | 34 +-- metadata_backend/helpers/schemas/ena_run.json | 142 +++++----- .../helpers/schemas/ena_sample.json | 34 +-- .../helpers/schemas/ena_study.json | 52 ++-- metadata_backend/helpers/schemas/folders.json | 250 +++++++++--------- 10 files changed, 540 insertions(+), 540 deletions(-) diff --git a/metadata_backend/helpers/schemas/datacite.json b/metadata_backend/helpers/schemas/datacite.json index 5cb708ed7..7b9b4dc58 100644 --- a/metadata_backend/helpers/schemas/datacite.json +++ b/metadata_backend/helpers/schemas/datacite.json @@ -106,54 +106,54 @@ "type": "string", "title": "FOS identifier", "enum": [ - "FOS: Natural sciences", - "FOS: Mathematics", - "FOS: Computer and information sciences", - "FOS: 
Physical sciences", - "FOS: Chemical sciences", - "FOS: Earth and related environmental sciences", + "FOS: Agricultural biotechnology", + "FOS: Agricultural sciences", + "FOS: Agriculture, forestry, and fisheries", + "FOS: Animal and dairy science", + "FOS: Arts (arts, history of arts, performing arts, music)", + "FOS: Basic medicine", "FOS: Biological sciences", - "FOS: Other natural sciences", - "FOS: Engineering and technology", + "FOS: Chemical engineering", + "FOS: Chemical sciences", "FOS: Civil engineering", + "FOS: Clinical medicine", + "FOS: Computer and information sciences", + "FOS: Earth and related environmental sciences", + "FOS: Economics and business", + "FOS: Educational sciences", "FOS: Electrical engineering, electronic engineering, information engineering", - "FOS: Mechanical engineering", - "FOS: Chemical engineering", - "FOS: Materials engineering", - "FOS: Medical engineering", - "FOS: Environmental engineering", + "FOS: Engineering and technology", "FOS: Environmental biotechnology", + "FOS: Environmental engineering", + "FOS: Health sciences", + "FOS: History and archaeology", + "FOS: Humanities", "FOS: Industrial biotechnology", - "FOS: Nano-technology", - "FOS: Other engineering and technologies", + "FOS: Languages and literature", + "FOS: Law", + "FOS: Materials engineering", + "FOS: Mathematics", + "FOS: Mechanical engineering", + "FOS: Media and communications", "FOS: Medical and health sciences", - "FOS: Basic medicine", - "FOS: Clinical medicine", - "FOS: Health sciences", "FOS: Medical biotechnology", - "FOS: Other medical sciences", - "FOS: Agricultural sciences", - "FOS: Agriculture, forestry, and fisheries", - "FOS: Animal and dairy science", - "FOS: Veterinary science", - "FOS: Agricultural biotechnology", + "FOS: Medical engineering", + "FOS: Nano-technology", + "FOS: Natural sciences", "FOS: Other agricultural sciences", - "FOS: Social sciences", - "FOS: Psychology", - "FOS: Economics and business", - "FOS: Educational sciences", - "FOS: Sociology", - "FOS: Law", - "FOS: Political science", - "FOS: Social and economic geography", - "FOS: Media and communications", + "FOS: Other engineering and technologies", + "FOS: Other humanities", + "FOS: Other medical sciences", + "FOS: Other natural sciences", "FOS: Other social sciences", - "FOS: Humanities", - "FOS: History and archaeology", - "FOS: Languages and literature", "FOS: Philosophy, ethics and religion", - "FOS: Arts (arts, history of arts, performing arts, music)", - "FOS: Other humanities" + "FOS: Physical sciences", + "FOS: Political science", + "FOS: Psychology", + "FOS: Social and economic geography", + "FOS: Social sciences", + "FOS: Sociology", + "FOS: Veterinary science" ] } }, @@ -200,8 +200,8 @@ "Project Manager", "Project Member", "Related Person", - "Researcher", "Research Group", + "Researcher", "Rights Holder", "Sponsor", "Supervisor", @@ -297,8 +297,8 @@ "enum": [ "Accepted", "Available", - "Copyrighted", "Collected", + "Copyrighted", "Created", "Issued", "Submitted", @@ -412,189 +412,189 @@ "title": "Language", "description": "Primary language of the submitted resources.", "enum": [ - "Afar", "Abkhaz", - "Avestan", + "Afar", "Afrikaans", "Akan", + "Albanian", "Amharic", - "Aragonese", "Arabic", + "Aragonese", + "Armenian", "Assamese", "Avaric", + "Avestan", "Aymara", "Azerbaijani", + "Bambara", "Bashkir", + "Basque", "Belarusian", - "Bulgarian", + "Bengali", "Bihari", "Bislama", - "Bambara", - "Bengali", - "Tibetan", - "Breton", "Bosnian", + "Breton", + "Bulgarian", + "Burmese", 
"Catalan", - "Chechen", "Chamorro", + "Chechen", + "Chichewa", + "Chinese", + "Chuvash", + "Cornish", "Corsican", "Cree", + "Croatian", "Czech", - "Old Church Slavonic", - "Chuvash", - "Welsh", "Danish", - "German", "Divehi", + "Dutch", "Dzongkha", - "Ewe", - "Greek", "English", "Esperanto", - "Spanish", "Estonian", - "Basque", - "Persian", - "Fula", - "Finnish", - "Fijian", + "Ewe", "Faroese", + "Fijian", + "Finnish", "French", - "Western Frisian", - "Irish", - "Scottish Gaelic", + "Fula", "Galician", + "Ganda", + "Georgian", + "German", + "Greek", "Guaraní", "Gujarati", - "Manx", + "Haitian", "Hausa", "Hebrew", + "Herero", "Hindi", "Hiri Motu", - "Croatian", - "Haitian", "Hungarian", - "Armenian", - "Herero", - "Interlingua", + "Icelandic", + "Ido", + "Igbo", "Indonesian", + "Interlingua", "Interlingue", - "Igbo", - "Nuosu", + "Inuktitut", "Inupiaq", - "Ido", - "Icelandic", + "Irish", "Italian", - "Inuktitut", "Japanese", "Javanese", - "Georgian", - "Kongo", - "Kikuyu", - "Kwanyama", - "Kazakh", "Kalaallisut", - "Khmer", "Kannada", - "Korean", "Kanuri", "Kashmiri", - "Kurdish", + "Kazakh", + "Khmer", + "Kikuyu", + "Kinyarwanda", + "Kirundi", "Komi", - "Cornish", + "Kongo", + "Korean", + "Kurdish", + "Kwanyama", "Kyrgyz", + "Lao", "Latin", - "Luxembourgish", - "Ganda", + "Latvian", "Limburgish", "Lingala", - "Lao", "Lithuanian", "Luba-Katanga", - "Latvian", - "Malagasy", - "Marshallese", - "Māori", + "Luxembourgish", "Macedonian", - "Malayalam", - "Mongolian", - "Marathi", + "Malagasy", "Malay", + "Malayalam", "Maltese", - "Burmese", + "Manx", + "Māori", + "Marathi", + "Marshallese", + "Mongolian", "Nauru", - "Norwegian Bokmål", - "Northern Ndebele", - "Nepali", + "Navajo", "Ndonga", - "Dutch", + "Nepali", + "Northern Ndebele", + "Northern Sami", + "Norwegian Bokmål", "Norwegian Nynorsk", "Norwegian", - "Southern Ndebele", - "Navajo", - "Chichewa", + "Nuosu", "Occitan", "Ojibwe", - "Oromo", + "Old Church Slavonic", "Oriya", + "Oromo", "Ossetian", - "Panjabi", "Pāli", - "Polish", + "Panjabi", "Pashto", + "Persian", + "Polish", "Portuguese", "Quechua", - "Romansh", - "Kirundi", "Romanian", + "Romansh", "Russian", - "Kinyarwanda", + "Samoan", + "Sango", "Sanskrit", "Sardinian", + "Scottish Gaelic", + "Serbian", + "Shona", "Sindhi", - "Northern Sami", - "Sango", "Sinhala", "Slovak", "Slovenian", - "Samoan", - "Shona", "Somali", - "Albanian", - "Serbian", - "Swati", + "Southern Ndebele", "Southern Sotho", + "Spanish", "Sundanese", - "Swedish", "Swahili", + "Swati", + "Swedish", + "Tagalog", + "Tahitian", + "Tajik", "Tamil", + "Tatar", "Telugu", - "Tajik", "Thai", + "Tibetan", "Tigrinya", - "Turkmen", - "Tagalog", - "Tswana", "Tonga", - "Turkish", "Tsonga", - "Tatar", + "Tswana", + "Turkish", + "Turkmen", "Twi", - "Tahitian", - "Uyghur", "Ukrainian", "Urdu", + "Uyghur", "Uzbek", "Venda", "Vietnamese", "Volapük", "Walloon", + "Welsh", + "Western Frisian", "WolOf", "Xhosa", "Yiddish", "Yoruba", "Zhuang", - "Chinese", "Zulu" ] }, diff --git a/metadata_backend/helpers/schemas/ena_analysis.json b/metadata_backend/helpers/schemas/ena_analysis.json index 61fd77841..c232bf055 100644 --- a/metadata_backend/helpers/schemas/ena_analysis.json +++ b/metadata_backend/helpers/schemas/ena_analysis.json @@ -78,49 +78,49 @@ "title": "Database", "type": "string", "enum": [ - "pubmed", - "protein", - "nuccore", - "ipg", - "nucleotide", - "structure", - "genome", "annotinfo", "assembly", + "biocollections", "bioproject", "biosample", + "biosystems", "blastdbinfo", "books", "cdd", "clinvar", + "dbvar", "gap", 
"gapplus", - "grasp", - "dbvar", - "gene", "gds", + "gene", + "genome", "geoprofiles", + "grasp", + "gtr", "homologene", + "ipg", "medgen", "mesh", "ncbisearch", "nlmcatalog", + "nuccore", + "nucleotide", "omim", "orgtrack", + "pcassay", + "pccompound", + "pcsubstance", "pmc", "popset", + "protein", "proteinclusters", - "pcassay", "protfam", - "biosystems", - "pccompound", - "pcsubstance", + "pubmed", "seqannot", "snp", "sra", - "taxonomy", - "biocollections", - "gtr" + "structure", + "taxonomy" ] }, "entrezId": { @@ -325,34 +325,34 @@ "type": "string", "title": "Filetype", "enum": [ - "tab", - "bam", + "agp", "bai", - "cram", - "crai", - "vcf", - "vcf_aggregate", + "bam", + "bcf_aggregate", "bcf", - "tabix", - "wig", "bed", - "gff", + "BioNano_native", + "chromosome_list", + "crai", + "cram", + "csi", "fasta", "fastq", "flatfile", - "chromosome_list", - "sample_list", - "readme_file", - "phenotype_file", - "BioNano_native", - "Kallisto_native", - "agp", - "unlocalised_list", + "gff", "info", + "Kallisto_native", "manifest", "other", - "csi", - "bcf_aggregate" + "phenotype_file", + "readme_file", + "sample_list", + "tab", + "tabix", + "unlocalised_list", + "vcf_aggregate", + "vcf", + "wig" ] }, "checksumMethod": { @@ -437,13 +437,13 @@ "type": "string", "title": "Type of Sequence Assembly", "enum": [ - "clone or isolate", - "primary metagenome", "binned metagenome", - "Metagenome-Assembled Genome (MAG)", - "Environmental Single-Cell Amplified Genome (SAG)", + "clinical isolate assembly", + "clone or isolate", "COVID-19 outbreak", - "clinical isolate assembly" + "Environmental Single-Cell Amplified Genome (SAG)", + "Metagenome-Assembled Genome (MAG)", + "primary metagenome" ] }, "tpa": { @@ -482,14 +482,14 @@ "type": "string", "title": "Experiment Type", "enum": [ - "Whole genome sequencing", - "Whole transcriptome sequencing", + "Curation", "Exome sequencing", "Genotyping by array", - "transcriptomics", - "Curation", "Genotyping by sequencing", - "Target sequencing" + "Target sequencing", + "transcriptomics", + "Whole genome sequencing", + "Whole transcriptome sequencing" ] }, "imputation": { diff --git a/metadata_backend/helpers/schemas/ena_dac.json b/metadata_backend/helpers/schemas/ena_dac.json index ab5970710..10a64df05 100644 --- a/metadata_backend/helpers/schemas/ena_dac.json +++ b/metadata_backend/helpers/schemas/ena_dac.json @@ -78,49 +78,49 @@ "title": "Database", "type": "string", "enum": [ - "pubmed", - "protein", - "nuccore", - "ipg", - "nucleotide", - "structure", - "genome", "annotinfo", "assembly", + "biocollections", "bioproject", "biosample", + "biosystems", "blastdbinfo", "books", "cdd", "clinvar", + "dbvar", "gap", "gapplus", - "grasp", - "dbvar", - "gene", "gds", + "gene", + "genome", "geoprofiles", + "grasp", + "gtr", "homologene", + "ipg", "medgen", "mesh", "ncbisearch", "nlmcatalog", + "nuccore", + "nucleotide", "omim", "orgtrack", + "pcassay", + "pccompound", + "pcsubstance", "pmc", "popset", + "protein", "proteinclusters", - "pcassay", "protfam", - "biosystems", - "pccompound", - "pcsubstance", + "pubmed", "seqannot", "snp", "sra", - "taxonomy", - "biocollections", - "gtr" + "structure", + "taxonomy" ] }, "entrezId": { diff --git a/metadata_backend/helpers/schemas/ena_dataset.json b/metadata_backend/helpers/schemas/ena_dataset.json index 68edcd65a..967069d8e 100644 --- a/metadata_backend/helpers/schemas/ena_dataset.json +++ b/metadata_backend/helpers/schemas/ena_dataset.json @@ -78,49 +78,49 @@ "title": "Database", "type": "string", "enum": [ - "pubmed", - 
"protein", - "nuccore", - "ipg", - "nucleotide", - "structure", - "genome", "annotinfo", "assembly", + "biocollections", "bioproject", "biosample", + "biosystems", "blastdbinfo", "books", "cdd", "clinvar", + "dbvar", "gap", "gapplus", - "grasp", - "dbvar", - "gene", "gds", + "gene", + "genome", "geoprofiles", + "grasp", + "gtr", "homologene", + "ipg", "medgen", "mesh", "ncbisearch", "nlmcatalog", + "nuccore", + "nucleotide", "omim", "orgtrack", + "pcassay", + "pccompound", + "pcsubstance", "pmc", "popset", + "protein", "proteinclusters", - "pcassay", "protfam", - "biosystems", - "pccompound", - "pcsubstance", + "pubmed", "seqannot", "snp", "sra", - "taxonomy", - "biocollections", - "gtr" + "structure", + "taxonomy" ] }, "entrezId": { @@ -250,20 +250,20 @@ "items": { "type": "string", "enum": [ - "Whole genome sequencing", + "Amplicon sequencing", + "Chip-Seq", + "Chromatin accessibility profiling by high-throughput sequencing", "Exome sequencing", + "Genomic variant calling", "Genotyping by array", - "Transcriptome profiling by high-throughput sequencing", - "Transcriptome profiling by array", - "Amplicon sequencing", + "Histone modification profiling by high-throughput sequencing", "Methylation binding domain sequencing", "Methylation profiling by high-throughput sequencing", "Phenotype information", "Study summary information", - "Genomic variant calling", - "Chromatin accessibility profiling by high-throughput sequencing", - "Histone modification profiling by high-throughput sequencing", - "Chip-Seq" + "Transcriptome profiling by array", + "Transcriptome profiling by high-throughput sequencing", + "Whole genome sequencing" ] }, "uniqueItems": true diff --git a/metadata_backend/helpers/schemas/ena_experiment.json b/metadata_backend/helpers/schemas/ena_experiment.json index ed148389c..761364c16 100644 --- a/metadata_backend/helpers/schemas/ena_experiment.json +++ b/metadata_backend/helpers/schemas/ena_experiment.json @@ -78,49 +78,49 @@ "title": "Database", "type": "string", "enum": [ - "pubmed", - "protein", - "nuccore", - "ipg", - "nucleotide", - "structure", - "genome", "annotinfo", "assembly", + "biocollections", "bioproject", "biosample", + "biosystems", "blastdbinfo", "books", "cdd", "clinvar", + "dbvar", "gap", "gapplus", - "grasp", - "dbvar", - "gene", "gds", + "gene", + "genome", "geoprofiles", + "grasp", + "gtr", "homologene", + "ipg", "medgen", "mesh", "ncbisearch", "nlmcatalog", + "nuccore", + "nucleotide", "omim", "orgtrack", + "pcassay", + "pccompound", + "pcsubstance", "pmc", "popset", + "protein", "proteinclusters", - "pcassay", "protfam", - "biosystems", - "pccompound", - "pcsubstance", + "pubmed", "seqannot", "snp", "sra", - "taxonomy", - "biocollections", - "gtr" + "structure", + "taxonomy" ] }, "entrezId": { @@ -252,11 +252,11 @@ "AMPLICON", "ATAC-seq", "Bisulfite-Seq", + "ChIA-PET", + "ChIP-Seq", "CLONE", "CLONEEND", "CTS", - "ChIA-PET", - "ChIP-Seq", "DNase-Hypersensitivity", "EST", "FAIRE-seq", @@ -264,15 +264,18 @@ "FL-cDNA", "Hi-C", "MBD-Seq", + "MeDIP-Seq", + "miRNA-Seq", "MNase-Seq", "MRE-Seq", - "MeDIP-Seq", + "ncRNA-Seq", "OTHER", "POOLCLONE", "RAD-Seq", "RIP-Seq", "RNA-Seq", "SELEX", + "ssRNA-seq", "Synthetic-Long-Read", "Targeted-Capture", "Tethered Chromatin Conformation Capture", @@ -281,24 +284,21 @@ "WCS", "WGA", "WGS", - "WXS", - "miRNA-Seq", - "ncRNA-Seq", - "ssRNA-seq" + "WXS" ] }, "librarySource": { "title": "Library Source", "type": "string", "enum": [ - "GENOMIC", "GENOMIC SINGLE CELL", + "GENOMIC", "METAGENOMIC", "METATRANSCRIPTOMIC", "OTHER", 
"SYNTHETIC", - "TRANSCRIPTOMIC", "TRANSCRIPTOMIC SINGLE CELL", + "TRANSCRIPTOMIC", "VIRAL RNA" ] }, @@ -308,34 +308,34 @@ "enum": [ "5-methylcytidine antibody", "CAGE", - "ChIP", + "cDNA_oligo_dT", + "cDNA_randomPriming", + "cDNA", "ChIP-Seq", + "ChIP", "DNase", "HMPR", "Hybrid Selection", - "Inverse rRNA", "Inverse rRNA selection", + "Inverse rRNA", "MBD2 protein methyl-CpG binding domain", "MDA", "MF", "MNase", "MSLL", "Oligo-dT", + "padlock probes capture method", "PCR", "PolyA", "RACE", - "RANDOM", "RANDOM PCR", - "RT-PCR", + "RANDOM", "Reduced Representation", - "Restriction Digest", - "cDNA", - "cDNA_oligo_dT", - "cDNA_randomPriming", - "other", - "padlock probes capture method", "repeat fractionation", + "Restriction Digest", + "RT-PCR", "size fractionation", + "other", "unspecified" ] }, @@ -375,11 +375,11 @@ "16S rRNA", "18S rRNA", "28S rRNA", - "RBCL", - "matK", "COX1", - "ITS1-5.8S-ITS2", "exome", + "ITS1-5.8S-ITS2", + "matK", + "RBCL", "other" ] }, @@ -440,12 +440,12 @@ "title": "Read Type", "type": "string", "enum": [ - "Forward", - "Reverse", "Adapter", - "Primer", - "Linker", "Barcode", + "Forward", + "Linker", + "Primer", + "Reverse", "Other" ] }, @@ -708,6 +708,19 @@ "description": " The PLATFORM record selects which sequencing platform and platform-specific runtime parameters. This will be determined by the Center.", "type": "string", "enum": [ + "454 GS 20", + "454 GS FLX Titanium", + "454 GS FLX", + "454 GS FLX+", + "454 GS Junior", + "454 GS", + "AB 310 Genetic Analyzer", + "AB 3130 Genetic Analyzer", + "AB 3130xL Genetic Analyzer", + "AB 3500 Genetic Analyzer", + "AB 3500xL Genetic Analyzer", + "AB 3730 Genetic Analyzer", + "AB 3730xL Genetic Analyzer", "AB 5500 Genetic Analyzer", "AB 5500xl Genetic Analyzer", "AB 5500xl-W Genetic Analysis System", @@ -715,26 +728,19 @@ "AB SOLiD 4 System", "AB SOLiD 4hq System", "AB SOLiD PI System", - "AB SOLiD System", "AB SOLiD System 2.0", "AB SOLiD System 3.0", - "AB 310 Genetic Analyzer", - "AB 3130 Genetic Analyzer", - "AB 3130xL Genetic Analyzer", - "AB 3500 Genetic Analyzer", - "AB 3500xL Genetic Analyzer", - "AB 3730 Genetic Analyzer", - "AB 3730xL Genetic Analyzer", + "AB SOLiD System", "BGISEQ-50", "BGISEQ-500", - "MGISEQ-2000RS", "Complete Genomics", + "GridION", "Helicos HeliScope", "HiSeq X Five", "HiSeq X Ten", - "Illumina Genome Analyzer", "Illumina Genome Analyzer II", "Illumina Genome Analyzer IIx", + "Illumina Genome Analyzer", "Illumina HiScanSQ", "Illumina HiSeq 1000", "Illumina HiSeq 1500", @@ -743,32 +749,26 @@ "Illumina HiSeq 3000", "Illumina HiSeq 4000", "Illumina HiSeq X", - "Illumina MiSeq", "Illumina MiniSeq", + "Illumina MiSeq", "Illumina NovaSeq 6000", - "NextSeq 500", - "NextSeq 550", + "Ion GeneStudio S5 Plus", + "Ion GeneStudio S5 Prime", + "Ion GeneStudio S5", + "Ion Torrent Genexus", "Ion Torrent PGM", "Ion Torrent Proton", - "Ion Torrent S5", "Ion Torrent S5 XL", - "Ion Torrent Genexus", - "Ion GeneStudio S5", - "Ion GeneStudio S5 Prime", - "Ion GeneStudio S5 Plus", - "454 GS", - "454 GS 20", - "454 GS FLX", - "454 GS FLX Titanium", - "454 GS FLX+", - "454 GS Junior", - "GridION", + "Ion Torrent S5", + "MGISEQ-2000RS", "MinION", - "PromethION", - "PacBio RS", + "NextSeq 500", + "NextSeq 550", "PacBio RS II", - "Sequel", + "PacBio RS", + "PromethION", "Sequel II", + "Sequel", "unspecified" ] }, diff --git a/metadata_backend/helpers/schemas/ena_policy.json b/metadata_backend/helpers/schemas/ena_policy.json index da3357a06..c7eb4b7ab 100644 --- a/metadata_backend/helpers/schemas/ena_policy.json +++ 
b/metadata_backend/helpers/schemas/ena_policy.json @@ -78,49 +78,49 @@ "title": "Database", "type": "string", "enum": [ - "pubmed", - "protein", - "nuccore", - "ipg", - "nucleotide", - "structure", - "genome", "annotinfo", "assembly", + "biocollections", "bioproject", "biosample", + "biosystems", "blastdbinfo", "books", "cdd", "clinvar", + "dbvar", "gap", "gapplus", - "grasp", - "dbvar", - "gene", "gds", + "gene", + "genome", "geoprofiles", + "grasp", + "gtr", "homologene", + "ipg", "medgen", "mesh", "ncbisearch", "nlmcatalog", + "nuccore", + "nucleotide", "omim", "orgtrack", + "pcassay", + "pccompound", + "pcsubstance", "pmc", "popset", + "protein", "proteinclusters", - "pcassay", "protfam", - "biosystems", - "pccompound", - "pcsubstance", + "pubmed", "seqannot", "snp", "sra", - "taxonomy", - "biocollections", - "gtr" + "structure", + "taxonomy" ] }, "entrezId": { diff --git a/metadata_backend/helpers/schemas/ena_run.json b/metadata_backend/helpers/schemas/ena_run.json index 77870da66..5cdd8ba99 100644 --- a/metadata_backend/helpers/schemas/ena_run.json +++ b/metadata_backend/helpers/schemas/ena_run.json @@ -78,49 +78,49 @@ "title": "Database", "type": "string", "enum": [ - "pubmed", - "protein", - "nuccore", - "ipg", - "nucleotide", - "structure", - "genome", "annotinfo", "assembly", + "biocollections", "bioproject", "biosample", + "biosystems", "blastdbinfo", "books", "cdd", "clinvar", + "dbvar", "gap", "gapplus", - "grasp", - "dbvar", - "gene", "gds", + "gene", + "genome", "geoprofiles", + "grasp", + "gtr", "homologene", + "ipg", "medgen", "mesh", "ncbisearch", "nlmcatalog", + "nuccore", + "nucleotide", "omim", "orgtrack", + "pcassay", + "pccompound", + "pcsubstance", "pmc", "popset", + "protein", "proteinclusters", - "pcassay", "protfam", - "biosystems", - "pccompound", - "pcsubstance", + "pubmed", "seqannot", "snp", "sra", - "taxonomy", - "biocollections", - "gtr" + "structure", + "taxonomy" ] }, "entrezId": { @@ -330,34 +330,34 @@ "type": "string", "title": "filetype", "enum": [ - "tab", - "bam", + "agp", "bai", - "cram", - "crai", - "vcf", - "vcf_aggregate", + "bam", "bcf", - "tabix", - "wig", - "sra", - "sff", - "srf", "bed", - "gff", + "BioNano_native", + "chromosome_list", + "crai", + "cram", "fasta", "fastq", "flatfile", - "chromosome_list", - "sample_list", - "readme_file", - "phenotype_file", - "BioNano_native", - "Kallisto_native", - "agp", - "unlocalised_list", + "gff", "info", + "Kallisto_native", "manifest", + "phenotype_file", + "readme_file", + "sample_list", + "sff", + "sra", + "srf", + "tab", + "tabix", + "unlocalised_list", + "vcf_aggregate", + "vcf", + "wig", "other" ] }, @@ -377,12 +377,12 @@ "title": "Read Type", "type": "string", "enum": [ - "single", - "paired", "cell_barcode", - "umi_barcode", "feature_barcode", + "paired", "sample_barcode", + "single", + "umi_barcode", "Other" ] }, @@ -549,12 +549,12 @@ "title": "Read Type", "type": "string", "enum": [ - "single", - "paired", "cell_barcode", - "umi_barcode", "feature_barcode", + "paired", "sample_barcode", + "single", + "umi_barcode", "Other" ] }, @@ -621,6 +621,19 @@ "description": " The PLATFORM record selects which sequencing platform and platform-specific runtime parameters. 
This will be determined by the Center.", "type": "string", "enum": [ + "454 GS 20", + "454 GS FLX Titanium", + "454 GS FLX", + "454 GS FLX+", + "454 GS Junior", + "454 GS", + "AB 310 Genetic Analyzer", + "AB 3130 Genetic Analyzer", + "AB 3130xL Genetic Analyzer", + "AB 3500 Genetic Analyzer", + "AB 3500xL Genetic Analyzer", + "AB 3730 Genetic Analyzer", + "AB 3730xL Genetic Analyzer", "AB 5500 Genetic Analyzer", "AB 5500xl Genetic Analyzer", "AB 5500xl-W Genetic Analysis System", @@ -628,26 +641,19 @@ "AB SOLiD 4 System", "AB SOLiD 4hq System", "AB SOLiD PI System", - "AB SOLiD System", "AB SOLiD System 2.0", "AB SOLiD System 3.0", - "AB 310 Genetic Analyzer", - "AB 3130 Genetic Analyzer", - "AB 3130xL Genetic Analyzer", - "AB 3500 Genetic Analyzer", - "AB 3500xL Genetic Analyzer", - "AB 3730 Genetic Analyzer", - "AB 3730xL Genetic Analyzer", + "AB SOLiD System", "BGISEQ-50", "BGISEQ-500", - "MGISEQ-2000RS", "Complete Genomics", + "GridION", "Helicos HeliScope", "HiSeq X Five", "HiSeq X Ten", - "Illumina Genome Analyzer", "Illumina Genome Analyzer II", "Illumina Genome Analyzer IIx", + "Illumina Genome Analyzer", "Illumina HiScanSQ", "Illumina HiSeq 1000", "Illumina HiSeq 1500", @@ -656,32 +662,26 @@ "Illumina HiSeq 3000", "Illumina HiSeq 4000", "Illumina HiSeq X", - "Illumina MiSeq", "Illumina MiniSeq", + "Illumina MiSeq", "Illumina NovaSeq 6000", - "NextSeq 500", - "NextSeq 550", + "Ion GeneStudio S5 Plus", + "Ion GeneStudio S5 Prime", + "Ion GeneStudio S5", + "Ion Torrent Genexus", "Ion Torrent PGM", "Ion Torrent Proton", - "Ion Torrent S5", "Ion Torrent S5 XL", - "Ion Torrent Genexus", - "Ion GeneStudio S5", - "Ion GeneStudio S5 Prime", - "Ion GeneStudio S5 Plus", - "454 GS", - "454 GS 20", - "454 GS FLX", - "454 GS FLX Titanium", - "454 GS FLX+", - "454 GS Junior", - "GridION", + "Ion Torrent S5", + "MGISEQ-2000RS", "MinION", - "PromethION", - "PacBio RS", + "NextSeq 500", + "NextSeq 550", "PacBio RS II", - "Sequel", + "PacBio RS", + "PromethION", "Sequel II", + "Sequel", "unspecified" ] }, diff --git a/metadata_backend/helpers/schemas/ena_sample.json b/metadata_backend/helpers/schemas/ena_sample.json index 8d171bf53..cf158eb9f 100644 --- a/metadata_backend/helpers/schemas/ena_sample.json +++ b/metadata_backend/helpers/schemas/ena_sample.json @@ -78,49 +78,49 @@ "title": "Database", "type": "string", "enum": [ - "pubmed", - "protein", - "nuccore", - "ipg", - "nucleotide", - "structure", - "genome", "annotinfo", "assembly", + "biocollections", "bioproject", "biosample", + "biosystems", "blastdbinfo", "books", "cdd", "clinvar", + "dbvar", "gap", "gapplus", - "grasp", - "dbvar", - "gene", "gds", + "gene", + "genome", "geoprofiles", + "grasp", + "gtr", "homologene", + "ipg", "medgen", "mesh", "ncbisearch", "nlmcatalog", + "nuccore", + "nucleotide", "omim", "orgtrack", + "pcassay", + "pccompound", + "pcsubstance", "pmc", "popset", + "protein", "proteinclusters", - "pcassay", "protfam", - "biosystems", - "pccompound", - "pcsubstance", + "pubmed", "seqannot", "snp", "sra", - "taxonomy", - "biocollections", - "gtr" + "structure", + "taxonomy" ] }, "entrezId": { diff --git a/metadata_backend/helpers/schemas/ena_study.json b/metadata_backend/helpers/schemas/ena_study.json index 89a3807f8..91a1469da 100644 --- a/metadata_backend/helpers/schemas/ena_study.json +++ b/metadata_backend/helpers/schemas/ena_study.json @@ -78,49 +78,49 @@ "title": "Database", "type": "string", "enum": [ - "pubmed", - "protein", - "nuccore", - "ipg", - "nucleotide", - "structure", - "genome", "annotinfo", "assembly", + 
"biocollections", "bioproject", "biosample", + "biosystems", "blastdbinfo", "books", "cdd", "clinvar", + "dbvar", "gap", "gapplus", - "grasp", - "dbvar", - "gene", "gds", + "gene", + "genome", "geoprofiles", + "grasp", + "gtr", "homologene", + "ipg", "medgen", "mesh", "ncbisearch", "nlmcatalog", + "nuccore", + "nucleotide", "omim", "orgtrack", + "pcassay", + "pccompound", + "pcsubstance", "pmc", "popset", + "protein", "proteinclusters", - "pcassay", "protfam", - "biosystems", - "pccompound", - "pcsubstance", + "pubmed", "seqannot", "snp", "sra", - "taxonomy", - "biocollections", - "gtr" + "structure", + "taxonomy" ] }, "entrezId": { @@ -165,20 +165,20 @@ "description": "The Study type presents a controlled vocabulary for expressing the overall purpose of the study.", "type": "string", "enum": [ - "Whole Genome Sequencing", - "Metagenomics", - "Transcriptome Analysis", - "Resequencing", + "Cancer Genomics", "Epigenetics", - "Synthetic Genomics", + "Exome Sequencing", "Forensic or Paleo-genomics", "Gene Regulation Study", - "Cancer Genomics", + "Metagenomics", + "Pooled Clone Sequencing", "Population Genomics", + "Resequencing", "RNASeq", - "Exome Sequencing", - "Pooled Clone Sequencing", + "Synthetic Genomics", + "Transcriptome Analysis", "Transcriptome Sequencing", + "Whole Genome Sequencing", "Other" ] } @@ -251,4 +251,4 @@ } } } -} +} \ No newline at end of file diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index eadd9dbd5..4d364b456 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -147,54 +147,54 @@ "type": "string", "title": "FOS identifier", "enum": [ - "FOS: Natural sciences", - "FOS: Mathematics", - "FOS: Computer and information sciences", - "FOS: Physical sciences", - "FOS: Chemical sciences", - "FOS: Earth and related environmental sciences", + "FOS: Agricultural biotechnology", + "FOS: Agricultural sciences", + "FOS: Agriculture, forestry, and fisheries", + "FOS: Animal and dairy science", + "FOS: Arts (arts, history of arts, performing arts, music)", + "FOS: Basic medicine", "FOS: Biological sciences", - "FOS: Other natural sciences", - "FOS: Engineering and technology", + "FOS: Chemical engineering", + "FOS: Chemical sciences", "FOS: Civil engineering", + "FOS: Clinical medicine", + "FOS: Computer and information sciences", + "FOS: Earth and related environmental sciences", + "FOS: Economics and business", + "FOS: Educational sciences", "FOS: Electrical engineering, electronic engineering, information engineering", - "FOS: Mechanical engineering", - "FOS: Chemical engineering", - "FOS: Materials engineering", - "FOS: Medical engineering", - "FOS: Environmental engineering", + "FOS: Engineering and technology", "FOS: Environmental biotechnology", + "FOS: Environmental engineering", + "FOS: Health sciences", + "FOS: History and archaeology", + "FOS: Humanities", "FOS: Industrial biotechnology", - "FOS: Nano-technology", - "FOS: Other engineering and technologies", + "FOS: Languages and literature", + "FOS: Law", + "FOS: Materials engineering", + "FOS: Mathematics", + "FOS: Mechanical engineering", + "FOS: Media and communications", "FOS: Medical and health sciences", - "FOS: Basic medicine", - "FOS: Clinical medicine", - "FOS: Health sciences", "FOS: Medical biotechnology", - "FOS: Other medical sciences", - "FOS: Agricultural sciences", - "FOS: Agriculture, forestry, and fisheries", - "FOS: Animal and dairy science", - "FOS: Veterinary science", - "FOS: 
Agricultural biotechnology", + "FOS: Medical engineering", + "FOS: Nano-technology", + "FOS: Natural sciences", "FOS: Other agricultural sciences", - "FOS: Social sciences", - "FOS: Psychology", - "FOS: Economics and business", - "FOS: Educational sciences", - "FOS: Sociology", - "FOS: Law", - "FOS: Political science", - "FOS: Social and economic geography", - "FOS: Media and communications", + "FOS: Other engineering and technologies", + "FOS: Other humanities", + "FOS: Other medical sciences", + "FOS: Other natural sciences", "FOS: Other social sciences", - "FOS: Humanities", - "FOS: History and archaeology", - "FOS: Languages and literature", "FOS: Philosophy, ethics and religion", - "FOS: Arts (arts, history of arts, performing arts, music)", - "FOS: Other humanities" + "FOS: Physical sciences", + "FOS: Political science", + "FOS: Psychology", + "FOS: Social and economic geography", + "FOS: Social sciences", + "FOS: Sociology", + "FOS: Veterinary science" ] }, "subjectScheme": { @@ -250,8 +250,8 @@ "Project Manager", "Project Member", "Related Person", - "Researcher", "Research Group", + "Researcher", "Rights Holder", "Sponsor", "Supervisor", @@ -347,8 +347,8 @@ "enum": [ "Accepted", "Available", - "Copyrighted", "Collected", + "Copyrighted", "Created", "Issued", "Submitted", @@ -480,189 +480,189 @@ "title": "Language", "description": "Primary language of the submitted resources.", "enum": [ - "Afar", "Abkhaz", - "Avestan", + "Afar", "Afrikaans", "Akan", + "Albanian", "Amharic", - "Aragonese", "Arabic", + "Aragonese", + "Armenian", "Assamese", "Avaric", + "Avestan", "Aymara", "Azerbaijani", + "Bambara", "Bashkir", + "Basque", "Belarusian", - "Bulgarian", + "Bengali", "Bihari", "Bislama", - "Bambara", - "Bengali", - "Tibetan", - "Breton", "Bosnian", + "Breton", + "Bulgarian", + "Burmese", "Catalan", - "Chechen", "Chamorro", + "Chechen", + "Chichewa", + "Chinese", + "Chuvash", + "Cornish", "Corsican", "Cree", + "Croatian", "Czech", - "Old Church Slavonic", - "Chuvash", - "Welsh", "Danish", - "German", "Divehi", + "Dutch", "Dzongkha", - "Ewe", - "Greek", "English", "Esperanto", - "Spanish", "Estonian", - "Basque", - "Persian", - "Fula", - "Finnish", - "Fijian", + "Ewe", "Faroese", + "Fijian", + "Finnish", "French", - "Western Frisian", - "Irish", - "Scottish Gaelic", + "Fula", "Galician", + "Ganda", + "Georgian", + "German", + "Greek", "Guaraní", "Gujarati", - "Manx", + "Haitian", "Hausa", "Hebrew", + "Herero", "Hindi", "Hiri Motu", - "Croatian", - "Haitian", "Hungarian", - "Armenian", - "Herero", - "Interlingua", + "Icelandic", + "Ido", + "Igbo", "Indonesian", + "Interlingua", "Interlingue", - "Igbo", - "Nuosu", + "Inuktitut", "Inupiaq", - "Ido", - "Icelandic", + "Irish", "Italian", - "Inuktitut", "Japanese", "Javanese", - "Georgian", - "Kongo", - "Kikuyu", - "Kwanyama", - "Kazakh", "Kalaallisut", - "Khmer", "Kannada", - "Korean", "Kanuri", "Kashmiri", - "Kurdish", + "Kazakh", + "Khmer", + "Kikuyu", + "Kinyarwanda", + "Kirundi", "Komi", - "Cornish", + "Kongo", + "Korean", + "Kurdish", + "Kwanyama", "Kyrgyz", + "Lao", "Latin", - "Luxembourgish", - "Ganda", + "Latvian", "Limburgish", "Lingala", - "Lao", "Lithuanian", "Luba-Katanga", - "Latvian", - "Malagasy", - "Marshallese", - "Māori", + "Luxembourgish", "Macedonian", - "Malayalam", - "Mongolian", - "Marathi", + "Malagasy", "Malay", + "Malayalam", "Maltese", - "Burmese", + "Manx", + "Māori", + "Marathi", + "Marshallese", + "Mongolian", "Nauru", - "Norwegian Bokmål", - "Northern Ndebele", - "Nepali", + "Navajo", "Ndonga", - "Dutch", + 
"Nepali", + "Northern Ndebele", + "Northern Sami", + "Norwegian Bokmål", "Norwegian Nynorsk", "Norwegian", - "Southern Ndebele", - "Navajo", - "Chichewa", + "Nuosu", "Occitan", "Ojibwe", - "Oromo", + "Old Church Slavonic", "Oriya", + "Oromo", "Ossetian", - "Panjabi", "Pāli", - "Polish", + "Panjabi", "Pashto", + "Persian", + "Polish", "Portuguese", "Quechua", - "Romansh", - "Kirundi", "Romanian", + "Romansh", "Russian", - "Kinyarwanda", + "Samoan", + "Sango", "Sanskrit", "Sardinian", + "Scottish Gaelic", + "Serbian", + "Shona", "Sindhi", - "Northern Sami", - "Sango", "Sinhala", "Slovak", "Slovenian", - "Samoan", - "Shona", "Somali", - "Albanian", - "Serbian", - "Swati", + "Southern Ndebele", "Southern Sotho", + "Spanish", "Sundanese", - "Swedish", "Swahili", + "Swati", + "Swedish", + "Tagalog", + "Tahitian", + "Tajik", "Tamil", + "Tatar", "Telugu", - "Tajik", "Thai", + "Tibetan", "Tigrinya", - "Turkmen", - "Tagalog", - "Tswana", "Tonga", - "Turkish", "Tsonga", - "Tatar", + "Tswana", + "Turkish", + "Turkmen", "Twi", - "Tahitian", - "Uyghur", "Ukrainian", "Urdu", + "Uyghur", "Uzbek", "Venda", "Vietnamese", "Volapük", "Walloon", + "Welsh", + "Western Frisian", "WolOf", "Xhosa", "Yiddish", "Yoruba", "Zhuang", - "Chinese", "Zulu" ] }, From f497a8be42830c5e1cfe40457eca014eb548fee0 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 10 Jan 2022 17:13:41 +0200 Subject: [PATCH 169/336] fix processing value for run & experiment simplify processing so that it corresponds to correct interpretation of XML schema --- .github/config/.wordlist.txt | 4 ++ metadata_backend/helpers/parser.py | 16 ++++++++ .../helpers/schemas/ena_experiment.json | 35 ++++++++-------- metadata_backend/helpers/schemas/ena_run.json | 40 +++++++++++-------- tests/test_files/experiment/ERX000119.json | 5 +-- 5 files changed, 62 insertions(+), 38 deletions(-) diff --git a/.github/config/.wordlist.txt b/.github/config/.wordlist.txt index f62c4b02d..354447279 100644 --- a/.github/config/.wordlist.txt +++ b/.github/config/.wordlist.txt @@ -23,6 +23,7 @@ analysislinks analysisref analysistype annotinfo +anonymized antibiogram api apisauce @@ -147,6 +148,7 @@ dt dzongkha ean eastboundlongitude +EDirect ega eissn ena @@ -260,6 +262,7 @@ kwanyama kyrgyz lang leaveaspool +Lexically libraryconstructionprotocol librarydescriptor librarylayout @@ -469,6 +472,7 @@ secondaryid sectionname selex seqannot +sequenceable sequenceannotation sequenceassembly sequenceflatfile diff --git a/metadata_backend/helpers/parser.py b/metadata_backend/helpers/parser.py index 1228bf379..f4c88eaf2 100644 --- a/metadata_backend/helpers/parser.py +++ b/metadata_backend/helpers/parser.py @@ -155,6 +155,19 @@ def _flatten(self, data: Any) -> Union[Dict, List, str, None]: LOG.error(reason) raise web.HTTPBadRequest(reason=reason) + if "processing" in key: + if not bool(value): + continue + + if "pipeSection" in key: + children[key] = [value] + continue + + if "prevStepIndex" in key: + if not bool(value): + children[key] = None + continue + if key in links and len(value) == 1: grp = list() if isinstance(value[key[:-1]], dict): @@ -241,6 +254,9 @@ def element_decode( selected - analysisRef, sampleRef, runRef, experimentRef need to be an array - experimentRef in run is an array with maxitems 1 + - if processing is empty do not show it as it is not required + - processing pipeSection should be intepreted as an array + - processing pipeSection prevStepIndex can be None if not specified empty """ xsd_type = xsd_type or xsd_element.type diff --git 
diff --git a/metadata_backend/helpers/schemas/ena_experiment.json b/metadata_backend/helpers/schemas/ena_experiment.json
index 761364c16..4bed3be89 100644
--- a/metadata_backend/helpers/schemas/ena_experiment.json
+++ b/metadata_backend/helpers/schemas/ena_experiment.json
@@ -520,9 +520,11 @@
       "$id": "#/definitions/processingType",
       "type": "object",
       "title": "Processing Type",
+      "description": "Information about the processing type such as pipeline and sequencing directives.",
       "properties": {
         "pipeline": {
           "title": "Pipeline",
+          "description": "The Pipeline type identifies the sequence or tree of actions to process the sequencing data.",
           "type": "object",
           "required": [
             "pipeSection"
@@ -534,17 +536,21 @@
           "items": {
             "type": "object",
             "required": [
-              "prevStepIndex"
+              "stepIndex",
+              "prevStepIndex",
+              "program",
+              "version"
             ],
             "properties": {
               "stepIndex": {
                 "type": "string",
+                "description": "Lexically ordered value that allows for the pipe section to be hierarchically ordered. The primitive data type is used to allow for pipe sections to be inserted later on.",
                 "title": "Step Index"
               },
               "prevStepIndex": {
                 "oneOf": [
                   {
-                    "title": "String value",
+                    "title": "Float value",
                     "type": "string"
                   },
                   {
@@ -552,22 +558,27 @@
                     "type": "null"
                   }
                 ],
+                "description": "stepIndex of the previous step in the workflow. Set to null for the first pipe section.",
                 "title": "Prev Step Index"
               },
               "program": {
                 "type": "string",
+                "description": "Name of the program or process for primary analysis. This may include a test or condition that leads to branching in the workflow.",
                 "title": "Program"
               },
               "version": {
                 "type": "string",
+                "description": "Version of the program or process for primary analysis.",
                 "title": "Version"
               },
               "notes": {
                 "type": "string",
+                "description": "Notes about the program or process for primary analysis.
", "title": "Notes" }, "sectionName": { "type": "string", + "description": "If there are multiple sections specify the name of the processing pipeline section.", "title": "Section Name" } } @@ -582,18 +593,18 @@ "properties": { "sampleDemuxDirective": { "type": "object", - "title": "Sample demux directive", "description": "Tells the Archive who will execute the sample demultiplexing operation.", + "title": "Sample Demux Directive", "properties": { "leaveAsPool": { "type": "string", "description": "There shall be no sample de-multiplexing at the level of assigning individual reads to sample pool members.", - "title": "Leave as pool" + "title": "Leave As Pool" }, "submitterDemultiplexed": { "type": "string", "description": "The submitter has assigned individual reads to sample pool members by providing individual files containing reads with the same member assignment.", - "title": "Submitter demultiplexed" + "title": "Submitter Demultiplexed" } } } @@ -774,19 +785,7 @@ }, "processing": { "title": "Processing", - "oneOf": [ - { - "title": "Single Processing", - "type": "string" - }, - { - "title": "Complex Processing", - "type": "array", - "items": { - "$ref": "#/definitions/processingType" - } - } - ] + "$ref": "#/definitions/processingType" }, "experimentLinks": { "type": "array", diff --git a/metadata_backend/helpers/schemas/ena_run.json b/metadata_backend/helpers/schemas/ena_run.json index 5cdd8ba99..08f7cc3d0 100644 --- a/metadata_backend/helpers/schemas/ena_run.json +++ b/metadata_backend/helpers/schemas/ena_run.json @@ -163,9 +163,11 @@ "$id": "#/definitions/processingType", "type": "object", "title": "Processing Type", + "description": "Information about the processing type such as pipeline and sequencing directives.", "properties": { "pipeline": { "title": "Pipeline", + "description": "The Pipeline type identifies the sequence or tree of actions to process the sequencing data.", "type": "object", "required": [ "pipeSection" @@ -177,17 +179,21 @@ "items": { "type": "object", "required": [ - "prevStepIndex" + "stepIndex", + "prevStepIndex", + "program", + "version" ], "properties": { "stepIndex": { "type": "string", + "description": "Lexically ordered value that allows for the pipe section to be hierarchically ordered. The primitive data type is used to allow for pipe sections to be inserted later on.", "title": "Step Index" }, "prevStepIndex": { "oneOf": [ { - "title": "String value", + "title": "Float value", "type": "string" }, { @@ -195,22 +201,27 @@ "type": "null" } ], + "description": "stepIndex of the previous step in the workflow. Set Null if the first pipe section.", "title": "Prev Step Index" }, "program": { "type": "string", + "description": "Name of the program or process for primary analysis. This may include a test or condition that leads to branching in the workflow.", "title": "Program" }, "version": { "type": "string", + "description": " Version of the program or process for primary analysis. ", "title": "Version" }, "notes": { "type": "string", + "description": "Notes about the program or process for primary analysis. 
", "title": "Notes" }, "sectionName": { "type": "string", + "description": "If there are multiple sections specify the name of the processing pipeline section.", "title": "Section Name" } } @@ -225,12 +236,18 @@ "properties": { "sampleDemuxDirective": { "type": "object", + "description": "Tells the Archive who will execute the sample demultiplexing operation.", + "title": "Sample Demux Directive", "properties": { "leaveAsPool": { - "type": "string" + "type": "string", + "description": "There shall be no sample de-multiplexing at the level of assigning individual reads to sample pool members.", + "title": "Leave As Pool" }, "submitterDemultiplexed": { - "type": "string" + "type": "string", + "description": "The submitter has assigned individual reads to sample pool members by providing individual files containing reads with the same member assignment.", + "title": "Submitter Demultiplexed" } } } @@ -687,19 +704,8 @@ }, "processing": { "title": "Processing", - "oneOf": [ - { - "title": "Single processing", - "type": "string" - }, - { - "title": "Complex Processing", - "type": "array", - "items": { - "$ref": "#/definitions/processingType" - } - } - ] + "type": "object", + "$ref": "#/definitions/processingType" }, "files": { "type": "array", diff --git a/tests/test_files/experiment/ERX000119.json b/tests/test_files/experiment/ERX000119.json index ba4a1d80b..110ab4fa6 100644 --- a/tests/test_files/experiment/ERX000119.json +++ b/tests/test_files/experiment/ERX000119.json @@ -1,4 +1,4 @@ -{ +{ "title": "Experiment", "alias": "NA18504.3", "centerName": "MPIMG", @@ -54,7 +54,6 @@ } }, "platform": "AB SOLiD System", - "processing": "true", "experimentAttributes": [ { "tag": "center_name", @@ -66,4 +65,4 @@ "units": "MB" } ] -} +} \ No newline at end of file From 0168656f1443e1085a16a7639ef6ea8f429e9fad Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 10 Jan 2022 17:37:23 +0200 Subject: [PATCH 170/336] add default gender unknown if sample xml add default gender unknown if csv and the gender is known add default gender else add unknown --- metadata_backend/helpers/parser.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/metadata_backend/helpers/parser.py b/metadata_backend/helpers/parser.py index f4c88eaf2..e110360f6 100644 --- a/metadata_backend/helpers/parser.py +++ b/metadata_backend/helpers/parser.py @@ -257,6 +257,8 @@ def element_decode( - if processing is empty do not show it as it is not required - processing pipeSection should be intepreted as an array - processing pipeSection prevStepIndex can be None if not specified empty + - if sampleData does not exist (as it can only be added via forms) we will + add it with default gender unknown """ xsd_type = xsd_type or xsd_element.type @@ -271,6 +273,8 @@ def element_decode( tmp = self.dict((self._to_camel(key.lower()), value) for key, value in self.map_attributes(data.attributes)) if "accession" in tmp: tmp["accessionId"] = tmp.pop("accession") + if "sampleName" in tmp and "sampleData" not in tmp: + tmp["sampleData"] = {"gender": "unknown"} if children is not None: if isinstance(children, dict): for key, value in children.items(): @@ -386,6 +390,12 @@ def parse(self, schema_type: str, content: str) -> List: # Without TaxonID provided we assume the sample relates to # Homo Sapien which has default TaxonID of 9606 _tmp["sampleName"] = {"taxonId": 9606} + # if geneder exists we will format it accordingly + if not bool(_tmp["gender"]): + _tmp["sampleData"] = {"gender": "unknown"} + else: + _tmp["sampleData"] = {"gender": 
_tmp["gender"]} + _tmp.pop("gender") JSONValidator(_tmp, schema_type.lower()).validate _parsed.append(_tmp) From 53dc4564fed2df02c5e657bde37e53d50956c42d Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 10 Jan 2022 19:28:05 +0200 Subject: [PATCH 171/336] correct spellchecker with more info --- .github/config/.spellcheck.yml | 2 +- .github/config/.wordlist.txt | 51 ++++++++++++++++++++++++++++++ CONTRIBUTING.md | 6 ++-- README.md | 2 +- docs/validator.rst | 2 +- metadata_backend/conf/schemas.json | 8 ++--- scripts/pre-commit.sh | 4 +-- 7 files changed, 63 insertions(+), 12 deletions(-) diff --git a/.github/config/.spellcheck.yml b/.github/config/.spellcheck.yml index 2cbf92b62..b44ec0607 100644 --- a/.github/config/.spellcheck.yml +++ b/.github/config/.spellcheck.yml @@ -17,7 +17,7 @@ matrix: - open: '(?P`+)' close: '(?P=open)' # Ignore surrounded in <> as in RST it is link - - open: '<([A-Za-z0-9-_:.]+)|(https?://[^\s/$.?#].[^\s]+|[A-Za-z0-9-_:.]+)' + - open: '<(https?://[^\\s/$.?#].[^\\s]+|[A-Za-z0-9-_:.]+)' close: '>' sources: - 'docs/*.rst' diff --git a/.github/config/.wordlist.txt b/.github/config/.wordlist.txt index 354447279..ecf0c26af 100644 --- a/.github/config/.wordlist.txt +++ b/.github/config/.wordlist.txt @@ -1,3 +1,4 @@ +aai abkhaz accessionid additionalproperties @@ -6,6 +7,7 @@ addobjecttofolder affiliationidentifier affiliationidentifierscheme agp +ajv akan allof alternateidentifier @@ -35,6 +37,7 @@ atac auth authdb automodule +autosummary avaric avestan awardnumber @@ -66,16 +69,20 @@ blastdbinfo bokmål bookchapter boolean +bugfix +buildkit buildx cdd cdna centername centerprojectname +certreqs chamorro checksummethod chia chichewa chip +chmod cli clinvar cloneend @@ -138,6 +145,7 @@ demux descriptiontype designdescription destructure +dev devcontainer divehi dnase @@ -148,10 +156,13 @@ dt dzongkha ean eastboundlongitude +ebi +EBISPOT EDirect ega eissn ena +enasequence entrez entrezdb entrezid @@ -159,6 +170,9 @@ entrezlink enum env epigenetics +eppn +eslint +eslintrc exome expectedbasecalltable experimentattribute @@ -198,6 +212,7 @@ gds genbank genestudio genexus +genindex genomemap genomic genotyping @@ -210,6 +225,7 @@ geolocations geoprofiles gff github +githubusercontent givenname gridion groupedbyschema @@ -225,6 +241,7 @@ histone hmpr homologene hostname +hotfix hq html http @@ -235,6 +252,7 @@ igbo igsn iix illumina +ini insdc interactiveresource interlingua @@ -245,6 +263,7 @@ isni issn istc journalarticle +js json jsoncontent jwk @@ -277,12 +296,14 @@ lims lingala linux lissn +localhost locusname lsid lt luba luxembourgish maincontact +makestyles marshallese matchedge matk @@ -316,6 +337,7 @@ mirna miseq mkdir mnase +modindex mol moltype mongo @@ -331,11 +353,14 @@ nameidentifierscheme namespace nametype nano +nav ncbi ncbisearch ncrna ndebele ndonga +neic +newdraft nextseq nlmcatalog noindex @@ -344,19 +369,23 @@ nominalsdev northboundlatitude novaseq npm +npx nuccore nuosu nynorsk objectdetails objectinsidefolder objectinsidefolderwithtags +objectstatus objectsubmissiontypes objecttags objecttype +objecttypes occitan oecd oidc ojibwe +ol oligo omim oneof @@ -365,6 +394,7 @@ openid orcid orgtrack oromo +oss ossetian outputmanagementplan pacbio @@ -379,6 +409,7 @@ pcsubstance pdf peerreview pgm +phenome physicalobject pipesection pmc @@ -399,6 +430,7 @@ popset pre precedesreadindex preprint +prettierrc prevstepindex primaryid probeset @@ -407,9 +439,12 @@ processingtype promethion proteinclusters protfam +providermetadata publicationyear pubmed py +pycqa 
+pyspelling quickstart randompriming rbcl @@ -419,6 +454,7 @@ readindex readlabel readme readspec +readthedocs readtype redux refcenter @@ -466,6 +502,7 @@ schemas schemetype schemeuri scientificname +sda sdev se secondaryid @@ -482,6 +519,7 @@ sff sha shona sinhala +sllversion snp solid sotho @@ -489,6 +527,7 @@ southboundlatitude spotdescriptor spotlength sra +src srf ssl ssrna @@ -532,6 +571,8 @@ transcriptomics tsonga turkmen twi +txt +ui umi uniqueitems unlocalised @@ -540,6 +581,7 @@ url urllink usedispatch userid +userinfo useselector uuid uyghur @@ -552,8 +594,15 @@ westboundlongitude wga wget wgs +withstyles +wizardcomponents wizardobject +wizardobjectindex +wizardsavedobjectslist wizardsavedobjectslistprops +wizardshowsummarystep +wizardsteps +wizardsubmissionfolderslice wolof wxs xl @@ -565,5 +614,7 @@ xrefdb xrefid xreflink xsd +yaml +yml za zhuang \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c7b51dc2d..8f2e46346 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -23,7 +23,7 @@ Once submitted, the Pull Request will go through a review process, meaning we wi #### Git Branches -We use `develop` branch as the main developopment branch and `master` as the releases branch. +We use `develop` branch as the main development branch and `master` as the releases branch. All Pull Requests related to features should be done against `develop` branch, releases Pull Requests should be done against `master` branch. Give your branch a short descriptive name (like the names between the `<>` below) and prefix the name with something representative for that branch: @@ -42,12 +42,12 @@ We do optimize for readability, and it would be awesome if you go through the co - Indentation should be 4 *spaces* - 120 character limit is almost strict, but can be broken in documentation when hyperlinks go over the limits -- We use [black](https://github.com/psf/black) code formatter and also check for [pep8](https://www.python.org/dev/peps/pep-0008/) and [pep257](https://www.python.org/dev/peps/pep-0257/) with some small exceptions. You can see the stated exceptions in `tox.ini` configuration file +- We use [black](https://github.com/psf/black) for code format and also check for [pep8](https://www.python.org/dev/peps/pep-0008/) and [pep257](https://www.python.org/dev/peps/pep-0257/) with some small exceptions. You can see the stated exceptions in `tox.ini` configuration file - We like to keep things simple, so when possible avoid importing any big libraries. - Tools to help you: - Tox is configured to run bunch of tests: black, flake8, docstrings, missing type hints, mypy; - Tox is also ran in our CI, so please run tox before each push to this repository; - - If you like things to happen automagically, you can add pre-commit hook to your git workflow! Hook can be found from [scripts-folder](scripts) and it includes settings for tox and [pyspelling](https://facelessuser.github.io/pyspelling/) (which is there just for, well, spelling errors). + - If you like things to happen in an automated manner, you can add pre-commit hook to your git workflow! Hook can be found from [scripts-folder](scripts) and it includes settings for tox and [pyspelling](https://facelessuser.github.io/pyspelling/) (which is there just for, well, spelling errors). Thanks, CSC developers diff --git a/README.md b/README.md index 31f38133a..37be4d6b7 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,6 @@ Frontend is built and added as static files to backend while building. 
Metadata submission interface is released under `MIT`, see [LICENSE](LICENSE). -## Contibuting +## contributing If you want to contribute to a project and make it better, your help is very welcome. For more info about how to contribute, see [CONTRIBUTING](CONTRIBUTING.md). diff --git a/docs/validator.rst b/docs/validator.rst index 0528ea204..0c171b00e 100644 --- a/docs/validator.rst +++ b/docs/validator.rst @@ -9,7 +9,7 @@ The tool can be found and installed from `metadata-submitter-tools repository Date: Mon, 10 Jan 2022 22:24:58 +0200 Subject: [PATCH 172/336] add descriptions and correct json schemas --- .github/config/.wordlist.txt | 3 + .../helpers/schemas/ena_analysis.json | 108 ++++++++----- metadata_backend/helpers/schemas/ena_dac.json | 44 +++++- .../helpers/schemas/ena_dataset.json | 25 +-- .../helpers/schemas/ena_experiment.json | 106 +++++++++---- .../helpers/schemas/ena_policy.json | 40 +++-- metadata_backend/helpers/schemas/ena_run.json | 144 +++++++++++++----- .../helpers/schemas/ena_sample.json | 15 +- .../helpers/schemas/ena_study.json | 15 +- 9 files changed, 345 insertions(+), 155 deletions(-) diff --git a/.github/config/.wordlist.txt b/.github/config/.wordlist.txt index ecf0c26af..cd8243bc2 100644 --- a/.github/config/.wordlist.txt +++ b/.github/config/.wordlist.txt @@ -113,6 +113,8 @@ curation currentsubmissiontype customfields dac +dacAttribute +dacAttributes daclinks dacref datacite @@ -574,6 +576,7 @@ twi txt ui umi +unencryptedChecksum uniqueitems unlocalised uri diff --git a/metadata_backend/helpers/schemas/ena_analysis.json b/metadata_backend/helpers/schemas/ena_analysis.json index c232bf055..0a8501462 100644 --- a/metadata_backend/helpers/schemas/ena_analysis.json +++ b/metadata_backend/helpers/schemas/ena_analysis.json @@ -74,7 +74,7 @@ ], "properties": { "entrezDb": { - "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references.", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references. Also known as Entrez Direct (EDirect) .", "title": "Database", "type": "string", "enum": [ @@ -161,6 +161,8 @@ }, "sequenceType": { "$id": "#/definitions/sequenceType", + "title": "Reference Alignment", + "additionalProperties": true, "type": "object", "properties": { "assembly": { @@ -170,23 +172,27 @@ { "type": "object", "title": "Standard", + "description": "A standard genome assembly.", "required": [ - "accessionId" + "accession" ], "properties": { "refname": { "type": "string", + "description": "A recognized name for the genome assembly.", "title": "Reference name" }, - "accessionId": { + "accession": { "type": "string", - "title": "Accession Id" + "description": "Accession.version with version being mandatory.", + "title": "Accession.version" } } }, { "type": "object", "title": "Custom", + "description": "Other genome assembly.", "required": [ "description" ], @@ -213,20 +219,23 @@ "sequence": { "type": "array", "title": "Sequence", + "description": "Reference sequence details.", "items": { "type": "object", "required": [ - "accessionId" + "accession" ], "additionalProperties": true, "properties": { "label": { "type": "string", + "description": "This is how Reference Sequence is labeled in submission file(s). It is equivalent to SQ label in BAM. 
Optional when submitted file uses INSDC accession.version.", "title": "Label" }, - "accessionId": { + "accession": { "type": "string", - "title": "Accession Id" + "description": "Accession.version with version being mandatory", + "title": "Accession.version" } } } @@ -236,23 +245,28 @@ "identifiers": { "$id": "#/definitions/identifiers", "title": "Identifiers", + "description": "Identifiers to be used in the International Nucleotide Sequence Database Collaboration (INSDC) namespace.", "type": "object", "properties": { "primaryId": { "type": "string", + "description": "A primary identifier in the INSDC namespace.", "title": "Primary Id" }, "secondaryId": { "type": "string", + "description": "A secondary identifier in the INSDC namespace.", "title": "Secondary Id" }, "uuid": { "type": "string", + "description": "A universally unique identifier that requires no namespace.", "title": "UUID" }, "externalId": { "type": "object", "title": "External Id", + "description": "An identifier from a public non-INSDC resource.", "properties": { "namespace": { "type": "string", @@ -267,6 +281,7 @@ "submitterId": { "type": "object", "title": "Submitter Id", + "description": "A submitter provided identifier.", "properties": { "namespace": { "type": "string", @@ -285,20 +300,19 @@ "additionalProperties": true, "type": "object", "properties": { - "label": { - "type": "string", - "title": "Label" - }, "refname": { "type": "string", + "description": "Identifies an object by name within the namespace defined by attribute refcenter", "title": "Record name" }, "refcenter": { "type": "string", + "description": "The namespace of the attribute refname.", "title": "Center Namespace" }, "accessionId": { "type": "string", + "description": "Identifies a record by its accession. The scope of resolution is the entire Storage.", "title": "Accession Id" }, "identifiers": { @@ -319,45 +333,49 @@ "properties": { "filename": { "type": "string", - "title": "Filename" + "description": "The name or relative path name of a run data file.", + "title": "File Name" }, "filetype": { "type": "string", - "title": "Filetype", + "description": "The Analysis data file model/type.", + "title": "File type", "enum": [ "agp", "bai", "bam", - "bcf_aggregate", "bcf", "bed", - "BioNano_native", - "chromosome_list", + "BioNano native", + "chromosome list", "crai", "cram", - "csi", "fasta", "fastq", "flatfile", "gff", "info", - "Kallisto_native", + "Kallisto native", "manifest", - "other", - "phenotype_file", - "readme_file", - "sample_list", + "phenotype file", + "readme file", + "sample list", + "sff", + "sra", + "srf", "tab", "tabix", - "unlocalised_list", - "vcf_aggregate", + "unlocalised list", + "vcf aggregate", "vcf", - "wig" + "wig", + "other" ] }, "checksumMethod": { "type": "string", "title": "Checksum Method", + "description": "Checksum method used MD5 or SHA-256.", "enum": [ "MD5", "SHA-256" @@ -365,7 +383,13 @@ }, "checksum": { "type": "string", + "description": "Checksum of uncompressed file.", "title": "Checksum" + }, + "unencryptedChecksum": { + "type": "string", + "description": "Checksum of un-encrypted file (used in conjunction with checksum of encrypted file).", + "title": "Un-encrypted Checksum" } } }, @@ -414,7 +438,7 @@ }, "minGapLength": { "type": "number", - "title": "MinGapLength" + "title": "Min Gap Length" }, "molType": { "type": "string", @@ -504,20 +528,30 @@ { "type": "object", "title": "Standard", + "description": "A standard genome assembly.", + "required": [ + "accession" + ], "properties": { "refname": { "type": 
"string", + "description": "A recognized name for the genome assembly.", "title": "Reference name" }, - "accessionId": { + "accession": { "type": "string", - "title": "Accession Id" + "description": "Accession.version with version being mandatory.", + "title": "Accession.version" } } }, { "type": "object", "title": "Custom", + "description": "Other genome assembly.", + "required": [ + "description" + ], "properties": { "label": { "type": "string", @@ -541,20 +575,23 @@ "sequence": { "type": "array", "title": "Sequence", + "description": "Reference sequence details.", "items": { "type": "object", "required": [ - "accessionId" + "accession" ], "additionalProperties": true, "properties": { "label": { "type": "string", + "description": "This is how Reference Sequence is labeled in submission file(s). It is equivalent to SQ label in BAM. Optional when submitted file uses INSDC accession.version.", "title": "Label" }, - "accessionId": { + "accession": { "type": "string", - "title": "Accession Id" + "description": "Accession.version with version being mandatory", + "title": "Accession.version" } } } @@ -917,20 +954,17 @@ }, "studyRef": { "title": "Study Reference", - "description": "Identifies the associated study.", + "description": "Identifies the associated parent study.", "$ref": "#/definitions/reference" }, "experimentRef": { "title": "Experiment Reference", "description": "Identifies the associated experiment.", - "type": "array", - "items": { - "$ref": "#/definitions/reference" - } + "$ref": "#/definitions/reference" }, "sampleRef": { "title": "Sample Reference", - "description": "Identifies the associated sample.", + "description": "Identifies the associated sample(s).", "type": "array", "items": { "$ref": "#/definitions/reference" diff --git a/metadata_backend/helpers/schemas/ena_dac.json b/metadata_backend/helpers/schemas/ena_dac.json index 10a64df05..4afa76cc3 100644 --- a/metadata_backend/helpers/schemas/ena_dac.json +++ b/metadata_backend/helpers/schemas/ena_dac.json @@ -74,7 +74,7 @@ ], "properties": { "entrezDb": { - "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references.", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references. Also known as Entrez Direct (EDirect) .", "title": "Database", "type": "string", "enum": [ @@ -153,24 +153,47 @@ "email": { "type": "string", "title": "Contact Email", - "description": "email of the person to contact.", + "description": "Email of the person to contact.", "format": "email" }, "telephoneNumber": { "type": "string", "title": "Contact Telephone Number", - "description": "telephone number of the person to contact.", + "description": "Telephone number of the person to contact.", "pattern": "^[-a-zA-Z0-9-()+ ]*" }, "organisation": { "type": "string", "title": "Organisation", - "description": "Center or institution name." + "description": "Center or institution name. We will use ROR to suggest an organisation." }, "mainContact": { "type": "boolean", "title": "Main Contact", - "description": "If true then this is the main contact." + "description": "If selected then this is the main contact for the DAC." 
+ } + } + }, + "dacAttribute": { + "$id": "#/definitions/dacAttribute", + "type": "object", + "title": "DAC Attribute", + "required": [ + "tag", + "value" + ], + "properties": { + "tag": { + "type": "string", + "title": "Tag title" + }, + "value": { + "type": "string", + "title": "Description" + }, + "units": { + "type": "string", + "title": "Optional scientific units." } } } @@ -184,6 +207,7 @@ "contacts": { "type": "array", "title": "Contacts", + "description": "List of persons that ar part of the Data Access Committee. At least one main contact is required.", "items": { "$ref": "#/definitions/contact" }, @@ -209,7 +233,7 @@ }, "title": { "title": "DAC Title", - "description": "Short text that can be used to call out DAC records in searches or in displays.", + "description": "Title of the Data Access Committee (DAC) that will approve applications to the datasets.", "type": "string" }, "dacLinks": { @@ -219,6 +243,14 @@ "items": { "$ref": "#/definitions/Links" } + }, + "dacAttributes": { + "type": "array", + "title": "Study Attributes", + "description": "Properties and attributes of the DAC. These can be entered as free-form tag-value pairs. Submitters may be asked to follow a community established ontology when describing the work.", + "items": { + "$ref": "#/definitions/dacAttribute" + } } } } \ No newline at end of file diff --git a/metadata_backend/helpers/schemas/ena_dataset.json b/metadata_backend/helpers/schemas/ena_dataset.json index 967069d8e..15fb6816e 100644 --- a/metadata_backend/helpers/schemas/ena_dataset.json +++ b/metadata_backend/helpers/schemas/ena_dataset.json @@ -74,7 +74,7 @@ ], "properties": { "entrezDb": { - "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references.", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references. Also known as Entrez Direct (EDirect) .", "title": "Database", "type": "string", "enum": [ @@ -162,23 +162,28 @@ "identifiers": { "$id": "#/definitions/identifiers", "title": "Identifiers", + "description": "Identifiers to be used in the International Nucleotide Sequence Database Collaboration (INSDC) namespace.", "type": "object", "properties": { "primaryId": { "type": "string", + "description": "A primary identifier in the INSDC namespace.", "title": "Primary Id" }, "secondaryId": { "type": "string", + "description": "A secondary identifier in the INSDC namespace.", "title": "Secondary Id" }, "uuid": { "type": "string", + "description": "A universally unique identifier that requires no namespace.", "title": "UUID" }, "externalId": { "type": "object", "title": "External Id", + "description": "An identifier from a public non-INSDC resource.", "properties": { "namespace": { "type": "string", @@ -193,6 +198,7 @@ "submitterId": { "type": "object", "title": "Submitter Id", + "description": "A submitter provided identifier.", "properties": { "namespace": { "type": "string", @@ -211,20 +217,19 @@ "additionalProperties": true, "type": "object", "properties": { - "label": { - "type": "string", - "title": "Label" - }, "refname": { "type": "string", + "description": "Identifies an object by name within the namespace defined by attribute refcenter", "title": "Record name" }, "refcenter": { "type": "string", + "description": "The namespace of the attribute refname.", "title": "Center Namespace" }, "accessionId": { "type": "string", + "description": "Identifies a record by its accession. 
The scope of resolution is the entire Storage.", "title": "Accession Id" }, "identifiers": { @@ -241,7 +246,7 @@ "properties": { "title": { "title": "Dataset Title", - "description": "Short text that can be used to call out data sets in searches or in displays.", + "description": "Title of the Dataset as would be used in a publication.", "type": "string" }, "datasetType": { @@ -270,17 +275,17 @@ }, "description": { "title": "Dataset Description", - "description": "Free-form text describing the data sets.", + "description": "Free-form text describing the Dataset.", "type": "string" }, "policyRef": { "title": "Policy Reference", - "description": "Identifies the data access committee to which this policy pertains.", + "description": "Identifies the data access policy controlling this Dataset.", "$ref": "#/definitions/reference" }, "runRef": { "title": "Run Reference", - "description": "Identifies the runs which are part of this dataset.", + "description": "Identifies the Runs which are part of this Dataset.", "type": "array", "items": { "$ref": "#/definitions/reference" @@ -288,7 +293,7 @@ }, "analysisRef": { "title": "Analysis Reference", - "description": "Identifies the data access committee to which this policy pertains.", + "description": "Identifies the Analyses which are part of this Dataset.", "type": "array", "items": { "$ref": "#/definitions/reference" diff --git a/metadata_backend/helpers/schemas/ena_experiment.json b/metadata_backend/helpers/schemas/ena_experiment.json index 4bed3be89..b03bd91de 100644 --- a/metadata_backend/helpers/schemas/ena_experiment.json +++ b/metadata_backend/helpers/schemas/ena_experiment.json @@ -74,7 +74,7 @@ ], "properties": { "entrezDb": { - "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references.", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references. Also known as Entrez Direct (EDirect) .", "title": "Database", "type": "string", "enum": [ @@ -206,7 +206,7 @@ "libraryType": { "$id": "#/definitions/libraryType", "type": "object", - "title": "Library used for experiment design", + "title": "Library used for experiment design.", "required": [ "designDescription", "sampleDescriptor", @@ -232,7 +232,7 @@ ] }, "libraryDescriptor": { - "description": "The LIBRARY_DESCRIPTOR specifies the origin of the material being sequenced and any treatments that the material might have undergone that affect the sequencing result. This specification is needed even if the platform does not require a library construction step per se.", + "description": "The Library Descriptor specifies the origin of the material being sequenced and any treatments that the material might have undergone that affect the sequencing result. 
This specification is needed even if the platform does not require a library construction step per se.", "title": "Library Descriptor", "type": "object", "required": [ @@ -247,6 +247,7 @@ }, "libraryStrategy": { "title": "Library Strategy", + "description": "Sequencing technique intended for this library.", "type": "string", "enum": [ "AMPLICON", @@ -289,6 +290,7 @@ }, "librarySource": { "title": "Library Source", + "description": "The Library Source specifies the type of source material that is being sequenced.", "type": "string", "enum": [ "GENOMIC SINGLE CELL", @@ -304,6 +306,7 @@ }, "librarySelection": { "title": "Library Selection", + "description": "Method used to enrich the target in the sequence library preparation.", "type": "string", "enum": [ "5-methylcytidine antibody", @@ -341,6 +344,7 @@ }, "libraryLayout": { "title": "Library Layout", + "description": "Library Layout specifies whether to expect single, paired, or other configuration of reads. In the case of paired reads, information about the relative distance and orientation is specified.", "type": "string", "enum": [ "single", @@ -403,7 +407,7 @@ } }, "spotDescriptor": { - "description": "The SPOT_DESCRIPTOR specifies how to decode the individual reads of interest from the monolithic spot sequence. The spot descriptor contains aspects of the experimental design, platform, and processing information. There will be two methods of specification: one will be an index into a table of typical decodings, the other being an exact specification. This construct is needed for loading data and for interpreting the loaded runs. It can be omitted if the loader can infer read layout (from multiple input files or from one input files).", + "description": "The spot descriptor specifies how to decode the individual reads of interest from the monolithic spot sequence. The spot descriptor contains aspects of the experimental design, platform, and processing information. There will be two methods of specification: one will be an index into a table of typical decodings, the other being an exact specification. This construct is needed for loading data and for interpreting the loaded runs. It can be omitted if the loader can infer read layout (from multiple input files or from one input files).", "title": "Spot Descriptor", "type": "object", "required": [ @@ -412,6 +416,7 @@ "properties": { "spotLength": { "title": "Spot Length", + "description": "Number of base/color calls, cycles, or flows per spot (raw sequence length or flow length including all application and technical tags and mate pairs, but not including gap lengths). This value will be platform dependent, library dependent, and possibly run dependent. 
Variable length platforms will still have a constant flow/cycle length.", "type": "number", "minimum": 0 }, @@ -421,11 +426,13 @@ "properties": { "readIndex": { "title": "Read Index", + "description": "Read Index starts at 0 and is incrementally increased for each sequential read_spec within a spot decode specification", "type": "number", "minimum": 0 }, "readLabel": { "title": "Read Label", + "description": "Read Label is a name for this tag, and can be used to on output to determine read name, for example F or R.", "type": "string" }, "readClass": { @@ -438,28 +445,32 @@ }, "readType": { "title": "Read Type", + "description": "", "type": "string", "enum": [ - "Adapter", - "Barcode", "Forward", - "Linker", - "Primer", "Reverse", + "Adapter", + "Primer", + "Linker", + "BarCode", "Other" ] }, "relativeOrder": { "type": "object", "title": "Relative Order", + "description": "The read is located beginning at the offset or cycle relative to another read. This choice is appropriate for example when specifying a read that follows a variable length expected sequence(s).", "properties": { "followsReadIndex": { - "title": "Read Index", + "title": "Follows Read Index", "type": "number", + "description": "Specify the read index that precedes this read.", "minimum": 0 }, "precedesReadIndex": { - "title": "Read Index", + "title": "Precedes Read Index", + "description": "Specify the read index that follows this read.", "type": "number", "minimum": 0 } @@ -467,45 +478,69 @@ }, "baseCoord": { "title": "Base Coordinate", + "description": "The location of the read start in terms of base count (1 is beginning of spot).", "type": "number" }, "expectedBaseCallTable": { "title": "Expected Base Call Table", + "description": " A set of choices of expected base calls for a current read. Read will be zero-length if none is found.", "type": "array", "items": { "type": "object", "properties": { "baseCall": { - "title": "Base Call", - "type": "string" + "type": "string", + "description": "Element's body contains a basecall, attribute provide description of this read meaning as well as matching rules.", + "title": "Base Call" }, "readGroupTag": { - "title": "Read group tag", - "type": "string" + "type": "string", + "description": "When match occurs, the read will be tagged with this group membership.", + "title": "Read Group Tag" }, "minMatch": { - "title": "Min match", "type": "number", - "minimum": 0 + "description": " Minimum number of matches to trigger identification.", + "minimum": 0, + "title": "Min Match" }, "maxMisMatch": { - "title": "Max mismatch", + "description": "Maximum number of mismatches.", "type": "number", - "minimum": 0 + "minimum": 0, + "title": "Max MisMatch" }, "matchEdge": { - "title": "Match edge", - "type": "string" + "description": "Where the match should occur. Changes the rules on how min_match and max_mismatch are counted.", + "type": "string", + "title": "Match Edge" + }, + "full": { + "description": "Only @max_mismatch influences matching process.", + "type": "string", + "title": "Full" + }, + "start": { + "description": "Both matches and mismatches are counted. When @max_mismatch is exceeded - it is not a match. When @min_match is reached - match is declared. ", + "type": "string", + "title": "Start" + }, + "end": { + "description": "Both matches and mismatches are counted. When @max_mismatch is exceeded - it is not a match. 
When @min_match is reached - match is declared.", + "type": "string", + "title": "End" }, "defaultLength": { - "title": "Default length", + "description": "Specify whether the spot should have a default length for this tag if the expected base cannot be matched.", "type": "number", - "minimum": 0 + "minimum": 0, + "title": "Default Length" }, "baseCoord": { - "title": "Base coordinate", + "description": "Specify an optional starting point for tag (base offset from 1).", "type": "number", - "minimum": 0 + "minimum": 0, + "title": "Base Coordinate" } } } @@ -568,7 +603,7 @@ }, "version": { "type": "string", - "description": " Version of the program or process for primary analysis. ", + "description": "Version of the program or process for primary analysis. ", "title": "Version" }, "notes": { @@ -589,11 +624,11 @@ "directives": { "type": "object", "title": "Directives", - "description": "Processing directives tell the Sequence Read Archive how to treat the input data, if any treatment is requested.", + "description": "Processing directives tell the Sequence Read Storage how to treat the input data, if any treatment is requested.", "properties": { "sampleDemuxDirective": { "type": "object", - "description": "Tells the Archive who will execute the sample demultiplexing operation.", + "description": "Tells the Sequence Read Storage who will execute the sample demultiplexing operation.", "title": "Sample Demux Directive", "properties": { "leaveAsPool": { @@ -615,23 +650,28 @@ "identifiers": { "$id": "#/definitions/identifiers", "title": "Identifiers", + "description": "Identifiers to be used in the International Nucleotide Sequence Database Collaboration (INSDC) namespace.", "type": "object", "properties": { "primaryId": { "type": "string", + "description": "A primary identifier in the INSDC namespace.", "title": "Primary Id" }, "secondaryId": { "type": "string", + "description": "A secondary identifier in the INSDC namespace.", "title": "Secondary Id" }, "uuid": { "type": "string", + "description": "A universally unique identifier that requires no namespace.", "title": "UUID" }, "externalId": { "type": "object", "title": "External Id", + "description": "An identifier from a public non-INSDC resource.", "properties": { "namespace": { "type": "string", @@ -646,6 +686,7 @@ "submitterId": { "type": "object", "title": "Submitter Id", + "description": "A submitter provided identifier.", "properties": { "namespace": { "type": "string", @@ -664,20 +705,19 @@ "additionalProperties": false, "type": "object", "properties": { - "label": { - "type": "string", - "title": "Label" - }, "refname": { "type": "string", + "description": "Identifies an object by name within the namespace defined by attribute refcenter", "title": "Record name" }, "refcenter": { "type": "string", + "description": "The namespace of the attribute refname.", "title": "Center Namespace" }, "accessionId": { "type": "string", + "description": "Identifies a record by its accession. The scope of resolution is the entire Storage.", "title": "Accession Id" }, "identifiers": { @@ -696,7 +736,7 @@ "properties": { "title": { "title": "Experiment Title", - "description": "Short text that can be used to call out experiment records in searches or in displays. 
This element is technically optional but should be used for all new records.", + "description": "Short text that can be used to call out experiment records in searches or in displays.", "type": "string" }, "description": { @@ -716,7 +756,7 @@ }, "platform": { "title": "Platform / Instrument", - "description": " The PLATFORM record selects which sequencing platform and platform-specific runtime parameters. This will be determined by the Center.", + "description": "The PLATFORM record selects which sequencing platform and platform-specific runtime parameters. This will be determined by the Center.", "type": "string", "enum": [ "454 GS 20", diff --git a/metadata_backend/helpers/schemas/ena_policy.json b/metadata_backend/helpers/schemas/ena_policy.json index c7eb4b7ab..ad86855ec 100644 --- a/metadata_backend/helpers/schemas/ena_policy.json +++ b/metadata_backend/helpers/schemas/ena_policy.json @@ -74,7 +74,7 @@ ], "properties": { "entrezDb": { - "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references.", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references. Also known as Entrez Direct (EDirect) .", "title": "Database", "type": "string", "enum": [ @@ -140,7 +140,7 @@ "$id": "#/definitions/dataUseType", "type": "object", "title": "Data Use Type", - "description": "tag title and its associated value (description)", + "description": "Data Use ontology", "required": [ "ontology", "code", @@ -149,18 +149,19 @@ "properties": { "modifier": { "title": "List of Use Modifiers", + "description": "If Data Use Ontology used, see https://github.com/EBISPOT/DUO for examples.", "type": "array", "items": { "type": "object", "properties": { "modifier": { - "description": "Describes modifiers to the Data Use Restriction", + "description": "Describes modifiers to the Data Use Restriction.", "title": "Modifier", "$ref": "#/definitions/xrefLink" }, "url": { "type": "string", - "description": "Link to URL describing the Data Use" + "description": "Link to URL describing the Data Use." } } } @@ -172,10 +173,12 @@ }, "ontology": { "type": "string", - "title": "Ontology abbreviation, e.g. 
DUO for Data Use Ontology" + "description": "If Data Use Ontology then use DUO.", + "title": "Ontology abbreviation" }, "code": { "type": "string", + "description": "Where the ontology can be found.", "title": "Code for the ontology" }, "version": { @@ -210,23 +213,28 @@ "identifiers": { "$id": "#/definitions/identifiers", "title": "Identifiers", + "description": "Identifiers to be used in the International Nucleotide Sequence Database Collaboration (INSDC) namespace.", "type": "object", "properties": { "primaryId": { "type": "string", + "description": "A primary identifier in the INSDC namespace.", "title": "Primary Id" }, "secondaryId": { "type": "string", + "description": "A secondary identifier in the INSDC namespace.", "title": "Secondary Id" }, "uuid": { "type": "string", + "description": "A universally unique identifier that requires no namespace.", "title": "UUID" }, "externalId": { "type": "object", "title": "External Id", + "description": "An identifier from a public non-INSDC resource.", "properties": { "namespace": { "type": "string", @@ -241,6 +249,7 @@ "submitterId": { "type": "object", "title": "Submitter Id", + "description": "A submitter provided identifier.", "properties": { "namespace": { "type": "string", @@ -259,20 +268,19 @@ "additionalProperties": true, "type": "object", "properties": { - "label": { - "type": "string", - "title": "Label" - }, "refname": { "type": "string", + "description": "Identifies an object by name within the namespace defined by attribute refcenter", "title": "Record name" }, "refcenter": { "type": "string", + "description": "The namespace of the attribute refname.", "title": "Center Namespace" }, "accessionId": { "type": "string", + "description": "Identifies a record by its accession. The scope of resolution is the entire Storage.", "title": "Accession Id" }, "identifiers": { @@ -282,7 +290,7 @@ } }, "type": "object", - "description": "Describes an object that contains data access policy information.", + "description": "Describes an object that contains data access Policy information.", "required": [ "dacRef", "policy" @@ -290,21 +298,22 @@ "properties": { "title": { "title": "Policy Title", - "description": "Short text that can be used to call out data access policies in searches or in displays.", + "description": "Title of the Policy so it can be easily be found.", "type": "string" }, "dacRef": { "title": "Data Access Committee Reference", - "description": "Identifies the data access committee to which this policy pertains.", + "description": "Identifies the data access committee to which this Policy pertains.", "$ref": "#/definitions/reference" }, "policy": { "title": "Policy", - "description": "Identifies the data access committee to which this policy pertains.", + "description": "Policies can be added either by adding the text of the Policy of by pointing to an existing URL.", "oneOf": [ { "type": "object", "title": "Policy Text", + "description": "Text containing the policy.", "properties": { "policyText": { "type": "string", @@ -318,6 +327,7 @@ { "type": "object", "title": "Policy URL", + "description": "Links to the Policy text / information.", "properties": { "policyUrl": { "type": "string", @@ -341,7 +351,7 @@ }, "dataUses": { "type": "array", - "description": "Data use ontologies (DUO) related to the policy", + "description": "Data use ontologies (DUO) related to the Policy. 
More information at: https://github.com/EBISPOT/DUO .", "items": { "$ref": "#/definitions/dataUseType" }, @@ -350,7 +360,7 @@ "policyAttributes": { "type": "array", "title": "Policy Attributes", - "description": "Properties and attributes of the policy. These can be entered as free-form tag-value pairs. Submitters may be asked to follow a community established ontology when describing the work.", + "description": "Properties and attributes of the Policy. These can be entered as free-form tag-value pairs. Submitters may be asked to follow a community established ontology when describing the work.", "items": { "$ref": "#/definitions/policyAttribute" } diff --git a/metadata_backend/helpers/schemas/ena_run.json b/metadata_backend/helpers/schemas/ena_run.json index 08f7cc3d0..30af5db49 100644 --- a/metadata_backend/helpers/schemas/ena_run.json +++ b/metadata_backend/helpers/schemas/ena_run.json @@ -74,7 +74,7 @@ ], "properties": { "entrezDb": { - "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references.", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references. Also known as Entrez Direct (EDirect) .", "title": "Database", "type": "string", "enum": [ @@ -211,7 +211,7 @@ }, "version": { "type": "string", - "description": " Version of the program or process for primary analysis. ", + "description": "Version of the program or process for primary analysis. ", "title": "Version" }, "notes": { @@ -232,11 +232,11 @@ "directives": { "type": "object", "title": "Directives", - "description": "Processing directives tell the Sequence Read Archive how to treat the input data, if any treatment is requested.", + "description": "Processing directives tell the Sequence Read Storage how to treat the input data, if any treatment is requested.", "properties": { "sampleDemuxDirective": { "type": "object", - "description": "Tells the Archive who will execute the sample demultiplexing operation.", + "description": "Tells the Sequence Read Storage who will execute the sample demultiplexing operation.", "title": "Sample Demux Directive", "properties": { "leaveAsPool": { @@ -258,23 +258,28 @@ "identifiers": { "$id": "#/definitions/identifiers", "title": "Identifiers", + "description": "Identifiers to be used in the International Nucleotide Sequence Database Collaboration (INSDC) namespace.", "type": "object", "properties": { "primaryId": { "type": "string", + "description": "A primary identifier in the INSDC namespace.", "title": "Primary Id" }, "secondaryId": { "type": "string", + "description": "A secondary identifier in the INSDC namespace.", "title": "Secondary Id" }, "uuid": { "type": "string", + "description": "A universally unique identifier that requires no namespace.", "title": "UUID" }, "externalId": { "type": "object", "title": "External Id", + "description": "An identifier from a public non-INSDC resource.", "properties": { "namespace": { "type": "string", @@ -289,6 +294,7 @@ "submitterId": { "type": "object", "title": "Submitter Id", + "description": "A submitter provided identifier.", "properties": { "namespace": { "type": "string", @@ -307,20 +313,19 @@ "additionalProperties": true, "type": "object", "properties": { - "label": { - "type": "string", - "title": "Label" - }, "refname": { "type": "string", + "description": "Identifies an object by name within the namespace defined by attribute refcenter", "title": "Record name" }, "refcenter": { "type": "string", + "description": "The 
namespace of the attribute refname.", "title": "Center Namespace" }, "accessionId": { "type": "string", + "description": "Identifies a record by its accession. The scope of resolution is the entire Storage.", "title": "Accession Id" }, "identifiers": { @@ -341,19 +346,21 @@ "properties": { "filename": { "type": "string", - "title": "filename" + "description": "The name or relative path name of a run data file.", + "title": "File Name" }, "filetype": { "type": "string", - "title": "filetype", + "description": "The Run data file model/type.", + "title": "File Type", "enum": [ "agp", "bai", "bam", "bcf", "bed", - "BioNano_native", - "chromosome_list", + "BioNano native", + "chromosome list", "crai", "cram", "fasta", @@ -361,18 +368,18 @@ "flatfile", "gff", "info", - "Kallisto_native", + "Kallisto native", "manifest", - "phenotype_file", - "readme_file", - "sample_list", + "phenotype file", + "readme file", + "sample list", "sff", "sra", "srf", "tab", "tabix", - "unlocalised_list", - "vcf_aggregate", + "unlocalised list", + "vcf aggregate", "vcf", "wig", "other" @@ -380,7 +387,8 @@ }, "checksumMethod": { "type": "string", - "title": "checksumMethod", + "title": "Checksum Method", + "description": "Checksum method used MD5 or SHA-256.", "enum": [ "MD5", "SHA-256" @@ -388,10 +396,12 @@ }, "readLabel": { "title": "Read Label", + "description": "The Read Label can associate a certain file to a certain read label defined in the spot descriptor.", "type": "string" }, "readType": { "title": "Read Type", + "description": "Identifies a record by its accession. The scope of resolution is the entire Storage.", "type": "string", "enum": [ "cell_barcode", @@ -405,25 +415,31 @@ }, "checksum": { "type": "string", - "title": "checksum" + "description": "Checksum of uncompressed file.", + "title": "Checksum" + }, + "unencryptedChecksum": { + "type": "string", + "description": "Checksum of un-encrypted file (used in conjunction with checksum of encrypted file).", + "title": "Un-encrypted Checksum" } } } }, "type": "object", - "description": "A run contains a group of reads generated for a particular experiment.", + "description": "A Run contains a group of reads generated for a particular experiment.", "required": [ "experimentRef" ], "properties": { "title": { "title": "Run Title", - "description": "Short text that can be used to define submissions in searches or in displays.", + "description": "Title of the Run as would be used to identify it in reference to the Study and Dataset.", "type": "string" }, "description": { "title": "Run Description", - "description": "Free-form text describing the data sets.", + "description": "Free-form text describing the Run and any relevant information.", "type": "string" }, "runType": { @@ -443,23 +459,27 @@ { "type": "object", "title": "Standard", + "description": "A standard genome assembly.", "required": [ - "accessionId" + "accession" ], "properties": { "refname": { "type": "string", + "description": "A recognized name for the genome assembly.", "title": "Reference name" }, - "accessionId": { + "accession": { "type": "string", - "title": "Accession Id" + "description": "Accession.version with version being mandatory.", + "title": "Accession.version" } } }, { "type": "object", "title": "Custom", + "description": "Other genome assembly.", "required": [ "description" ], @@ -486,20 +506,23 @@ "sequence": { "type": "array", "title": "Sequence", + "description": "Reference sequence details.", "items": { "type": "object", "required": [ - "accessionId" + "accession" ], 
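A minimal sketch of the `accessionId` -> `accession` rename in the surrounding referenceAlignment hunks, assuming a hand-trimmed, hypothetical copy of the "Standard" assembly subschema and only the `jsonschema` package already pinned in requirements.txt:

    from jsonschema import validate

    # Trimmed subset of the "Standard" assembly definition in this hunk
    # (assumption: names mirror the + lines of the patch).
    standard_assembly = {
        "type": "object",
        "required": ["accession"],
        "properties": {
            "refname": {"type": "string"},
            "accession": {"type": "string"},
        },
    }

    # Accession.version now lives in "accession"; a document still using the
    # old "accessionId" key would fail the required-field check.
    validate({"refname": "GRCh38", "accession": "GCA_000001405.15"}, standard_assembly)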
"additionalProperties": true, "properties": { "label": { "type": "string", + "description": "This is how Reference Sequence is labeled in submission file(s). It is equivalent to SQ label in BAM. Optional when submitted file uses INSDC accession.version.", "title": "Label" }, - "accessionId": { + "accession": { "type": "string", - "title": "Accession Id" + "description": "Accession.version with version being mandatory", + "title": "Accession.version" } } } @@ -510,13 +533,13 @@ }, "runDate": { "title": "Run Date", - "description": "Date when the run took place", + "description": "Date when the Run took place.", "type": "string", "format": "date-time" }, "runCenter": { "title": "Run Center", - "description": "If applicable, the name of the contract sequencing center that executed the run.", + "description": "If applicable, the name of the contract sequencing center that executed the Run.", "type": "string" }, "experimentRef": { @@ -538,6 +561,7 @@ "properties": { "spotLength": { "title": "Spot Length", + "description": "Number of base/color calls, cycles, or flows per spot (raw sequence length or flow length including all application and technical tags and mate pairs, but not including gap lengths). This value will be platform dependent, library dependent, and possibly run dependent. Variable length platforms will still have a constant flow/cycle length.", "type": "number", "minimum": 0 }, @@ -547,11 +571,13 @@ "properties": { "readIndex": { "title": "Read Index", + "description": "Read Index starts at 0 and is incrementally increased for each sequential read_spec within a spot decode specification", "type": "number", "minimum": 0 }, "readLabel": { "title": "Read Label", + "description": "Read Label is a name for this tag, and can be used to on output to determine read name, for example F or R.", "type": "string" }, "readClass": { @@ -564,6 +590,7 @@ }, "readType": { "title": "Read Type", + "description": "", "type": "string", "enum": [ "cell_barcode", @@ -578,14 +605,17 @@ "relativeOrder": { "type": "object", "title": "Relative Order", + "description": "The read is located beginning at the offset or cycle relative to another read. This choice is appropriate for example when specifying a read that follows a variable length expected sequence(s).", "properties": { "followsReadIndex": { - "title": "Read Index", + "title": "Follows Read Index", "type": "number", + "description": "Specify the read index that precedes this read.", "minimum": 0 }, "precedesReadIndex": { - "title": "Read Index", + "title": "Precedes Read Index", + "description": "Specify the read index that follows this read.", "type": "number", "minimum": 0 } @@ -593,38 +623,69 @@ }, "baseCoord": { "title": "Base Coordinate", + "description": "The location of the read start in terms of base count (1 is beginning of spot).", "type": "number" }, "expectedBaseCallTable": { "title": "Expected Base Call Table", + "description": " A set of choices of expected base calls for a current read. 
Read will be zero-length if none is found.", "type": "array", "items": { "type": "object", "properties": { "baseCall": { - "type": "string" + "type": "string", + "description": "Element's body contains a basecall, attribute provide description of this read meaning as well as matching rules.", + "title": "Base Call" }, "readGroupTag": { - "type": "string" + "type": "string", + "description": "When match occurs, the read will be tagged with this group membership.", + "title": "Read Group Tag" }, "minMatch": { "type": "number", - "minimum": 0 + "description": " Minimum number of matches to trigger identification.", + "minimum": 0, + "title": "Min Match" }, "maxMisMatch": { + "description": "Maximum number of mismatches.", "type": "number", - "minimum": 0 + "minimum": 0, + "title": "Max MisMatch" }, "matchEdge": { - "type": "string" + "description": "Where the match should occur. Changes the rules on how min_match and max_mismatch are counted.", + "type": "string", + "title": "Match Edge" + }, + "full": { + "description": "Only @max_mismatch influences matching process.", + "type": "string", + "title": "Full" + }, + "start": { + "description": "Both matches and mismatches are counted. When @max_mismatch is exceeded - it is not a match. When @min_match is reached - match is declared. ", + "type": "string", + "title": "Start" + }, + "end": { + "description": "Both matches and mismatches are counted. When @max_mismatch is exceeded - it is not a match. When @min_match is reached - match is declared.", + "type": "string", + "title": "End" }, "defaultLength": { + "description": "Specify whether the spot should have a default length for this tag if the expected base cannot be matched.", "type": "number", - "minimum": 0 + "minimum": 0, + "title": "Default Length" }, "baseCoord": { + "description": "Specify an optional starting point for tag (base offset from 1).", "type": "number", - "minimum": 0 + "minimum": 0, + "title": "Base Coordinate" } } } @@ -635,7 +696,7 @@ }, "platform": { "title": "Platform / Instrument", - "description": " The PLATFORM record selects which sequencing platform and platform-specific runtime parameters. This will be determined by the Center.", + "description": "The PLATFORM record selects which sequencing platform and platform-specific runtime parameters. This will be determined by the Center.", "type": "string", "enum": [ "454 GS 20", @@ -710,6 +771,7 @@ "files": { "type": "array", "title": "Files", + "description": "Data files associated with the Run.", "items": { "$ref": "#/definitions/file" } diff --git a/metadata_backend/helpers/schemas/ena_sample.json b/metadata_backend/helpers/schemas/ena_sample.json index cf158eb9f..d6a0b7fa0 100644 --- a/metadata_backend/helpers/schemas/ena_sample.json +++ b/metadata_backend/helpers/schemas/ena_sample.json @@ -74,7 +74,7 @@ ], "properties": { "entrezDb": { - "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references.", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references. Also known as Entrez Direct (EDirect) .", "title": "Database", "type": "string", "enum": [ @@ -161,6 +161,7 @@ } }, "type": "object", + "description": "A Sample defines an isolate of sequenceable material upon which sequencing experiments can be based. The Sample object may be a surrogate for taxonomy accession or an anonymized individual identifier. 
Or, it may fully specify provenance and isolation method of the starting material.", "required": [ "sampleName" ], @@ -172,7 +173,7 @@ }, "sampleName": { "title": "Sample Names", - "description": "Short text that can be used to call out sample records in search results or in displays.", + "description": "Add relevant information to properly identify the Sample: common and scientific names, taxonomy identifier etc. - information can be retrieved from NCBI Taxonomy Browser.", "type": "object", "required": [ "taxonId" @@ -180,12 +181,12 @@ "properties": { "taxonId": { "type": "integer", - "description": "NCBI Taxonomy Identifier. This is appropriate for individual organisms and some environmental samples.", + "description": "NCBI Taxonomy Identifier, this is appropriate for individual organisms and some environmental samples.", "title": "Taxon ID" }, "scientificName": { "title": "Scientific Name", - "description": "Scientific name of sample that distinguishes its taxonomy. Please use a name or synonym that is tracked in the INSDC Taxonomy database. Also, this field can be used to confirm the TAXON_ID setting.", + "description": "Scientific name of Sample that distinguishes its taxonomy. Please use a name or synonym that is tracked in the INSDC Taxonomy database. Also, this field can be used to confirm the TAXON_ID setting.", "type": "string" }, "commonName": { @@ -197,11 +198,12 @@ }, "description": { "title": "Sample Description", - "description": "More extensive free-form description of the sample.", + "description": "More extensive free-form description of the Sample.", "type": "string" }, "sampleData": { "title": "Sample Data Type", + "description": "Specify if the Sample represents a human or non-human species. Not specifying the type we will consider it human, with unknown gender.", "oneOf": [ { "type": "object", @@ -224,6 +226,7 @@ { "type": "object", "title": "Non Human Sample", + "description": "The non-human Sample requires a free-form description of the data e.g. species, gender if known and other relevant information.", "properties": { "dataDescription": { "type": "string", @@ -247,7 +250,7 @@ "sampleAttributes": { "type": "array", "title": "Sample Attributes", - "description": "Properties and attributes of a sample. These can be entered as free-form tag-value pairs. For certain studies, submitters may be asked to follow a community established ontology when describing the work.", + "description": "Properties and attributes of a Sample. These can be entered as free-form tag-value pairs. For certain studies, submitters may be asked to follow a community established ontology when describing the work.", "items": { "$ref": "#/definitions/sampleAttribute" } diff --git a/metadata_backend/helpers/schemas/ena_study.json b/metadata_backend/helpers/schemas/ena_study.json index 91a1469da..075399396 100644 --- a/metadata_backend/helpers/schemas/ena_study.json +++ b/metadata_backend/helpers/schemas/ena_study.json @@ -74,7 +74,7 @@ ], "properties": { "entrezDb": { - "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references.", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references. 
Also known as Entrez Direct (EDirect) .", "title": "Database", "type": "string", "enum": [ @@ -162,7 +162,7 @@ "studyType": { "$id": "#/definitions/studyType", "title": "Study Type", - "description": "The Study type presents a controlled vocabulary for expressing the overall purpose of the study.", + "description": "The Study type presents a controlled vocabulary for expressing the overall purpose of the Study.", "type": "string", "enum": [ "Cancer Genomics", @@ -184,6 +184,7 @@ } }, "type": "object", + "description": "A Study is a container for a sequencing investigation that may comprise multiple experiments. The Study has an overall goal, but is otherwise minimally defined with a descriptor, zero or more experiments, and zero or more analyses. The submitter may add to the Study web links and properties.", "required": [ "descriptor" ], @@ -198,7 +199,7 @@ "properties": { "studyTitle": { "title": "Study Title", - "description": "Title of the study as would be used in a publication.", + "description": "Title of the Study as would be used in a publication.", "type": "string" }, "studyType": { @@ -219,7 +220,7 @@ }, "studyDescription": { "title": "Study Description", - "description": "More extensive free-form description of the study.", + "description": "More extensive free-form description of the Study.", "type": "string" }, "studyLinks": { @@ -233,19 +234,19 @@ "studyAttributes": { "type": "array", "title": "Study Attributes", - "description": "Properties and attributes of the study. These can be entered as free-form tag-value pairs. For certain studies, submitters may be asked to follow a community established ontology when describing the work.", + "description": "Properties and attributes of the Study. These can be entered as free-form tag-value pairs. For certain studies, submitters may be asked to follow a community established ontology when describing the work.", "items": { "$ref": "#/definitions/studyAttribute" } }, "center": { "title": "Description for Center", - "description": "More for backwards compatibility, we might not need it.", + "description": "Description of the center is intended for backward tracking of the Study record to the submitter's LIMS.", "type": "object", "properties": { "centerProjectName": { "title": "Center Project Name", - "description": " Submitter defined project name. This field is intended for backward tracking of the study record to the submitter's LIMS.", + "description": "Submitter defined project name. 
This field is intended for backward tracking of the Study record to the submitter's LIMS.", "type": "string" } } From bdbfffabb62d2906f3d122d50ea9792c6d99345c Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 10 Jan 2022 23:05:58 +0200 Subject: [PATCH 173/336] allow for accession & process sequence separately not all accession attributes should be accessionId, allow for referenceAlignment accessions to be separate sequence should always be array --- metadata_backend/helpers/parser.py | 28 ++++++++++++++++++++++++++++ tests/test_parser.py | 2 +- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/metadata_backend/helpers/parser.py b/metadata_backend/helpers/parser.py index e110360f6..2cd415bdd 100644 --- a/metadata_backend/helpers/parser.py +++ b/metadata_backend/helpers/parser.py @@ -108,10 +108,21 @@ def _flatten(self, data: Any) -> Union[Dict, List, str, None]: if "assembly" in key: if next(iter(value)) in ["standard", "custom"]: children[key] = next(iter(value.values())) + if "accessionId" in children[key]: + children[key]["accession"] = children[key].pop("accessionId") else: children[key] = value continue + if key == "sequence": + if "sequence" not in children: + children[key] = list() + children[key].append(value) + for d in children[key]: + if "accessionId" in d: + d["accession"] = d.pop("accessionId") + continue + if "analysisType" in key: children[key] = value continue @@ -133,6 +144,21 @@ def _flatten(self, data: Any) -> Union[Dict, List, str, None]: children["files"] = value["files"] continue + if "processing" in key: + if not bool(value): + continue + + if "pipeSection" in key: + if "pipeSection" not in children: + children[key] = list() + children[key].append(value) + continue + + if "prevStepIndex" in key: + if not bool(value): + children[key] = None + continue + if "spotDescriptor" in key: children[key] = value["spotDecodeSpec"] continue @@ -271,6 +297,8 @@ def element_decode( if data.attributes: tmp = self.dict((self._to_camel(key.lower()), value) for key, value in self.map_attributes(data.attributes)) + # we add the bool(children) condition as for referenceAlignment + # this is to distinguish between the attributes if "accession" in tmp: tmp["accessionId"] = tmp.pop("accession") if "sampleName" in tmp and "sampleData" not in tmp: diff --git a/tests/test_parser.py b/tests/test_parser.py index fb5ad297d..5a2d1e2c4 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -73,7 +73,7 @@ def test_analysis_is_parsed(self): analysis_json = self.xml_parser.parse("analysis", analysis_xml) self.assertIn( "GCA_000001405.1", - analysis_json["analysisType"]["processedReads"]["assembly"]["accessionId"], + analysis_json["analysisType"]["processedReads"]["assembly"]["accession"], ) def test_submission_is_parsed(self): From c347ededa45c7e62443789eeb370c412a24edf4d Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 10 Jan 2022 23:53:12 +0200 Subject: [PATCH 174/336] arrange elements for front-end --- .../helpers/schemas/ena_analysis.json | 10 +++---- .../helpers/schemas/ena_dataset.json | 10 +++---- .../helpers/schemas/ena_experiment.json | 30 +++++++++---------- .../helpers/schemas/ena_policy.json | 26 ++++++++-------- metadata_backend/helpers/schemas/ena_run.json | 10 +++---- 5 files changed, 43 insertions(+), 43 deletions(-) diff --git a/metadata_backend/helpers/schemas/ena_analysis.json b/metadata_backend/helpers/schemas/ena_analysis.json index 0a8501462..087b69d16 100644 --- a/metadata_backend/helpers/schemas/ena_analysis.json +++ 
b/metadata_backend/helpers/schemas/ena_analysis.json @@ -300,6 +300,11 @@ "additionalProperties": true, "type": "object", "properties": { + "accessionId": { + "type": "string", + "description": "Identifies a record by its accession. The scope of resolution is the entire Storage.", + "title": "Accession Id" + }, "refname": { "type": "string", "description": "Identifies an object by name within the namespace defined by attribute refcenter", @@ -310,11 +315,6 @@ "description": "The namespace of the attribute refname.", "title": "Center Namespace" }, - "accessionId": { - "type": "string", - "description": "Identifies a record by its accession. The scope of resolution is the entire Storage.", - "title": "Accession Id" - }, "identifiers": { "$ref": "#/definitions/identifiers" } diff --git a/metadata_backend/helpers/schemas/ena_dataset.json b/metadata_backend/helpers/schemas/ena_dataset.json index 15fb6816e..f3fc832b3 100644 --- a/metadata_backend/helpers/schemas/ena_dataset.json +++ b/metadata_backend/helpers/schemas/ena_dataset.json @@ -217,6 +217,11 @@ "additionalProperties": true, "type": "object", "properties": { + "accessionId": { + "type": "string", + "description": "Identifies a record by its accession. The scope of resolution is the entire Storage.", + "title": "Accession Id" + }, "refname": { "type": "string", "description": "Identifies an object by name within the namespace defined by attribute refcenter", @@ -227,11 +232,6 @@ "description": "The namespace of the attribute refname.", "title": "Center Namespace" }, - "accessionId": { - "type": "string", - "description": "Identifies a record by its accession. The scope of resolution is the entire Storage.", - "title": "Accession Id" - }, "identifiers": { "$ref": "#/definitions/identifiers" } diff --git a/metadata_backend/helpers/schemas/ena_experiment.json b/metadata_backend/helpers/schemas/ena_experiment.json index b03bd91de..ed1b8bef7 100644 --- a/metadata_backend/helpers/schemas/ena_experiment.json +++ b/metadata_backend/helpers/schemas/ena_experiment.json @@ -705,6 +705,11 @@ "additionalProperties": false, "type": "object", "properties": { + "accessionId": { + "type": "string", + "description": "Identifies a record by its accession. The scope of resolution is the entire Storage.", + "title": "Accession Id" + }, "refname": { "type": "string", "description": "Identifies an object by name within the namespace defined by attribute refcenter", @@ -715,11 +720,6 @@ "description": "The namespace of the attribute refname.", "title": "Center Namespace" }, - "accessionId": { - "type": "string", - "description": "Identifies a record by its accession. The scope of resolution is the entire Storage.", - "title": "Accession Id" - }, "identifiers": { "$ref": "#/definitions/identifiers" } @@ -744,16 +744,6 @@ "description": "Free-form text describing the data sets.", "type": "string" }, - "studyRef": { - "title": "Study Reference", - "description": "Identifies the associated study.", - "$ref": "#/definitions/reference" - }, - "design": { - "title": "Design", - "description": "The library design including library properties, layout, protocol, targeting information, and spot and gap descriptors. ", - "$ref": "#/definitions/libraryType" - }, "platform": { "title": "Platform / Instrument", "description": "The PLATFORM record selects which sequencing platform and platform-specific runtime parameters. 
This will be determined by the Center.", @@ -823,6 +813,16 @@ "unspecified" ] }, + "studyRef": { + "title": "Study Reference", + "description": "Identifies the associated study.", + "$ref": "#/definitions/reference" + }, + "design": { + "title": "Design", + "description": "The library design including library properties, layout, protocol, targeting information, and spot and gap descriptors. ", + "$ref": "#/definitions/libraryType" + }, "processing": { "title": "Processing", "$ref": "#/definitions/processingType" diff --git a/metadata_backend/helpers/schemas/ena_policy.json b/metadata_backend/helpers/schemas/ena_policy.json index ad86855ec..510ddb9fd 100644 --- a/metadata_backend/helpers/schemas/ena_policy.json +++ b/metadata_backend/helpers/schemas/ena_policy.json @@ -268,6 +268,11 @@ "additionalProperties": true, "type": "object", "properties": { + "accessionId": { + "type": "string", + "description": "Identifies a record by its accession. The scope of resolution is the entire Storage.", + "title": "Accession Id" + }, "refname": { "type": "string", "description": "Identifies an object by name within the namespace defined by attribute refcenter", @@ -278,11 +283,6 @@ "description": "The namespace of the attribute refname.", "title": "Center Namespace" }, - "accessionId": { - "type": "string", - "description": "Identifies a record by its accession. The scope of resolution is the entire Storage.", - "title": "Accession Id" - }, "identifiers": { "$ref": "#/definitions/identifiers" } @@ -341,14 +341,6 @@ } ] }, - "policyLinks": { - "type": "array", - "title": "Policy Links", - "description": "Links to resources related to this experiment or experiment set (publication, datasets, online databases). Used to encode URL links, Entrez links, and xref DB links. ", - "items": { - "$ref": "#/definitions/Links" - } - }, "dataUses": { "type": "array", "description": "Data use ontologies (DUO) related to the Policy. More information at: https://github.com/EBISPOT/DUO .", @@ -357,6 +349,14 @@ }, "title": "Data Use Ontology" }, + "policyLinks": { + "type": "array", + "title": "Policy Links", + "description": "Links to resources related to this experiment or experiment set (publication, datasets, online databases). Used to encode URL links, Entrez links, and xref DB links. ", + "items": { + "$ref": "#/definitions/Links" + } + }, "policyAttributes": { "type": "array", "title": "Policy Attributes", diff --git a/metadata_backend/helpers/schemas/ena_run.json b/metadata_backend/helpers/schemas/ena_run.json index 30af5db49..c4b234f61 100644 --- a/metadata_backend/helpers/schemas/ena_run.json +++ b/metadata_backend/helpers/schemas/ena_run.json @@ -313,6 +313,11 @@ "additionalProperties": true, "type": "object", "properties": { + "accessionId": { + "type": "string", + "description": "Identifies a record by its accession. The scope of resolution is the entire Storage.", + "title": "Accession Id" + }, "refname": { "type": "string", "description": "Identifies an object by name within the namespace defined by attribute refcenter", @@ -323,11 +328,6 @@ "description": "The namespace of the attribute refname.", "title": "Center Namespace" }, - "accessionId": { - "type": "string", - "description": "Identifies a record by its accession. 
The scope of resolution is the entire Storage.", - "title": "Accession Id" - }, "identifiers": { "$ref": "#/definitions/identifiers" } From 7f0ae9d8cf54fa844615136eadada4d0e036d516 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 11 Jan 2022 11:51:42 +0200 Subject: [PATCH 175/336] misses for formatting and styling suggested by https://github.com/CSCfi/metadata-submitter/pull/323#pullrequestreview-848805734 --- .github/config/.wordlist.txt | 26 +++++++++---------- README.md | 2 +- .../helpers/schemas/datacite.json | 12 ++++----- .../helpers/schemas/ena_policy.json | 2 +- metadata_backend/helpers/schemas/folders.json | 12 ++++----- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/.github/config/.wordlist.txt b/.github/config/.wordlist.txt index cd8243bc2..49d435a9b 100644 --- a/.github/config/.wordlist.txt +++ b/.github/config/.wordlist.txt @@ -113,8 +113,8 @@ curation currentsubmissiontype customfields dac -dacAttribute -dacAttributes +dacattribute +dacattributes daclinks dacref datacite @@ -123,7 +123,7 @@ datapaper dataset datasetattribute datasetattributes -datasetIdentifiers +datasetidentifiers datasetlinks datasets datasettype @@ -135,7 +135,7 @@ datepublished datetype dbprobe dbvar -DDI +ddi ddialliance de decodings @@ -159,8 +159,8 @@ dzongkha ean eastboundlongitude ebi -EBISPOT -EDirect +ebispot +edirect ega eissn ena @@ -189,12 +189,12 @@ familyname faroese fasta fastq -FBtr +fbtr filename filetype flatfile flx -FLYBASE +flybase folderid followsreadindex formdata @@ -283,7 +283,7 @@ kwanyama kyrgyz lang leaveaspool -Lexically +lexically libraryconstructionprotocol librarydescriptor librarylayout @@ -292,7 +292,7 @@ libraryselection librarysource librarystrategy librarytype -Lifecycle +lifecycle limburgish lims lingala @@ -538,7 +538,7 @@ studyabstract studyattribute studyattributes studydescription -studyIdentifier +studyidentifier studylinks studyref studytitle @@ -576,7 +576,7 @@ twi txt ui umi -unencryptedChecksum +unencryptedchecksum uniqueitems unlocalised uri @@ -611,7 +611,7 @@ wxs xl xml xmlfile -XMLSchema +xmlschema xref xrefdb xrefid diff --git a/README.md b/README.md index 37be4d6b7..4705c787e 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,6 @@ Frontend is built and added as static files to backend while building. Metadata submission interface is released under `MIT`, see [LICENSE](LICENSE). -## contributing +## Contributing If you want to contribute to a project and make it better, your help is very welcome. For more info about how to contribute, see [CONTRIBUTING](CONTRIBUTING.md). diff --git a/metadata_backend/helpers/schemas/datacite.json b/metadata_backend/helpers/schemas/datacite.json index 7b9b4dc58..3a641f729 100644 --- a/metadata_backend/helpers/schemas/datacite.json +++ b/metadata_backend/helpers/schemas/datacite.json @@ -30,7 +30,7 @@ "affiliation": { "type": "array", "title": "Affiliations", - "description": "The organizational or institutional affiliation of the creator. Start writing the name of the organization or institution and a suggestion will be made from Research Organization Registry (ROR) Community API.", + "description": "The organizational or institutional affiliation of the creator. 
Upon filling the form with the organization or institution suggestion will be made from Research Organization Registry (ROR) Community API.", "items": { "type": "object", "title": "Affiliation Details", @@ -212,7 +212,7 @@ "affiliation": { "type": "array", "title": "Affiliations", - "description": "The organizational or institutional affiliation of the creator. Start writing the name of the organization or institution and a suggestion will be made from Research Organization Registry (ROR) Community API.", + "description": "The organizational or institutional affiliation of the creator. Upon filling the form with the organization or institution suggestion will be made from Research Organization Registry (ROR) Community API.", "items": { "type": "object", "title": "Affiliation Details", @@ -590,7 +590,7 @@ "Walloon", "Welsh", "Western Frisian", - "WolOf", + "Wolof", "Xhosa", "Yiddish", "Yoruba", @@ -684,17 +684,17 @@ "relatedMetadataScheme": { "type": "string", "title": "Related Metadata Scheme", - "description": "To be Use only with this relation pair: (Has Metadata/Is Metadata For). Example DDI-L" + "description": "To be used only with this relation pair: (Has Metadata/Is Metadata For). Example DDI-L" }, "schemeUri": { "type": "string", "title": "Related Metadata Scheme URI", - "description": "To be Use only with this relation pair: (Has Metadata/Is Metadata For). Example: http://www.ddialliance.org/Specification/DDI-Lifecycle/3.1/XMLSchema/instance.xsd" + "description": "To be used only with this relation pair: (Has Metadata/Is Metadata For). Example: http://www.ddialliance.org/Specification/DDI-Lifecycle/3.1/XMLSchema/instance.xsd" }, "schemeType": { "type": "string", "title": "Related Metadata Scheme Type", - "description": "To be Use only with this relation pair: (Has Metadata/Is Metadata For). Examples: XSD, DDT, Turtle" + "description": "To be used only with this relation pair: (Has Metadata/Is Metadata For). Examples: XSD, DDT, Turtle" }, "resourceTypeGeneral": { "type": "string", diff --git a/metadata_backend/helpers/schemas/ena_policy.json b/metadata_backend/helpers/schemas/ena_policy.json index 510ddb9fd..639eea014 100644 --- a/metadata_backend/helpers/schemas/ena_policy.json +++ b/metadata_backend/helpers/schemas/ena_policy.json @@ -308,7 +308,7 @@ }, "policy": { "title": "Policy", - "description": "Policies can be added either by adding the text of the Policy of by pointing to an existing URL.", + "description": "Policies can be added either by providing the text of the Policy of by pointing to an existing URL.", "oneOf": [ { "type": "object", diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index 4d364b456..a95844d62 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -71,7 +71,7 @@ "affiliation": { "type": "array", "title": "Affiliations", - "description": "The organizational or institutional affiliation of the creator. Start writing the name of the organization or institution and a suggestion will be made from Research Organization Registry (ROR) Community API.", + "description": "The organizational or institutional affiliation of the creator. 
Upon filling the form with the organization or institution suggestion will be made from Research Organization Registry (ROR) Community API.", "items": { "type": "object", "title": "Affiliation Details", @@ -262,7 +262,7 @@ "affiliation": { "type": "array", "title": "Affiliations", - "description": "The organizational or institutional affiliation of the creator. Start writing the name of the organization or institution and a suggestion will be made from Research Organization Registry (ROR) Community API.", + "description": "The organizational or institutional affiliation of the creator. Upon filling the form with the organization or institution suggestion will be made from Research Organization Registry (ROR) Community API.", "items": { "type": "object", "title": "Affiliation Details", @@ -658,7 +658,7 @@ "Walloon", "Welsh", "Western Frisian", - "WolOf", + "Wolof", "Xhosa", "Yiddish", "Yoruba", @@ -752,17 +752,17 @@ "relatedMetadataScheme": { "type": "string", "title": "Related Metadata Scheme", - "description": "To be Use only with this relation pair: (Has Metadata/Is Metadata For). Example DDI-L" + "description": "To be used only with this relation pair: (Has Metadata/Is Metadata For). Example DDI-L" }, "schemeUri": { "type": "string", "title": "Related Metadata Scheme URI", - "description": "To be Use only with this relation pair: (Has Metadata/Is Metadata For). Example: http://www.ddialliance.org/Specification/DDI-Lifecycle/3.1/XMLSchema/instance.xsd" + "description": "To be used only with this relation pair: (Has Metadata/Is Metadata For). Example: http://www.ddialliance.org/Specification/DDI-Lifecycle/3.1/XMLSchema/instance.xsd" }, "schemeType": { "type": "string", "title": "Related Metadata Scheme Type", - "description": "To be Use only with this relation pair: (Has Metadata/Is Metadata For). Examples: XSD, DDT, Turtle" + "description": "To be used only with this relation pair: (Has Metadata/Is Metadata For). Examples: XSD, DDT, Turtle" }, "resourceTypeGeneral": { "type": "string", From 79a4f73b1dd6ad56b013fa93d67f52814f4f557e Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 13 Jan 2022 12:31:25 +0200 Subject: [PATCH 176/336] oidcp does not allow empty values --- tests/integration/mock_auth.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/mock_auth.py b/tests/integration/mock_auth.py index ede29bf25..c8f91daa2 100644 --- a/tests/integration/mock_auth.py +++ b/tests/integration/mock_auth.py @@ -37,12 +37,12 @@ def generate_token() -> Tuple: ttl = 3600 exp = iat + ttl -nonce = "" +nonce = "nonce" jwk_pair = generate_token() -user_sub = "" -user_given_name = "" -user_family_name = "" +user_sub = "test@test.example" +user_given_name = "User" +user_family_name = "test" mock_auth_url_docker = getenv("OIDC_URL", "http://mockauth:8000") # called from inside docker-network mock_auth_url_local = getenv("OIDC_URL_TEST", "http://localhost:8000") # called from local machine From fc97e01537a4261665ff6de139d35cfda2c46c58 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 17 Jan 2022 09:05:15 +0000 Subject: [PATCH 177/336] Bump jsonschema from 4.3.3 to 4.4.0 Bumps [jsonschema](https://github.com/Julian/jsonschema) from 4.3.3 to 4.4.0. 
- [Release notes](https://github.com/Julian/jsonschema/releases) - [Changelog](https://github.com/Julian/jsonschema/blob/main/CHANGELOG.rst) - [Commits](https://github.com/Julian/jsonschema/compare/v4.3.3...v4.4.0) --- updated-dependencies: - dependency-name: jsonschema dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index c5c78099c..d2aad5e51 100644 --- a/requirements.txt +++ b/requirements.txt @@ -43,9 +43,7 @@ idna==3.3 # via # requests # yarl -importlib-resources==5.4.0 - # via jsonschema -jsonschema==4.3.3 +jsonschema==4.4.0 # via -r requirements.in motor==2.5.1 # via -r requirements.in @@ -99,8 +97,6 @@ xmlschema==1.9.2 # via -r requirements.in yarl==1.7.2 # via aiohttp -zipp==3.6.0 - # via importlib-resources # The following packages are considered to be unsafe in a requirements file: # setuptools From 51aa3f1463782104e49af605abb1adfd942859f7 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Fri, 21 Jan 2022 13:57:02 +0000 Subject: [PATCH 178/336] Fix token endpoint, OIDC env vars and logging in mock auth Add expiration time configs to token endpoint. Add env vars OIDC_URL and OIDC_URL_TEST to mockauth dev containers as they weren't seen by mockauth. Adds logging configs for mockauth and add env var LOG_LEVEL to mockauth container. --- .env.example | 3 +++ docker-compose-tls.yml | 5 +++++ docker-compose.yml | 5 +++++ metadata_backend/api/auth.py | 11 +++++------ tests/integration/mock_auth.py | 32 +++++++++++++++++++++----------- 5 files changed, 39 insertions(+), 17 deletions(-) diff --git a/.env.example b/.env.example index 3da080cc5..c9ea0384b 100644 --- a/.env.example +++ b/.env.example @@ -2,6 +2,9 @@ AAI_CLIENT_SECRET=secret_must_be_long AAI_CLIENT_ID=aud2 OIDC_URL=http://mockauth:8000 +# change to http://mockauth:8000 if tests are run from container +OIDC_URL_TEST=http://localhost:8000 + # app urls BASE_URL=http://localhost:5430 diff --git a/docker-compose-tls.yml b/docker-compose-tls.yml index 8deaddbeb..0663066ed 100644 --- a/docker-compose-tls.yml +++ b/docker-compose-tls.yml @@ -24,6 +24,7 @@ services: - "AAI_CLIENT_SECRET=${AAI_CLIENT_SECRET}" - "AAI_CLIENT_ID=${AAI_CLIENT_ID}" - "OIDC_URL=${OIDC_URL}" + - "OIDC_URL_TEST=${OIDC_URL_TEST}" - "BASE_URL=${BASE_URL}" # Enable this for working with front-end on localhost # - "REDIRECT_URL=${REDIRECT_URL}" @@ -55,6 +56,10 @@ services: dockerfile: Dockerfile-dev context: . image: cscfi/metadata-submitter-dev + environment: + - "LOG_LEVEL=${LOG_LEVEL}" + - "OIDC_URL=${OIDC_URL}" + - "OIDC_URL_TEST=${OIDC_URL_TEST}" hostname: mockauth expose: - 8000 diff --git a/docker-compose.yml b/docker-compose.yml index 27809132b..d7a8308f3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -19,6 +19,7 @@ services: - "AAI_CLIENT_SECRET=${AAI_CLIENT_SECRET}" - "AAI_CLIENT_ID=${AAI_CLIENT_ID}" - "OIDC_URL=${OIDC_URL}" + - "OIDC_URL_TEST=${OIDC_URL_TEST}" - "BASE_URL=${BASE_URL}" # Enable this for working with front-end on localhost # - "REDIRECT_URL=${REDIRECT_URL}" @@ -49,6 +50,10 @@ services: context: . 
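A minimal sketch of the token-endpoint fix applied to mock_auth.py further below, where `iat`/`exp` move inside the handler because oidcrp is strict about iat, exp and ttl (standalone stdlib example, function name hypothetical):

    from time import time

    def fresh_token_times(ttl: int = 3600) -> dict:
        # Computed at issue time, not once at import time, so oidcrp's
        # iat/exp validation passes for every request.
        iat = int(time())
        return {"iat": iat, "exp": iat + ttl, "expires_in": ttl}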
target: develop image: cscfi/metadata-submitter-dev + environment: + - "LOG_LEVEL=${LOG_LEVEL}" + - "OIDC_URL=${OIDC_URL}" + - "OIDC_URL_TEST=${OIDC_URL_TEST}" hostname: mockauth expose: - 8000 diff --git a/metadata_backend/api/auth.py b/metadata_backend/api/auth.py index 5879545d8..5ba267459 100644 --- a/metadata_backend/api/auth.py +++ b/metadata_backend/api/auth.py @@ -1,18 +1,17 @@ """Handle Access for request and OIDC workflow.""" import hashlib -import ujson +from typing import Dict, Tuple +import ujson from aiohttp import web from aiohttp.web import Request, Response -from .middlewares import decrypt_cookie, generate_cookie -from .operators import UserOperator -from oidcrp.rp_handler import RPHandler from oidcrp.exception import OidcServiceError - -from typing import Dict, Tuple +from oidcrp.rp_handler import RPHandler from ..helpers.logger import LOG +from .middlewares import decrypt_cookie, generate_cookie +from .operators import UserOperator class AccessHandler: diff --git a/tests/integration/mock_auth.py b/tests/integration/mock_auth.py index c8f91daa2..bb44af924 100644 --- a/tests/integration/mock_auth.py +++ b/tests/integration/mock_auth.py @@ -1,15 +1,22 @@ """Mock OAUTH2 aiohttp.web server.""" +import logging +import urllib from os import getenv from time import time +from typing import Tuple + from aiohttp import web +from authlib.jose import jwk, jwt +from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives import serialization from cryptography.hazmat.primitives.asymmetric import rsa -from cryptography.hazmat.backends import default_backend -from authlib.jose import jwt, jwk -from typing import Tuple -import urllib -import logging + +FORMAT = "[%(asctime)s][%(levelname)-8s](L:%(lineno)s) %(funcName)s: %(message)s" +logging.basicConfig(format=FORMAT, datefmt="%Y-%m-%d %H:%M:%S") + +LOG = logging.getLogger("server") +LOG.setLevel(getenv("LOG_LEVEL", "INFO")) def generate_token() -> Tuple: @@ -56,8 +63,7 @@ async def setmock(req: web.Request) -> web.Response: user_sub = req.query["sub"] user_family_name = req.query["family"] user_given_name = req.query["given"] - - logging.info(user_sub, user_family_name, user_given_name) + LOG.info(f"{mock_auth_url_local}: {user_sub}, {user_family_name}, {user_given_name}") return web.HTTPOk() @@ -73,7 +79,7 @@ async def auth(req: web.Request) -> web.Response: callback_url = req.query["redirect_uri"] url = f"{callback_url}?{urllib.parse.urlencode(params)}" - logging.info(url) + LOG.info(url) response = web.HTTPSeeOther(url) return response @@ -82,6 +88,10 @@ async def auth(req: web.Request) -> web.Response: async def token(req: web.Request) -> web.Response: """Auth endpoint.""" global nonce, user_sub, user_family_name, user_given_name + # oidcrp is strict about iat, exp, ttl, so we can't hard code them + iat = int(time()) + ttl = 3600 + exp = iat + ttl id_token = { "at_hash": "fSi3VUa5i2o2SgY5gPJZgg", "eduPersonAffiliation": "member;staff", @@ -110,7 +120,7 @@ async def token(req: web.Request) -> web.Response: "expires_in": ttl, } - logging.info(data) + LOG.info(data) return web.json_response(data) @@ -121,7 +131,7 @@ async def jwk_response(request: web.Request) -> web.Response: keys[0]["kid"] = "rsa1" data = {"keys": keys} - logging.info(data) + LOG.info(data) return web.json_response(data) @@ -144,7 +154,7 @@ async def userinfo(request: web.Request) -> web.Response: "email": user_sub, } - logging.info(user_info) + LOG.info(user_info) return web.json_response(user_info) From 
bf7ab975edeb932a30802cc1641bae0e225fa6bb Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Fri, 21 Jan 2022 14:04:59 +0000 Subject: [PATCH 179/336] Updates pre-commit hooks configs Updates use of pre-commit hooks as instructed by python pre-commit library. --- .pre-commit-config.yaml | 9 +++++++++ Dockerfile-dev | 2 -- 2 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..d2febe843 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,9 @@ +repos: +- repo: local + hooks: + - id: custom-script-file + name: custom-script-file + entry: ./scripts/pre-commit.sh + language: script + pass_filenames: false + verbose: true diff --git a/Dockerfile-dev b/Dockerfile-dev index 40dcefcf7..66aada08d 100644 --- a/Dockerfile-dev +++ b/Dockerfile-dev @@ -28,10 +28,8 @@ FROM appbase as local #======================= COPY requirements-dev.txt . -COPY ./scripts/install-hooks.sh ./scripts/install-hooks.sh RUN pip install --no-cache-dir -r requirements.txt RUN pip install --no-cache-dir -r requirements-dev.txt -RUN ./scripts/install-hooks.sh ENV PYTHONUNBUFFERED=1 From 4aacfb0a396753f98b40da401041254c6e9c2623 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Fri, 21 Jan 2022 14:22:53 +0000 Subject: [PATCH 180/336] Fix redirecting to frontend Add permanet use of REDIRECT_URL in dev backend container --- .env.example | 1 + docker-compose-tls.yml | 3 +-- docker-compose.yml | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.env.example b/.env.example index c9ea0384b..e54037e01 100644 --- a/.env.example +++ b/.env.example @@ -9,6 +9,7 @@ OIDC_URL_TEST=http://localhost:8000 # app urls BASE_URL=http://localhost:5430 # change to http://frontend:3000 if started using docker-compose for frontend +# should be commented out when running integration tests # REDIRECT_URL=http://localhost:3000 # logging diff --git a/docker-compose-tls.yml b/docker-compose-tls.yml index 0663066ed..f669dfe8a 100644 --- a/docker-compose-tls.yml +++ b/docker-compose-tls.yml @@ -26,8 +26,7 @@ services: - "OIDC_URL=${OIDC_URL}" - "OIDC_URL_TEST=${OIDC_URL_TEST}" - "BASE_URL=${BASE_URL}" - # Enable this for working with front-end on localhost - # - "REDIRECT_URL=${REDIRECT_URL}" + - "REDIRECT_URL=${REDIRECT_URL}" - "LOG_LEVEL=${LOG_LEVEL}" - "MONGO_DATABASE=${MONGO_DATABASE}" - "MONGO_AUTHDB=${MONGO_AUTHDB}" diff --git a/docker-compose.yml b/docker-compose.yml index d7a8308f3..739a11e06 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -21,8 +21,7 @@ services: - "OIDC_URL=${OIDC_URL}" - "OIDC_URL_TEST=${OIDC_URL_TEST}" - "BASE_URL=${BASE_URL}" - # Enable this for working with front-end on localhost - # - "REDIRECT_URL=${REDIRECT_URL}" + - "REDIRECT_URL=${REDIRECT_URL}" - "LOG_LEVEL=${LOG_LEVEL}" - "MONGO_DATABASE=${MONGO_DATABASE}" - "MONGO_AUTHDB=${MONGO_AUTHDB}" From 2e09778a8d96ba58abd67dee8062c60642d2297e Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Fri, 21 Jan 2022 14:29:45 +0000 Subject: [PATCH 181/336] Add logging configs to doi mock api --- tests/integration/mock_doi_api.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/integration/mock_doi_api.py b/tests/integration/mock_doi_api.py index 327523f65..a3e2f04f6 100644 --- a/tests/integration/mock_doi_api.py +++ b/tests/integration/mock_doi_api.py @@ -3,9 +3,16 @@ import json import logging from datetime import datetime +from os import getenv from aiohttp import web +FORMAT = 
"[%(asctime)s][%(levelname)-8s](L:%(lineno)s) %(funcName)s: %(message)s" +logging.basicConfig(format=FORMAT, datefmt="%Y-%m-%d %H:%M:%S") + +LOG = logging.getLogger("server") +LOG.setLevel(getenv("LOG_LEVEL", "INFO")) + async def dois(req: web.Request) -> web.Response: """DOI endpoint.""" @@ -13,14 +20,14 @@ async def dois(req: web.Request) -> web.Response: content = await req.json() except json.decoder.JSONDecodeError as e: reason = f"JSON is not correctly formatted. See: {e}" - logging.info(reason) + LOG.info(reason) raise web.HTTPBadRequest(reason=reason) try: attributes = content["data"]["attributes"] except KeyError: reason = "Provided payload did not include required attributes." - logging.info(reason) + LOG.info(reason) raise web.HTTPBadRequest(reason=reason) data = { @@ -95,11 +102,11 @@ async def dois(req: web.Request) -> web.Response: } if "doi" in attributes or "prefix" in attributes: - logging.info(data) + LOG.info(data) return web.json_response(data) else: reason = "Provided payload include faulty attributes." - logging.info(reason) + LOG.info(reason) raise web.HTTPBadRequest(reason=reason) From 303bdfc8c5f3c7e96de59ce2decb6ce1d66a7fd1 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 24 Jan 2022 08:19:19 +0200 Subject: [PATCH 182/336] don't require pipesection pipesection is the only required element of another element, this creates issue with JSON Schema rendered forms. Also it does not make sense as that section has no other elements. If one is to use pipesection to fill in information there are requirement elements that would be of help. --- metadata_backend/helpers/schemas/ena_experiment.json | 3 --- metadata_backend/helpers/schemas/ena_run.json | 3 --- 2 files changed, 6 deletions(-) diff --git a/metadata_backend/helpers/schemas/ena_experiment.json b/metadata_backend/helpers/schemas/ena_experiment.json index ed1b8bef7..796adf01e 100644 --- a/metadata_backend/helpers/schemas/ena_experiment.json +++ b/metadata_backend/helpers/schemas/ena_experiment.json @@ -561,9 +561,6 @@ "title": "Pipeline", "description": "The Pipeline type identifies the sequence or tree of actions to process the sequencing data.", "type": "object", - "required": [ - "pipeSection" - ], "properties": { "pipeSection": { "type": "array", diff --git a/metadata_backend/helpers/schemas/ena_run.json b/metadata_backend/helpers/schemas/ena_run.json index c4b234f61..99336f620 100644 --- a/metadata_backend/helpers/schemas/ena_run.json +++ b/metadata_backend/helpers/schemas/ena_run.json @@ -169,9 +169,6 @@ "title": "Pipeline", "description": "The Pipeline type identifies the sequence or tree of actions to process the sequencing data.", "type": "object", - "required": [ - "pipeSection" - ], "properties": { "pipeSection": { "type": "array", From 48a57fef41350df86690943259aadfa0c3b4aaa9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Jan 2022 09:07:34 +0000 Subject: [PATCH 183/336] Bump pre-commit from 2.16.0 to 2.17.0 Bumps [pre-commit](https://github.com/pre-commit/pre-commit) from 2.16.0 to 2.17.0. - [Release notes](https://github.com/pre-commit/pre-commit/releases) - [Changelog](https://github.com/pre-commit/pre-commit/blob/master/CHANGELOG.md) - [Commits](https://github.com/pre-commit/pre-commit/compare/v2.16.0...v2.17.0) --- updated-dependencies: - dependency-name: pre-commit dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index f352430c2..f920db2d4 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -48,7 +48,7 @@ platformdirs==2.4.0 # virtualenv pluggy==1.0.0 # via tox -pre-commit==2.16.0 +pre-commit==2.17.0 # via -r requirements-dev.in py==1.11.0 # via tox From e95edea37c393cb294d3ed993702ab39b7e5497a Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Fri, 28 Jan 2022 12:28:21 +0200 Subject: [PATCH 184/336] add projects collection, link to user and folder --- metadata_backend/api/auth.py | 82 +++++++++++++++---- metadata_backend/api/operators.py | 82 +++++++++++++++++-- metadata_backend/database/db_service.py | 14 +++- metadata_backend/helpers/schemas/folders.json | 7 +- tests/integration/mock_auth.py | 1 + tests/test_auth.py | 73 +++++++++++++++-- tests/test_handlers.py | 26 ++++-- tests/test_operators.py | 80 +++++++++++++++++- 8 files changed, 320 insertions(+), 45 deletions(-) diff --git a/metadata_backend/api/auth.py b/metadata_backend/api/auth.py index 5ba267459..4fc1f11d9 100644 --- a/metadata_backend/api/auth.py +++ b/metadata_backend/api/auth.py @@ -1,7 +1,7 @@ """Handle Access for request and OIDC workflow.""" import hashlib -from typing import Dict, Tuple +from typing import Dict, Union, List import ujson from aiohttp import web @@ -11,7 +11,7 @@ from ..helpers.logger import LOG from .middlewares import decrypt_cookie, generate_cookie -from .operators import UserOperator +from .operators import UserOperator, ProjectOperator class AccessHandler: @@ -117,6 +117,12 @@ async def callback(self, req: Request) -> Response: LOG.error(f"OIDC Callback failed with: {e}") raise web.HTTPBadRequest(reason="Invalid OIDC callback.") + # If user has no project affiliations, they will be redirected to an instructions page + if "sdSubmitProjects" not in session["userinfo"]: + LOG.error("user has no project affiliations") + response = web.HTTPSeeOther(f"{self.redirect}/noproject") + return response + response = web.HTTPSeeOther(f"{self.redirect}/home") cookie, _ = generate_cookie(req) @@ -149,22 +155,20 @@ async def callback(self, req: Request) -> Response: req.app["Session"][session_id] = {"oidc_state": params["state"], "access_token": session["token"]} req.app["Cookies"].add(session_id) - user_data: Tuple[str, str] + # User data is read from AAI /userinfo and is used to create the user model in database + user_data = { + "user_id": "", + "real_name": f"{session['userinfo']['given_name']} {session['userinfo']['family_name']}", + # projects come from AAI in this form: "project1 project2 project3" + # if user is not affiliated to any projects the `sdSubmitProjects` key will be missing + "projects": session["userinfo"]["sdSubmitProjects"].split(" "), + } if "CSCUserName" in session["userinfo"]: - user_data = ( - session["userinfo"]["CSCUserName"], - f"{session['userinfo']['given_name']} {session['userinfo']['family_name']}", - ) - if "remoteUserIdentifier" in session["userinfo"]: - user_data = ( - session["userinfo"]["remoteUserIdentifier"], - f"{session['userinfo']['given_name']} {session['userinfo']['family_name']}", - ) + user_data["user_id"] = session["userinfo"]["CSCUserName"] + elif "remoteUserIdentifier" in session["userinfo"]: + user_data["user_id"] = session["userinfo"]["remoteUserIdentifier"] elif "sub" in session["userinfo"]: - user_data = ( - session["userinfo"]["sub"], - f"{session['userinfo']['given_name']} 
{session['userinfo']['family_name']}", - ) + user_data["user_id"] = session["userinfo"]["sub"] else: LOG.error( "User was authenticated, but they are missing mandatory claim CSCUserName, remoteUserIdentifier or sub." @@ -172,6 +176,9 @@ async def callback(self, req: Request) -> Response: raise web.HTTPBadRequest( reason="Could not set user, missing claim CSCUserName, remoteUserIdentifier or sub." ) + + # Process project external IDs into the database and return accession IDs back to user_data + user_data["projects"] = await self._process_projects(req, user_data["projects"]) await self._set_user(req, session_id, user_data) # done like this otherwise it will not redirect properly @@ -208,7 +215,33 @@ async def logout(self, req: Request) -> Response: raise response - async def _set_user(self, req: Request, session_id: str, user_data: Tuple[str, str]) -> None: + async def _process_projects(self, req: Request, projects: List[str]) -> List[Dict[str, str]]: + """Process project external IDs to internal accession IDs by getting IDs\ + from database and creating projects that are missing. + + :raises: HTTPBadRequest in failed to add project to database + :param req: A HTTP request instance + :param projects: A list of project external IDs + :returns: A list of objects containing project accession IDs and project numbers + """ + projects.sort() # sort project numbers to be increasing in order + new_project_ids: List[Dict[str, str]] = [] + + db_client = req.app["db_client"] + operator = ProjectOperator(db_client) + for project in projects: + project_id = await operator.create_project(project) + project_data = { + "projectId": project_id, # internal ID + "projectNumber": project, # human friendly + } + new_project_ids.append(project_data) + + return new_project_ids + + async def _set_user( + self, req: Request, session_id: str, user_data: Dict[str, Union[List[Dict[str, str]], str]] + ) -> None: """Set user in current session and return user id based on result of create_user. :raises: HTTPBadRequest in could not get user info from AAI OIDC @@ -219,5 +252,20 @@ async def _set_user(self, req: Request, session_id: str, user_data: Tuple[str, s db_client = req.app["db_client"] operator = UserOperator(db_client) + + # Create user user_id = await operator.create_user(user_data) + + # Check if user's projects have changed + old_user = await operator.read_user(user_id) + if old_user["projects"] != user_data["projects"]: + update_operation = [ + { + "op": "replace", + "path": "/projects", + "value": user_data["projects"], + } + ] + user_id = await operator.update_user(user_id, update_operation) + req.app["Session"][session_id]["user_info"] = user_id diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 669e8551c..50b1fbe0b 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -888,7 +888,7 @@ async def check_user_has_doc(self, collection: str, user_id: str, accession_id: LOG.info(f"found doc {accession_id} at current user") return True - async def create_user(self, data: Tuple) -> str: + async def create_user(self, data: Dict[str, Union[list, str]]) -> str: """Create new user object to database. 
:param data: User Data to identify user @@ -897,19 +897,18 @@ async def create_user(self, data: Tuple) -> str: """ user_data: Dict[str, Union[list, str]] = dict() - external_id = data[0] # this also can be sub key - name = data[1] try: - existing_user_id = await self.db_service.exists_user_by_external_id(external_id, name) + existing_user_id = await self.db_service.exists_user_by_external_id(data["user_id"], data["real_name"]) if existing_user_id: - LOG.info(f"User with identifier: {external_id} exists, no need to create.") + LOG.info(f"User with identifier: {data['user_id']} exists, no need to create.") return existing_user_id else: + user_data["projects"] = data["projects"] user_data["templates"] = [] user_data["folders"] = [] user_data["userId"] = user_id = self._generate_user_id() - user_data["name"] = name - user_data["externalId"] = external_id + user_data["name"] = data["real_name"] + user_data["externalId"] = data["user_id"] JSONValidator(user_data, "users") insert_success = await self.db_service.create("user", user_data) if not insert_success: @@ -1100,3 +1099,72 @@ def _generate_user_id(self) -> str: sequence = uuid4().hex LOG.debug("Generated user ID.") return sequence + + +class ProjectOperator: + """Operator class for handling database operations of project groups. + + Operations are implemented with JSON format. + """ + + def __init__(self, db_client: AsyncIOMotorClient) -> None: + """Init db_service. + + :param db_client: Motor client used for database connections. Should be + running on same loop with aiohttp, so needs to be passed from aiohttp + Application. + """ + self.db_service = DBService(mongo_database, db_client) + + async def create_project(self, project_number: str) -> str: + """Create new object project to database. + + :param project_numer: project external ID received from AAI + :raises: HTTPBadRequest if error occurs during the process of insert + :returns: Project id for the project inserted to database + """ + project_data: Dict[str, str] = dict() + + try: + existing_project_id = await self.db_service.exists_project_by_external_id(project_number) + if existing_project_id: + LOG.info(f"Project with external ID: {project_number} exists, no need to create.") + return existing_project_id + else: + project_id = self._generate_project_id() + project_data["projectId"] = project_id + project_data["externalId"] = project_number + insert_success = await self.db_service.create("project", project_data) + if not insert_success: + reason = "Inserting project to database failed for some reason." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + else: + LOG.info(f"Inserting project with id {project_id} to database succeeded.") + return project_id + except (ConnectionFailure, OperationFailure) as error: + reason = f"Error happened while inserting project: {error}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + async def _check_project_exists(self, project_id: str) -> None: + """Check the existence of a project by its id in the database. + + :param project_id: Identifier of project to find. + :raises: HTTPNotFound if project does not exist + :returns: None + """ + exists = await self.db_service.exists("project", project_id) + if not exists: + reason = f"Project with id {project_id} was not found." + LOG.error(reason) + raise web.HTTPNotFound(reason=reason) + + def _generate_project_id(self) -> str: + """Generate random project id. 
+ + :returns: str with project id + """ + sequence = uuid4().hex + LOG.debug("Generated project ID.") + return sequence diff --git a/metadata_backend/database/db_service.py b/metadata_backend/database/db_service.py index 7e0f9977b..2c2c9430d 100644 --- a/metadata_backend/database/db_service.py +++ b/metadata_backend/database/db_service.py @@ -91,12 +91,24 @@ async def exists(self, collection: str, accession_id: str) -> bool: LOG.debug(f"DB check exists for {accession_id} in collection {collection}.") return True if exists else False + @auto_reconnect + async def exists_project_by_external_id(self, external_id: str) -> Union[None, str]: + """Check project exists by its external id. + + :param external_id: project external id + :returns: Id if exists and None if it does not + """ + find_by_id = {"externalId": external_id} + project = await self.database["project"].find_one(find_by_id, {"_id": False, "externalId": False}) + LOG.debug(f"DB check project exists for {external_id} returned {project}.") + return project["projectId"] if project else None + @auto_reconnect async def exists_user_by_external_id(self, external_id: str, name: str) -> Union[None, str]: """Check user exists by its eppn. :param eppn: eduPersonPrincipalName to be searched - :returns: True if exists and False if it does not + :returns: Id if exists and None if it does not """ find_by_id = {"externalId": external_id, "name": name} user = await self.database["user"].find_one(find_by_id, {"_id": False, "externalId": False}) diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index a95844d62..923a2dda4 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -3,9 +3,14 @@ "title": "Folder schema containing submitted metadata objects", "required": [ "name", - "description" + "description", + "projectId" ], "properties": { + "projectId": { + "type": "string", + "title": "Owner project of the folder" + }, "folderId": { "type": "string", "title": "Folder Id" diff --git a/tests/integration/mock_auth.py b/tests/integration/mock_auth.py index bb44af924..be1edfd3e 100644 --- a/tests/integration/mock_auth.py +++ b/tests/integration/mock_auth.py @@ -152,6 +152,7 @@ async def userinfo(request: web.Request) -> web.Response: "schacHomeOrganization": "test.what", "family_name": user_family_name, "email": user_sub, + "sdSubmitProjects": "1000 2000 3000", } LOG.info(user_info) diff --git a/tests/test_auth.py b/tests/test_auth.py index 2ba1e846f..182a5d1c5 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -91,7 +91,7 @@ def tearDown(self): """Cleanup mocked stuff.""" pass - async def test_set_user(self): + async def test_set_user_no_update(self): """Test set user success.""" request = get_request_with_fernet() session_id = "session_id" @@ -103,10 +103,53 @@ async def test_set_user(self): "sub": "user@test.fi", "given_name": "User", "family_name": "Test", + "projects": { + "projectId": "internal_1000", + "projectNumber": "1000", + }, + } + old_user_data = { + "projects": { + "projectId": "internal_1000", + "projectNumber": "1000", + } + } + + with patch("metadata_backend.api.operators.UserOperator.create_user", return_value=new_user_id): + with patch("metadata_backend.api.operators.UserOperator.read_user", return_value=old_user_data): + await self.AccessHandler._set_user(request, session_id, user_data) + + self.assertIn("user_info", request.app["Session"][session_id]) + self.assertEqual(new_user_id, 
request.app["Session"][session_id]["user_info"]) + + async def test_set_user_with_update(self): + """Test set user success.""" + request = get_request_with_fernet() + session_id = "session_id" + new_user_id = "USR12345" + + request.app["db_client"] = MagicMock() + request.app["Session"] = {session_id: {}} + user_data = { + "sub": "user@test.fi", + "given_name": "User", + "family_name": "Test", + "projects": { + "projectId": "internal_1000", + "projectNumber": "1000", + }, + } + old_user_data = { + "projects": { + "projectId": "internal_2000", + "projectNumber": "2000", + } } with patch("metadata_backend.api.operators.UserOperator.create_user", return_value=new_user_id): - await self.AccessHandler._set_user(request, session_id, user_data) + with patch("metadata_backend.api.operators.UserOperator.read_user", return_value=old_user_data): + with patch("metadata_backend.api.operators.UserOperator.update_user", return_value=new_user_id): + await self.AccessHandler._set_user(request, session_id, user_data) self.assertIn("user_info", request.app["Session"][session_id]) self.assertEqual(new_user_id, request.app["Session"][session_id]["user_info"]) @@ -133,11 +176,18 @@ async def test_callback_pass(self): request.query["code"] = "code" session = {"iss": "http://auth.domain.com:5430", "auth_request": {}} - finalize = {"token": "token", "userinfo": {"sub": "user", "given_name": "name", "family_name": "name"}} + finalize = { + "token": "token", + "userinfo": {"sub": "user", "given_name": "name", "family_name": "name", "sdSubmitProjects": "1000"}, + } with patch("oidcrp.rp_handler.RPHandler.get_session_information", return_value=session): with patch("oidcrp.rp_handler.RPHandler.finalize", return_value=finalize): - with patch("metadata_backend.api.auth.AccessHandler._set_user", return_value=None): - await self.AccessHandler.callback(request) + with patch( + "metadata_backend.api.auth.AccessHandler._process_projects", + return_value=[{"projectId": "internal_1000", "projectNumber": "1000"}], + ): + with patch("metadata_backend.api.auth.AccessHandler._set_user", return_value=None): + await self.AccessHandler.callback(request) async def test_callback_missing_claim(self): """Test callback missing claim validation.""" @@ -146,7 +196,10 @@ async def test_callback_missing_claim(self): request.query["code"] = "code" session = {"iss": "http://auth.domain.com:5430", "auth_request": {}} - finalize = {"token": "token", "userinfo": {}} + finalize = { + "token": "token", + "userinfo": {"given_name": "some", "family_name": "one", "sdSubmitProjects": "1000"}, + } with patch("oidcrp.rp_handler.RPHandler.get_session_information", return_value=session): with patch("oidcrp.rp_handler.RPHandler.finalize", return_value=finalize): with self.assertRaises(HTTPBadRequest): @@ -187,3 +240,11 @@ async def test_callback_missing_code(self): with self.assertRaises(HTTPBadRequest): await self.AccessHandler.callback(request) + + async def test_process_projects(self): + """Test that process projects returns accession IDs.""" + request = get_request_with_fernet() + request.app["db_client"] = MagicMock() + with patch("metadata_backend.api.operators.ProjectOperator.create_project", return_value="accession_id"): + processed_projects = await self.AccessHandler._process_projects(request, ["1000"]) + self.assertEqual(processed_projects, [{"projectId": "accession_id", "projectNumber": "1000"}]) diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 2156a586b..3eb3c4f95 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py 
@@ -815,21 +815,29 @@ async def tearDownAsync(self): async def test_folder_creation_works(self): """Test that folder is created and folder ID returned.""" - json_req = {"name": "test", "description": "test folder"} + json_req = {"name": "test", "description": "test folder", "projectId": "1000"} response = await self.client.post("/folders", json=json_req) json_resp = await response.json() self.MockedFolderOperator().create_folder.assert_called_once() self.assertEqual(response.status, 201) self.assertEqual(json_resp["folderId"], self.folder_id) - async def test_folder_creation_with_missing_data_fails(self): - """Test that folder creation fails when missing data in request.""" - json_req = {"description": "test folder"} + async def test_folder_creation_with_missing_name_fails(self): + """Test that folder creation fails when missing name in request.""" + json_req = {"description": "test folder", "projectId": "1000"} response = await self.client.post("/folders", json=json_req) json_resp = await response.json() self.assertEqual(response.status, 400) self.assertIn("'name' is a required property", json_resp["detail"]) + async def test_folder_creation_with_missing_project_fails(self): + """Test that folder creation fails when missing project in request.""" + json_req = {"description": "test folder", "name": "name"} + response = await self.client.post("/folders", json=json_req) + json_resp = await response.json() + self.assertEqual(response.status, 400) + self.assertIn("'projectId' is a required property", json_resp["detail"]) + async def test_folder_creation_with_empty_body_fails(self): """Test that folder creation fails when no data in request.""" response = await self.client.post("/folders") @@ -840,7 +848,7 @@ async def test_folder_creation_with_empty_body_fails(self): async def test_get_folders_with_1_folder(self): """Test get_folders() endpoint returns list with 1 folder.""" self.MockedFolderOperator().query_folders.return_value = (self.test_folder, 1) - response = await self.client.get("/folders") + response = await self.client.get("/folders?projectId=1000") self.MockedFolderOperator().query_folders.assert_called_once() self.assertEqual(response.status, 200) result = { @@ -857,7 +865,7 @@ async def test_get_folders_with_1_folder(self): async def test_get_folders_with_no_folders(self): """Test get_folders() endpoint returns empty list.""" self.MockedFolderOperator().query_folders.return_value = ([], 0) - response = await self.client.get("/folders") + response = await self.client.get("/folders?projectId=1000") self.MockedFolderOperator().query_folders.assert_called_once() self.assertEqual(response.status, 200) result = { @@ -873,17 +881,17 @@ async def test_get_folders_with_no_folders(self): async def test_get_folders_with_bad_params(self): """Test get_folders() with faulty pagination parameters.""" - response = await self.client.get("/folders?page=ayylmao") + response = await self.client.get("/folders?page=ayylmao&projectId=1000") self.assertEqual(response.status, 400) resp = await response.json() self.assertEqual(resp["detail"], "page parameter must be a number, now it is ayylmao") - response = await self.client.get("/folders?page=1&per_page=-100") + response = await self.client.get("/folders?page=1&per_page=-100&projectId=1000") self.assertEqual(response.status, 400) resp = await response.json() self.assertEqual(resp["detail"], "per_page parameter must be over 0") - response = await self.client.get("/folders?published=yes") + response = await 
self.client.get("/folders?published=yes&projectId=1000") self.assertEqual(response.status, 400) resp = await response.json() self.assertEqual(resp["detail"], "'published' parameter must be either 'true' or 'false'") diff --git a/tests/test_operators.py b/tests/test_operators.py index 20ca13851..ab1297770 100644 --- a/tests/test_operators.py +++ b/tests/test_operators.py @@ -16,6 +16,7 @@ Operator, XMLOperator, UserOperator, + ProjectOperator, ) @@ -74,6 +75,8 @@ def setUp(self): other patches and mocks for tests. """ self.client = MagicMock() + self.project_id = "project_1000" + self.project_generated_id = "64fbdce1c69b436e8d6c91fd746064d4" self.accession_id = uuid4().hex self.folder_id = uuid4().hex self.test_folder = { @@ -112,6 +115,12 @@ def setUp(self): autospec=True, ) self.patch_user.start() + self.patch_project = patch( + ("metadata_backend.api.operators.ProjectOperator._generate_project_id"), + return_value=self.project_generated_id, + autospec=True, + ) + self.patch_project.start() def tearDown(self): """Stop patchers.""" @@ -119,6 +128,7 @@ def tearDown(self): self.patch_accession.stop() self.patch_folder.stop() self.patch_user.stop() + self.patch_project.stop() async def test_reading_metadata_works(self): """Test JSON is read from db correctly.""" @@ -774,6 +784,13 @@ async def test_folder_object_remove_fails(self): with self.assertRaises(HTTPBadRequest): await operator.remove_object(self.test_folder, "study", self.accession_id) + async def test_check_folder_exists_passes(self): + """Test fails exists passes.""" + operator = FolderOperator(self.client) + operator.db_service.exists.return_value = True + await operator.check_folder_exists(self.folder_id) + operator.db_service.exists.assert_called_once() + async def test_check_folder_exists_fails(self): """Test fails exists fails.""" operator = FolderOperator(self.client) @@ -809,7 +826,7 @@ async def test_delete_folder_fails(self): async def test_create_user_works_and_returns_userId(self): """Test create method for users work.""" operator = UserOperator(self.client) - data = "externalId", "name" + data = {"user_id": "externalId", "real_name": "name", "projects": ""} operator.db_service.exists_user_by_external_id.return_value = None operator.db_service.create.return_value = True user = await operator.create_user(data) @@ -819,7 +836,7 @@ async def test_create_user_works_and_returns_userId(self): async def test_create_user_on_create_fails(self): """Test create method fails on db create.""" operator = UserOperator(self.client) - data = "externalId", "name" + data = {"user_id": "externalId", "real_name": "name", "projects": ""} operator.db_service.exists_user_by_external_id.return_value = None operator.db_service.create.return_value = False with self.assertRaises(HTTPBadRequest): @@ -866,7 +883,7 @@ async def test_check_user_doc_no_data(self): async def test_create_user_works_existing_userId(self): """Test create method for existing user.""" operator = UserOperator(self.client) - data = "eppn", "name" + data = {"user_id": "eppn", "real_name": "name", "projects": ""} operator.db_service.exists_user_by_external_id.return_value = self.user_generated_id user = await operator.create_user(data) operator.db_service.create.assert_not_called() @@ -874,7 +891,7 @@ async def test_create_user_works_existing_userId(self): async def test_create_user_fails(self): """Test create user fails.""" - data = "eppn", "name" + data = {"user_id": "eppn", "real_name": "name", "projects": ""} operator = UserOperator(self.client) 
operator.db_service.exists_user_by_external_id.side_effect = ConnectionFailure with self.assertRaises(HTTPBadRequest): @@ -897,6 +914,13 @@ async def test_read_user_fails(self): with self.assertRaises(HTTPBadRequest): await operator.read_user(self.user_id) + async def test_check_user_exists_passes(self): + """Test user exists passes.""" + operator = UserOperator(self.client) + operator.db_service.exists.return_value = True + await operator._check_user_exists(self.user_id) + operator.db_service.exists.assert_called_once() + async def test_check_user_exists_fails(self): """Test user exists fails.""" operator = UserOperator(self.client) @@ -1002,6 +1026,54 @@ async def test_user_objects_assing_fails(self): with self.assertRaises(HTTPBadRequest): await operator.assign_objects(self.user_generated_id, "study", []) + async def test_create_project_works_and_returns_projectId(self): + """Test create method for projects work.""" + operator = ProjectOperator(self.client) + operator.db_service.exists_project_by_external_id.return_value = None + operator.db_service.create.return_value = True + project = await operator.create_project(self.project_id) + operator.db_service.create.assert_called_once() + self.assertEqual(project, self.project_generated_id) + + async def test_create_project_works_existing_projectId(self): + """Test create method for existing user.""" + operator = ProjectOperator(self.client) + operator.db_service.exists_project_by_external_id.return_value = self.project_generated_id + project = await operator.create_project(self.project_id) + operator.db_service.create.assert_not_called() + self.assertEqual(project, self.project_generated_id) + + async def test_create_project_on_create_fails(self): + """Test create method fails on db create.""" + operator = ProjectOperator(self.client) + operator.db_service.exists_project_by_external_id.return_value = None + operator.db_service.create.return_value = False + with self.assertRaises(HTTPBadRequest): + await operator.create_project(self.project_id) + operator.db_service.create.assert_called_once() + + async def test_create_project_fails(self): + """Test create project fails.""" + operator = ProjectOperator(self.client) + operator.db_service.exists_project_by_external_id.side_effect = ConnectionFailure + with self.assertRaises(HTTPBadRequest): + await operator.create_project(self.project_id) + + async def test_check_project_exists_fails(self): + """Test project exists fails.""" + operator = ProjectOperator(self.client) + operator.db_service.exists.return_value = False + with self.assertRaises(HTTPNotFound): + await operator._check_project_exists(self.project_id) + operator.db_service.exists.assert_called_once() + + async def test_check_project_exists_passes(self): + """Test project exists passes.""" + operator = ProjectOperator(self.client) + operator.db_service.exists.return_value = True + await operator._check_project_exists(self.project_id) + operator.db_service.exists.assert_called_once() + if __name__ == "__main__": unittest.main() From eba22a2d2e0dbdfe5f31fea23ae4f769afe2ab25 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 31 Jan 2022 09:04:15 +0000 Subject: [PATCH 185/336] Bump rojopolis/spellcheck-github-actions from 0.20.0 to 0.21.1 Bumps [rojopolis/spellcheck-github-actions](https://github.com/rojopolis/spellcheck-github-actions) from 0.20.0 to 0.21.1. 
- [Release notes](https://github.com/rojopolis/spellcheck-github-actions/releases) - [Changelog](https://github.com/rojopolis/spellcheck-github-actions/blob/master/CHANGELOG.md) - [Commits](https://github.com/rojopolis/spellcheck-github-actions/compare/0.20.0...0.21.1) --- updated-dependencies: - dependency-name: rojopolis/spellcheck-github-actions dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index ab9ae0031..97f858c21 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -13,7 +13,7 @@ jobs: steps: - uses: actions/checkout@master - - uses: rojopolis/spellcheck-github-actions@0.20.0 + - uses: rojopolis/spellcheck-github-actions@0.21.1 name: Spellcheck with: config_path: .github/config/.spellcheck.yml From cd731162ada70d1305dd2e54a3e21ff0a9712097 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 31 Jan 2022 09:04:50 +0000 Subject: [PATCH 186/336] Bump black from 21.12b0 to 22.1.0 Bumps [black](https://github.com/psf/black) from 21.12b0 to 22.1.0. - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/commits/22.1.0) --- updated-dependencies: - dependency-name: black dependency-type: direct:development ... Signed-off-by: dependabot[bot] --- requirements-dev.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index f920db2d4..ebc3f6fa8 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -8,7 +8,7 @@ aiofiles==0.8.0 # via -r requirements-dev.in backports.entry-points-selectable==1.1.1 # via virtualenv -black==21.12b0 +black==22.1.0 # via -r requirements-dev.in certifi==2021.10.8 # via -r requirements-dev.in @@ -74,8 +74,6 @@ tomli==1.2.2 # pep517 tox==3.24.5 # via -r requirements-dev.in -typing-extensions==4.0.0 - # via black virtualenv==20.10.0 # via # pre-commit From 607568351d3af480b149f686a438260d2294327f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 31 Jan 2022 09:04:55 +0000 Subject: [PATCH 187/336] Bump coverage from 6.2 to 6.3 Bumps [coverage](https://github.com/nedbat/coveragepy) from 6.2 to 6.3. - [Release notes](https://github.com/nedbat/coveragepy/releases) - [Changelog](https://github.com/nedbat/coveragepy/blob/master/CHANGES.rst) - [Commits](https://github.com/nedbat/coveragepy/compare/6.2...6.3) --- updated-dependencies: - dependency-name: coverage dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 79fc88bde..dcf9b76fb 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ packages=find_packages(exclude=["tests"]), install_requires=requirements, extras_require={ - "test": ["coverage==6.2", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.5"], + "test": ["coverage==6.3", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.5"], "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"], }, package_data={ From a3f3d8f0750e5613cd7617b377c16362f33b2909 Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Wed, 2 Feb 2022 11:00:12 +0200 Subject: [PATCH 188/336] refactor ownership checking --- metadata_backend/api/handlers/folder.py | 44 +++-- metadata_backend/api/handlers/object.py | 8 +- metadata_backend/api/handlers/restapi.py | 39 +++-- metadata_backend/api/handlers/template.py | 49 ++++-- metadata_backend/api/operators.py | 115 +++++++++++-- metadata_backend/database/db_service.py | 2 +- tests/test_handlers.py | 18 +- tests/test_operators.py | 195 ++++++++++++++++++---- 8 files changed, 368 insertions(+), 102 deletions(-) diff --git a/metadata_backend/api/handlers/folder.py b/metadata_backend/api/handlers/folder.py index cc5910fc9..b60a105c8 100644 --- a/metadata_backend/api/handlers/folder.py +++ b/metadata_backend/api/handlers/folder.py @@ -16,7 +16,7 @@ from ...helpers.validator import JSONValidator from .restapi import RESTAPIHandler from ..middlewares import get_session -from ..operators import FolderOperator, Operator, UserOperator +from ..operators import FolderOperator, Operator, UserOperator, ProjectOperator class FolderAPIHandler(RESTAPIHandler): @@ -83,14 +83,20 @@ async def get_folders(self, req: Request) -> Response: """ page = self._get_page_param(req, "page", 1) per_page = self._get_page_param(req, "per_page", 5) + project_id = self._get_param(req, "projectId") sort = {"date": True, "score": False} db_client = req.app["db_client"] user_operator = UserOperator(db_client) current_user = get_session(req)["user_info"] user = await user_operator.read_user(current_user) + user_has_project = await user_operator.check_user_has_project(project_id, user["userId"]) + if not user_has_project: + reason = f"user {user['userId']} is not affiliated with project {project_id}" + LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) - folder_query = {"folderId": {"$in": user["folders"]}} + folder_query = {"projectId": project_id} # Check if only published or draft folders are requestsed if "published" in req.query: pub_param = req.query.get("published", "").title() @@ -104,7 +110,7 @@ async def get_folders(self, req: Request) -> Response: if "name" in req.query: name_param = req.query.get("name", "") if name_param: - folder_query = {"$text": {"$search": name_param}} + folder_query["$text"] = {"$search": name_param} sort["score"] = True sort["date"] = False @@ -169,12 +175,24 @@ async def post_folder(self, req: Request) -> Response: JSONValidator(content, "folders").validate - operator = FolderOperator(db_client) - folder = await operator.create_folder(content) + # Check that project exists + project_op = ProjectOperator(db_client) + await project_op._check_project_exists(content["projectId"]) + # Check that user is affiliated with project user_op = UserOperator(db_client) current_user = get_session(req)["user_info"] - await user_op.assign_objects(current_user, "folders", [folder]) + user = await user_op.read_user(current_user) + user_has_project = await 
user_op.check_user_has_project(content["projectId"], user["userId"]) + if not user_has_project: + reason = f"user {user['userId']} is not affiliated with project {content['projectId']}" + LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + + operator = FolderOperator(db_client) + folder = await operator.create_folder(content) + + # await user_op.assign_objects(current_user, "folders", [folder]) body = ujson.dumps({"folderId": folder}, escape_forward_slashes=False) @@ -196,7 +214,7 @@ async def get_folder(self, req: Request) -> Response: await operator.check_folder_exists(folder_id) - await self._handle_check_ownedby_user(req, "folders", folder_id) + await self._handle_check_ownership(req, "folders", folder_id) folder = await operator.read_folder(folder_id) @@ -229,7 +247,7 @@ async def patch_folder(self, req: Request) -> Response: curr_folder["doiInfo"] = op["value"] JSONValidator(curr_folder, "folders").validate - await self._handle_check_ownedby_user(req, "folders", folder_id) + await self._handle_check_ownership(req, "folders", folder_id) upd_folder = await operator.update_folder(folder_id, patch_ops if isinstance(patch_ops, list) else [patch_ops]) @@ -249,7 +267,7 @@ async def publish_folder(self, req: Request) -> Response: await operator.check_folder_exists(folder_id) - await self._handle_check_ownedby_user(req, "folders", folder_id) + await self._handle_check_ownership(req, "folders", folder_id) folder = await operator.read_folder(folder_id) @@ -303,7 +321,7 @@ async def delete_folder(self, req: Request) -> Response: await operator.check_folder_exists(folder_id) await operator.check_folder_published(folder_id) - await self._handle_check_ownedby_user(req, "folders", folder_id) + await self._handle_check_ownership(req, "folders", folder_id) obj_ops = Operator(db_client) @@ -314,9 +332,9 @@ async def delete_folder(self, req: Request) -> Response: _folder_id = await operator.delete_folder(folder_id) - user_op = UserOperator(db_client) - current_user = get_session(req)["user_info"] - await user_op.remove_objects(current_user, "folders", [folder_id]) + # user_op = UserOperator(db_client) + # current_user = get_session(req)["user_info"] + # await user_op.remove_objects(current_user, "folders", [folder_id]) LOG.info(f"DELETE folder with ID {_folder_id} was successful.") return web.Response(status=204) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index 506de11c5..da307c936 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -82,7 +82,7 @@ async def get_object(self, req: Request) -> Response: await operator.check_exists(collection, accession_id) - await self._handle_check_ownedby_user(req, collection, accession_id) + await self._handle_check_ownership(req, collection, accession_id) data, content_type = await operator.read_metadata_object(type_collection, accession_id) @@ -183,7 +183,7 @@ async def delete_object(self, req: Request) -> Response: await Operator(db_client).check_exists(collection, accession_id) - await self._handle_check_ownedby_user(req, collection, accession_id) + await self._handle_check_ownership(req, collection, accession_id) folder_op = FolderOperator(db_client) exists, folder_id, published = await folder_op.check_object_in_folder(collection, accession_id) @@ -235,7 +235,7 @@ async def put_object(self, req: Request) -> Response: await operator.check_exists(collection, accession_id) - await self._handle_check_ownedby_user(req, collection, accession_id) + await 
self._handle_check_ownership(req, collection, accession_id)

         accession_id = await operator.replace_metadata_object(collection, accession_id, content)

@@ -268,7 +268,7 @@ async def patch_object(self, req: Request) -> Response:

         await operator.check_exists(collection, accession_id)

-        await self._handle_check_ownedby_user(req, collection, accession_id)
+        await self._handle_check_ownership(req, collection, accession_id)

         folder_op = FolderOperator(db_client)
         exists, _, published = await folder_op.check_object_in_folder(collection, accession_id)
diff --git a/metadata_backend/api/handlers/restapi.py b/metadata_backend/api/handlers/restapi.py
index 943d58742..71e3a48bc 100644
--- a/metadata_backend/api/handlers/restapi.py
+++ b/metadata_backend/api/handlers/restapi.py
@@ -50,12 +50,25 @@ def _get_page_param(self, req: Request, name: str, default: int) -> int:
             raise web.HTTPBadRequest(reason=reason)
         return param

-    async def _handle_check_ownedby_user(self, req: Request, collection: str, accession_id: str) -> bool:
-        """Check if object belongs to user.
+    def _get_param(self, req: Request, name: str) -> str:
+        """Extract mandatory query parameter from URL.
+
+        :param req: GET Request
+        :param name: name of query param to get
+        :returns: the query parameter value
+        """
+        param = req.query.get(name, "")
+        if param == "":
+            reason = f"mandatory query parameter {name} is not set"
+            LOG.error(reason)
+            raise web.HTTPBadRequest(reason=reason)
+        return param
+
+    async def _handle_check_ownership(self, req: Request, collection: str, accession_id: str) -> bool:
+        """Check if object belongs to project.

         For this we need to check the object is in exactly one folder and we need to check
-        that folder belongs to a user. If the folder is published that means it can be
-        browsed by other users as well.
+        that folder belongs to a project.

         :param req: HTTP request
         :param collection: collection or schema of document
@@ -71,24 +84,24 @@ async def _handle_check_ownedby_user(self, req: Request, collection: str, access

         if collection != "folders":
             folder_op = FolderOperator(db_client)
-            check, folder_id, published = await folder_op.check_object_in_folder(collection, accession_id)
-            if published:
-                _check = True
-            elif check:
+            check, folder_id, _ = await folder_op.check_object_in_folder(collection, accession_id)
+            # if published:
+            #     _check = True
+            if check:
                 # if the draft object is found in folder we just need to check if the folder belongs to user
-                _check = await user_op.check_user_has_doc("folders", current_user, folder_id)
+                _check = await user_op.check_user_has_doc(req, "folders", current_user, folder_id)
             elif collection.startswith("template"):
                 # if collection is template but not found in a folder
                 # we also check if object is in templates of the user
                 # they will be here if they will not be deleted after publish
-                _check = await user_op.check_user_has_doc(collection, current_user, accession_id)
+                _check = await user_op.check_user_has_doc(req, collection, current_user, accession_id)
             else:
                 _check = False
         else:
-            _check = await user_op.check_user_has_doc(collection, current_user, accession_id)
+            _check = await user_op.check_user_has_doc(req, collection, current_user, accession_id)

         if not _check:
-            reason = f"The ID: {accession_id} does not belong to current user."
+            reason = f"{collection} {accession_id} does not belong to current user's projects."
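# Aside, not part of the diff: the new ownership chain in miniature, with plain
# dicts standing in for the MongoDB collections and made-up IDs. The lookup order
# mirrors FolderOperator.get_folder_project() followed by
# UserOperator.check_user_has_project():
_folders = {"FOL12345678": {"projectId": "internal_1000"}}
_users = {"USR12345678": {"projects": [{"projectId": "internal_1000", "projectNumber": "1000"}]}}

def _owns(folder_id: str, user_id: str) -> bool:
    # an object resolves to its folder's project; the user's project affiliations
    # then decide access, instead of the old per-user folder lists
    project_id = _folders[folder_id]["projectId"]
    return any(p["projectId"] == project_id for p in _users[user_id]["projects"])

assert _owns("FOL12345678", "USR12345678")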
LOG.error(reason) raise web.HTTPUnauthorized(reason=reason) @@ -143,7 +156,7 @@ async def _filter_by_user(self, req: Request, collection: str, seq: List) -> Asy :returns: AsyncGenerator """ for el in seq: - if await self._handle_check_ownedby_user(req, collection, el["accessionId"]): + if await self._handle_check_ownership(req, collection, el["accessionId"]): yield el async def _get_data(self, req: Request) -> Dict: diff --git a/metadata_backend/api/handlers/template.py b/metadata_backend/api/handlers/template.py index c2bccc2cc..784745c04 100644 --- a/metadata_backend/api/handlers/template.py +++ b/metadata_backend/api/handlers/template.py @@ -8,7 +8,7 @@ from ...helpers.logger import LOG from ..middlewares import get_session -from ..operators import Operator, UserOperator, XMLOperator +from ..operators import Operator, ProjectOperator, UserOperator, XMLOperator from .restapi import RESTAPIHandler @@ -33,7 +33,7 @@ async def get_template(self, req: Request) -> Response: await operator.check_exists(collection, accession_id) - await self._handle_check_ownedby_user(req, collection, accession_id) + await self._handle_check_ownership(req, collection, accession_id) data, content_type = await operator.read_metadata_object(collection, accession_id) @@ -57,8 +57,25 @@ async def post_template(self, req: Request) -> Response: db_client = req.app["db_client"] content = await self._get_data(req) + # No schema validation, so must check that project is set + if "projectId" not in content: + reason = "projectId is a mandatory POST key" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + # Check that project exists + project_op = ProjectOperator(db_client) + await project_op._check_project_exists(content["projectId"]) + + # Check that user is affiliated with project user_op = UserOperator(db_client) current_user = get_session(req)["user_info"] + user = await user_op.read_user(current_user) + user_has_project = await user_op.check_user_has_project(content["projectId"], user["userId"]) + if not user_has_project: + reason = f"user {user['userId']} is not affiliated with project {content['projectId']}" + LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) operator = Operator(db_client) @@ -73,7 +90,7 @@ async def post_template(self, req: Request) -> Response: data = [{"accessionId": accession_id, "schema": collection}] if "tags" in tmpl: data[0]["tags"] = tmpl["tags"] - await user_op.assign_objects(current_user, "templates", data) + # await user_op.assign_objects(current_user, "templates", data) tmpl_list.append({"accessionId": accession_id}) body = ujson.dumps(tmpl_list, escape_forward_slashes=False) @@ -86,7 +103,7 @@ async def post_template(self, req: Request) -> Response: data = [{"accessionId": accession_id, "schema": collection}] if "tags" in content: data[0]["tags"] = content["tags"] - await user_op.assign_objects(current_user, "templates", data) + # await user_op.assign_objects(current_user, "templates", data) body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) @@ -120,7 +137,7 @@ async def patch_template(self, req: Request) -> Response: await operator.check_exists(collection, accession_id) - await self._handle_check_ownedby_user(req, collection, accession_id) + await self._handle_check_ownership(req, collection, accession_id) accession_id = await operator.update_metadata_object(collection, accession_id, content) @@ -145,17 +162,17 @@ async def delete_template(self, req: Request) -> Response: await Operator(db_client).check_exists(collection, 
accession_id) - await self._handle_check_ownedby_user(req, collection, accession_id) - - user_op = UserOperator(db_client) - current_user = get_session(req)["user_info"] - check_user = await user_op.check_user_has_doc(collection, current_user, accession_id) - if check_user: - await user_op.remove_objects(current_user, "templates", [accession_id]) - else: - reason = "This template does not seem to belong to any user." - LOG.error(reason) - raise web.HTTPUnprocessableEntity(reason=reason) + await self._handle_check_ownership(req, collection, accession_id) + + # user_op = UserOperator(db_client) + # current_user = get_session(req)["user_info"] + # check_user = await user_op.check_user_has_doc(collection, current_user, accession_id) + # if check_user: + # await user_op.remove_objects(current_user, "templates", [accession_id]) + # else: + # reason = "This template does not seem to belong to any user." + # LOG.error(reason) + # raise web.HTTPUnprocessableEntity(reason=reason) accession_id = await Operator(db_client).delete_metadata_object(collection, accession_id) diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 50b1fbe0b..dcf5dc89b 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -12,6 +12,7 @@ from multidict import MultiDictProxy from pymongo.errors import ConnectionFailure, OperationFailure +from .middlewares import get_session from ..conf.conf import mongo_database, query_map from ..database.db_service import DBService, auto_reconnect from ..helpers.logger import LOG @@ -292,6 +293,34 @@ def __init__(self, db_client: AsyncIOMotorClient) -> None: """ super().__init__(mongo_database, "application/json", db_client) + async def get_object_project(self, collection: str, accession_id: str) -> str: + """Get the project ID the object is associated to. + + :param collection: database table to look into + :param object_id: internal accession ID of object + :returns: project ID object is associated to + """ + try: + object_cursor = self.db_service.query(collection, {"accessionId": accession_id}) + objects = [object async for object in object_cursor] + except (ConnectionFailure, OperationFailure) as error: + reason = f"Error happened while getting object from {collection}: {error}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + if len(objects) == 1: + try: + return objects[0]["projectId"] + except KeyError as error: + # This should not be possible and should never happen, if the object was created properly + reason = f"{collection} {accession_id} does not have an associated project, err={error}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + else: + reason = f"{collection} {accession_id} not found" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + async def query_metadata_database( self, schema_type: str, que: MultiDictProxy, page_num: int, page_size: int, filter_objects: List ) -> Tuple[List, int, int, int]: @@ -594,6 +623,33 @@ def __init__(self, db_client: AsyncIOMotorClient) -> None: """ self.db_service = DBService(mongo_database, db_client) + async def get_folder_project(self, folder_id: str) -> str: + """Get the project ID the folder is associated to. 
+ + :param folder_id: internal accession ID of folder + :returns: project ID folder is associated to + """ + try: + folder_cursor = self.db_service.query("folder", {"folderId": folder_id}) + folders = [folder async for folder in folder_cursor] + except (ConnectionFailure, OperationFailure) as error: + reason = f"Error happened while getting folder: {error}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + if len(folders) == 1: + try: + return folders[0]["projectId"] + except KeyError as error: + # This should not be possible and should never happen, if the folder was created properly + reason = f"folder {folder_id} does not have an associated project, err={error}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + else: + reason = f"folder {folder_id} not found" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + async def check_object_in_folder(self, collection: str, accession_id: str) -> Tuple[bool, str, bool]: """Check a object/draft is in a folder. @@ -856,8 +912,8 @@ def __init__(self, db_client: AsyncIOMotorClient) -> None: """ self.db_service = DBService(mongo_database, db_client) - async def check_user_has_doc(self, collection: str, user_id: str, accession_id: str) -> bool: - """Check a folder/template belongs to user. + async def check_user_has_doc(self, req: web.Request, collection: str, user_id: str, accession_id: str) -> bool: + """Check a folder/template belongs to same project the user is in. :param collection: collection it belongs to, it would be used as path :param user_id: user_id from session @@ -865,29 +921,52 @@ async def check_user_has_doc(self, collection: str, user_id: str, accession_id: :raises: HTTPUnprocessableEntity if more users seem to have same folder :returns: True if accession_id belongs to user """ + LOG.debug(f"check that user {user_id} belongs to same project as {collection} {accession_id}") + + db_client = req.app["db_client"] + user_operator = UserOperator(db_client) + + project_id = "" + if collection.startswith("template"): + object_operator = Operator(db_client) + project_id = await object_operator.get_object_project("template", accession_id) + elif collection == "folders": + folder_operator = FolderOperator(db_client) + project_id = await folder_operator.get_folder_project(accession_id) + else: + reason = f"collection must be folders or template, received {collection}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + current_user = get_session(req)["user_info"] + user = await user_operator.read_user(current_user) + user_has_project = await user_operator.check_user_has_project(project_id, user["userId"]) + return user_has_project + + async def check_user_has_project(self, project_id: str, user_id: str) -> bool: + """Check that user has project affiliation. 
+ + :param project_id: internal project ID + :param user_id: internal user ID + :raises HTTPBadRequest: on database error + :returns: True if user has project, False if user does not have project + """ try: - if collection.startswith("template"): - user_query = {"templates": {"$elemMatch": {"accessionId": accession_id}}, "userId": user_id} - else: - user_query = {"folders": {"$elemMatch": {"$eq": accession_id}}, "userId": user_id} + user_query = {"projects": {"$elemMatch": {"projectId": project_id}}, "userId": user_id} user_cursor = self.db_service.query("user", user_query) user_check = [user async for user in user_cursor] + if user_check: + LOG.debug(f"user {user_id} has project {project_id} affiliation") + return True + else: + reason = f"user {user_id} does not have project {project_id} affiliation" + LOG.debug(reason) + return False except (ConnectionFailure, OperationFailure) as error: - reason = f"Error happened while inserting user: {error}" + reason = f"Error happened while reading user project affiliation: {error}" LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - if len(user_check) == 0: - LOG.info(f"doc {accession_id} belongs to no user something is off") - return False - elif len(user_check) > 1: - reason = "There seem to be more users with same ID and/or same folders." - LOG.error(reason) - raise web.HTTPUnprocessableEntity(reason=reason) - else: - LOG.info(f"found doc {accession_id} at current user") - return True - async def create_user(self, data: Dict[str, Union[list, str]]) -> str: """Create new user object to database. diff --git a/metadata_backend/database/db_service.py b/metadata_backend/database/db_service.py index 2c2c9430d..7bdd7b198 100644 --- a/metadata_backend/database/db_service.py +++ b/metadata_backend/database/db_service.py @@ -84,7 +84,7 @@ async def exists(self, collection: str, accession_id: str) -> bool: :param accession_id: ID of the object/folder/user to be searched :returns: True if exists and False if it does not """ - id_key = f"{collection}Id" if (collection in ["folder", "user"]) else "accessionId" + id_key = f"{collection}Id" if (collection in ["folder", "user", "project"]) else "accessionId" projection = {"_id": False, "externalId": False} if collection == "user" else {"_id": False} find_by_id = {id_key: accession_id} exists = await self.database[collection].find_one(find_by_id, projection) diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 3eb3c4f95..8f10f43b2 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -102,7 +102,7 @@ async def setUpAsync(self): "filter_user.side_effect": self.fake_useroperator_filter_user, } - RESTAPIHandler._handle_check_ownedby_user = make_mocked_coro(True) + RESTAPIHandler._handle_check_ownership = make_mocked_coro(True) async def tearDownAsync(self): """Cleanup mocked stuff.""" @@ -816,11 +816,15 @@ async def tearDownAsync(self): async def test_folder_creation_works(self): """Test that folder is created and folder ID returned.""" json_req = {"name": "test", "description": "test folder", "projectId": "1000"} - response = await self.client.post("/folders", json=json_req) - json_resp = await response.json() - self.MockedFolderOperator().create_folder.assert_called_once() - self.assertEqual(response.status, 201) - self.assertEqual(json_resp["folderId"], self.folder_id) + with patch( + "metadata_backend.api.operators.ProjectOperator._check_project_exists", + return_value=True, + ): + response = await self.client.post("/folders", json=json_req) + json_resp = await 
response.json() + self.MockedFolderOperator().create_folder.assert_called_once() + self.assertEqual(response.status, 201) + self.assertEqual(json_resp["folderId"], self.folder_id) async def test_folder_creation_with_missing_name_fails(self): """Test that folder creation fails when missing name in request.""" @@ -898,7 +902,7 @@ async def test_get_folders_with_bad_params(self): async def test_get_folder_works(self): """Test folder is returned when correct folder id is given.""" - # RESTAPIHandler._handle_check_ownedby_user = make_mocked_coro(True) + RESTAPIHandler._handle_check_ownership = make_mocked_coro(True) response = await self.client.get("/folders/FOL12345678") self.assertEqual(response.status, 200) diff --git a/tests/test_operators.py b/tests/test_operators.py index ab1297770..10d734f4a 100644 --- a/tests/test_operators.py +++ b/tests/test_operators.py @@ -6,10 +6,11 @@ from unittest.mock import MagicMock, patch, call from aiohttp.web import HTTPBadRequest, HTTPNotFound, HTTPUnprocessableEntity +from aiohttp.test_utils import make_mocked_coro from unittest import IsolatedAsyncioTestCase from multidict import MultiDict, MultiDictProxy -from pymongo.errors import ConnectionFailure +from pymongo.errors import ConnectionFailure, OperationFailure from metadata_backend.api.operators import ( FolderOperator, @@ -19,6 +20,8 @@ ProjectOperator, ) +from .mockups import get_request_with_fernet + class AsyncIterator: """Async iterator based on range.""" @@ -80,6 +83,14 @@ def setUp(self): self.accession_id = uuid4().hex self.folder_id = uuid4().hex self.test_folder = { + "folderId": self.folder_id, + "projectId": self.project_generated_id, + "name": "Mock folder", + "description": "test mock folder", + "published": False, + "metadataObjects": [{"accessionId": "EGA1234567", "schema": "study"}], + } + self.test_folder_no_project = { "folderId": self.folder_id, "name": "Mock folder", "description": "test mock folder", @@ -625,6 +636,92 @@ async def test_query_skip_and_limit_are_set_correctly(self): operator.db_service.do_aggregate.assert_has_calls(calls, any_order=True) self.assertEqual(operator.db_service.do_aggregate.call_count, 2) + async def test_get_object_project_connfail(self): + """Test get object project, db connection failure.""" + operator = Operator(self.client) + operator.db_service.query.side_effect = ConnectionFailure + with self.assertRaises(HTTPBadRequest): + await operator.get_object_project("template", self.accession_id) + + async def test_get_object_project_opfail(self): + """Test get object project, db operation failure.""" + operator = Operator(self.client) + operator.db_service.query.side_effect = OperationFailure("err") + with self.assertRaises(HTTPBadRequest): + await operator.get_object_project("template", self.accession_id) + + async def test_get_object_project_passes(self): + """Test get object project returns project id.""" + operator = Operator(self.client) + operator.db_service.query.return_value = AsyncIterator([self.test_folder]) + result = await operator.get_object_project("template", self.accession_id) + operator.db_service.query.assert_called_once_with("template", {"accessionId": self.accession_id}) + self.assertEqual(result, self.project_generated_id) + + async def test_get_object_project_fails(self): + """Test get object project returns nothing and raises an error.""" + operator = Operator(self.client) + operator.db_service.query.return_value = AsyncIterator([]) + with self.assertRaises(HTTPBadRequest): + await operator.get_object_project("template", 
self.accession_id) + + async def test_get_object_project_fails_missing_project(self): + """Test get object project returns faulty object record that is missing project id.""" + operator = Operator(self.client) + operator.db_service.query.return_value = AsyncIterator([self.test_folder_no_project]) + with self.assertRaises(HTTPBadRequest): + await operator.get_object_project("template", self.accession_id) + + async def test_get_object_project_fails_invalid_collection(self): + """Test get object project raises bad request on invalid collection.""" + operator = Operator(self.client) + operator.db_service.query.return_value = AsyncIterator([]) + with self.assertRaises(HTTPBadRequest): + await operator.get_object_project("something", self.accession_id) + + async def test_get_folder_project_connfail(self): + """Test get folder project, db connection failure.""" + operator = FolderOperator(self.client) + operator.db_service.query.side_effect = ConnectionFailure + with self.assertRaises(HTTPBadRequest): + await operator.get_folder_project(self.folder_id) + + async def test_get_folder_project_opfail(self): + """Test get folder project, db operation failure.""" + operator = FolderOperator(self.client) + operator.db_service.query.side_effect = OperationFailure("err") + with self.assertRaises(HTTPBadRequest): + await operator.get_folder_project(self.folder_id) + + async def test_get_folder_project_passes(self): + """Test get folder project returns project id.""" + operator = FolderOperator(self.client) + operator.db_service.query.return_value = AsyncIterator([self.test_folder]) + result = await operator.get_folder_project(self.folder_id) + operator.db_service.query.assert_called_once_with("folder", {"folderId": self.folder_id}) + self.assertEqual(result, self.project_generated_id) + + async def test_get_folder_project_fails(self): + """Test get folder project returns nothing and raises an error.""" + operator = FolderOperator(self.client) + operator.db_service.query.return_value = AsyncIterator([]) + with self.assertRaises(HTTPBadRequest): + await operator.get_folder_project(self.folder_id) + + async def test_get_folder_project_fails_missing_project(self): + """Test get folder project returns faulty folder record that is missing project id.""" + operator = FolderOperator(self.client) + operator.db_service.query.return_value = AsyncIterator([self.test_folder_no_project]) + with self.assertRaises(HTTPBadRequest): + await operator.get_folder_project(self.folder_id) + + async def test_get_folder_project_fails_invalid_collection(self): + """Test get folder project raises bad request on invalid collection.""" + operator = FolderOperator(self.client) + operator.db_service.query.return_value = AsyncIterator([]) + with self.assertRaises(HTTPBadRequest): + await operator.get_folder_project(self.folder_id) + async def test_create_folder_works_and_returns_folderId(self): """Test create method for folders work.""" operator = FolderOperator(self.client) @@ -845,40 +942,42 @@ async def test_create_user_on_create_fails(self): async def test_check_user_doc_fails(self): """Test check user doc fails.""" + request = get_request_with_fernet() + request.app["db_client"] = MagicMock() operator = UserOperator(self.client) - operator.db_service.query.side_effect = ConnectionFailure with self.assertRaises(HTTPBadRequest): - await operator.check_user_has_doc("folders", self.user_generated_id, self.folder_id) + await operator.check_user_has_doc(request, "something", self.user_generated_id, self.folder_id) async def 
test_check_user_doc_passes(self): - """Test check user doc returns proper data.""" - operator = UserOperator(self.client) - operator.db_service.query.return_value = AsyncIterator(["1"]) - result = await operator.check_user_has_doc("folders", self.user_generated_id, self.folder_id) - operator.db_service.query.assert_called_once_with( - "user", {"folders": {"$elemMatch": {"$eq": self.folder_id}}, "userId": self.user_generated_id} - ) - self.assertTrue(result) - - async def test_check_user_doc_multiple_folders_fails(self): - """Test check user doc returns multiple unique folders.""" - operator = UserOperator(self.client) - operator.db_service.query.return_value = AsyncIterator(["1", "2"]) - with self.assertRaises(HTTPUnprocessableEntity): - await operator.check_user_has_doc("folders", self.user_generated_id, self.folder_id) - operator.db_service.query.assert_called_once_with( - "user", {"folders": {"$elemMatch": {"$eq": self.folder_id}}, "userId": self.user_generated_id} - ) - - async def test_check_user_doc_no_data(self): - """Test check user doc returns no data.""" + """Test check user doc passes when object has same project id and user.""" + UserOperator.check_user_has_doc = make_mocked_coro(True) + request = get_request_with_fernet() + request.app["db_client"] = MagicMock() operator = UserOperator(self.client) - operator.db_service.query.return_value = AsyncIterator([]) - result = await operator.check_user_has_doc("folders", self.user_generated_id, self.folder_id) - operator.db_service.query.assert_called_once_with( - "user", {"folders": {"$elemMatch": {"$eq": self.folder_id}}, "userId": self.user_generated_id} - ) - self.assertFalse(result) + with patch( + "metadata_backend.api.operators.FolderOperator.get_folder_project", + return_value=self.project_generated_id, + ): + with patch( + "metadata_backend.api.middlewares.decrypt_cookie", + return_value={"id": "test"}, + ): + with patch( + "metadata_backend.api.middlewares.get_session", + return_value={"user_info": {}}, + ): + with patch( + "metadata_backend.api.operators.UserOperator.read_user", + return_value={"userId": "test"}, + ): + with patch( + "metadata_backend.api.operators.UserOperator.check_user_has_project", + return_value=True, + ): + result = await operator.check_user_has_doc( + request, "folders", self.user_generated_id, self.folder_id + ) + self.assertTrue(result) async def test_create_user_works_existing_userId(self): """Test create method for existing user.""" @@ -1026,6 +1125,42 @@ async def test_user_objects_assing_fails(self): with self.assertRaises(HTTPBadRequest): await operator.assign_objects(self.user_generated_id, "study", []) + async def test_check_user_has_project_passes(self): + """Test check user has project and doesn't raise an exception.""" + operator = UserOperator(self.client) + operator.db_service.query.return_value = AsyncIterator(["1"]) + result = await operator.check_user_has_project(self.project_generated_id, self.user_generated_id) + operator.db_service.query.assert_called_once_with( + "user", + {"projects": {"$elemMatch": {"projectId": self.project_generated_id}}, "userId": self.user_generated_id}, + ) + self.assertTrue(result) + + async def test_check_user_has_no_project(self): + """Test check user does not have project and raises unauthorised.""" + operator = UserOperator(self.client) + operator.db_service.query.return_value = AsyncIterator([]) + result = await operator.check_user_has_project(self.project_generated_id, self.user_generated_id) + operator.db_service.query.assert_called_once_with( + 
"user", + {"projects": {"$elemMatch": {"projectId": self.project_generated_id}}, "userId": self.user_generated_id}, + ) + self.assertFalse(result) + + async def test_check_user_has_project_connfail(self): + """Test check user has project, db connection failure.""" + operator = UserOperator(self.client) + operator.db_service.query.side_effect = ConnectionFailure + with self.assertRaises(HTTPBadRequest): + await operator.check_user_has_project(self.project_generated_id, self.user_generated_id) + + async def test_check_user_has_project_opfail(self): + """Test check user has project, db operation failure.""" + operator = UserOperator(self.client) + operator.db_service.query.side_effect = OperationFailure("err") + with self.assertRaises(HTTPBadRequest): + await operator.check_user_has_project(self.project_generated_id, self.user_generated_id) + async def test_create_project_works_and_returns_projectId(self): """Test create method for projects work.""" operator = ProjectOperator(self.client) From 131c4a41a6aabf776cbed3cc005e562232d81577 Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Wed, 2 Feb 2022 11:10:34 +0200 Subject: [PATCH 189/336] fix mypy type hint on mongo query object --- metadata_backend/api/handlers/folder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata_backend/api/handlers/folder.py b/metadata_backend/api/handlers/folder.py index b60a105c8..73986b7d4 100644 --- a/metadata_backend/api/handlers/folder.py +++ b/metadata_backend/api/handlers/folder.py @@ -3,7 +3,7 @@ from datetime import date, datetime from distutils.util import strtobool from math import ceil -from typing import Any +from typing import Any, Dict, Union import ujson from aiohttp import web @@ -96,7 +96,7 @@ async def get_folders(self, req: Request) -> Response: LOG.error(reason) raise web.HTTPUnauthorized(reason=reason) - folder_query = {"projectId": project_id} + folder_query: Dict[str, Union[str, Dict[str, Union[str, bool, float]]]] = {"projectId": project_id} # Check if only published or draft folders are requestsed if "published" in req.query: pub_param = req.query.get("published", "").title() From 2bf9fedffbe55cb83fc6837855b41551989700e5 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 3 Feb 2022 11:59:15 +0200 Subject: [PATCH 190/336] bump to node 16 as front-end did --- Dockerfile | 8 ++++---- docs/frontend.rst | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index b1ba0188d..9a56cce85 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ #======================= -FROM node:14-alpine as BUILD-FRONTEND +FROM node:16-alpine as BUILD-FRONTEND #======================= RUN apk add --update \ @@ -11,13 +11,13 @@ ARG BRANCH=master RUN git clone -b ${BRANCH} https://github.com/CSCfi/metadata-submitter-frontend.git WORKDIR /metadata-submitter-frontend -RUN npm install -g npm@7.21.0 \ +RUN npm install -g npm@8.4.0 \ && npx --quiet pinst --disable \ && npm install --production \ && npm run build --production #======================= -FROM python:3.8-alpine3.13 as BUILD-BACKEND +FROM python:3.8-alpine3.15 as BUILD-BACKEND #======================= RUN apk add --update \ @@ -37,7 +37,7 @@ RUN pip install --upgrade pip && \ pip install /root/submitter #======================= -FROM python:3.8-alpine3.13 +FROM python:3.8-alpine3.15 #======================= RUN apk add --no-cache --update libstdc++ diff --git a/docs/frontend.rst b/docs/frontend.rst index d1d84da5f..9a730db45 100644 --- a/docs/frontend.rst +++ b/docs/frontend.rst @@ -5,7 
+5,7 @@ Metadata Submitter Frontend .. note:: Requirements: - * Node 14+ + * Node 16+ Environment Setup ----------------- From 71134df4e62b55768f5ff7f744b3001f6da7bba8 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 3 Feb 2022 12:01:12 +0200 Subject: [PATCH 191/336] remove npm install as node 16 comes with 8+ --- Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9a56cce85..af227f0ee 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,8 +11,7 @@ ARG BRANCH=master RUN git clone -b ${BRANCH} https://github.com/CSCfi/metadata-submitter-frontend.git WORKDIR /metadata-submitter-frontend -RUN npm install -g npm@8.4.0 \ - && npx --quiet pinst --disable \ +RUN npx --quiet pinst --disable \ && npm install --production \ && npm run build --production From 853d09c548952f50a992724efc29480ca2bc6f54 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Feb 2022 09:05:49 +0000 Subject: [PATCH 192/336] Bump pip-tools from 6.4.0 to 6.5.0 Bumps [pip-tools](https://github.com/jazzband/pip-tools) from 6.4.0 to 6.5.0. - [Release notes](https://github.com/jazzband/pip-tools/releases) - [Changelog](https://github.com/jazzband/pip-tools/blob/master/CHANGELOG.md) - [Commits](https://github.com/jazzband/pip-tools/compare/6.4.0...6.5.0) --- updated-dependencies: - dependency-name: pip-tools dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index ebc3f6fa8..2437f3593 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -40,7 +40,7 @@ pathspec==0.9.0 # via black pep517==0.12.0 # via pip-tools -pip-tools==6.4.0 +pip-tools==6.5.0 # via -r requirements-dev.in platformdirs==2.4.0 # via From 0c808f19438d585d06b93783b942a2a59cd3f5c4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Feb 2022 09:17:01 +0000 Subject: [PATCH 193/336] Bump coverage from 6.3 to 6.3.1 Bumps [coverage](https://github.com/nedbat/coveragepy) from 6.3 to 6.3.1. - [Release notes](https://github.com/nedbat/coveragepy/releases) - [Changelog](https://github.com/nedbat/coveragepy/blob/master/CHANGES.rst) - [Commits](https://github.com/nedbat/coveragepy/compare/6.3...6.3.1) --- updated-dependencies: - dependency-name: coverage dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index dcf9b76fb..8561017b5 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ packages=find_packages(exclude=["tests"]), install_requires=requirements, extras_require={ - "test": ["coverage==6.3", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.5"], + "test": ["coverage==6.3.1", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.5"], "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"], }, package_data={ From beb1c38422d0d53b5f4b30ff500c9d1bd4f15142 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Feb 2022 10:26:00 +0000 Subject: [PATCH 194/336] Bump pytest from 6.2.5 to 7.0.0 Bumps [pytest](https://github.com/pytest-dev/pytest) from 6.2.5 to 7.0.0. 
- [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/6.2.5...7.0.0) --- updated-dependencies: - dependency-name: pytest dependency-type: direct:development update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8561017b5..ef36e8f41 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ packages=find_packages(exclude=["tests"]), install_requires=requirements, extras_require={ - "test": ["coverage==6.3.1", "pytest==6.2.5", "pytest-cov==3.0.0", "tox==3.24.5"], + "test": ["coverage==6.3.1", "pytest==7.0.0", "pytest-cov==3.0.0", "tox==3.24.5"], "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"], }, package_data={ From 3fc4d9f0c66bc9be8f90d868460dc5957ce267e1 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 8 Feb 2022 16:28:26 +0200 Subject: [PATCH 195/336] add changelog & wordlist --- .github/config/.wordlist.txt | 35 ++++ CHANGELOG.md | 301 +++++++++++++++++++++++++++++++++++ 2 files changed, 336 insertions(+) create mode 100644 CHANGELOG.md diff --git a/.github/config/.wordlist.txt b/.github/config/.wordlist.txt index 49d435a9b..6e6dfa3a3 100644 --- a/.github/config/.wordlist.txt +++ b/.github/config/.wordlist.txt @@ -28,6 +28,7 @@ annotinfo anonymized antibiogram api +api's apisauce aragonese arxiv @@ -36,6 +37,7 @@ async atac auth authdb +authlib automodule autosummary avaric @@ -70,6 +72,7 @@ bokmål bookchapter boolean bugfix +bugfixes buildkit buildx cdd @@ -78,6 +81,7 @@ centername centerprojectname certreqs chamorro +changelog checksummethod chia chichewa @@ -86,11 +90,14 @@ chmod cli clinvar cloneend +codeql commonname computationalnotebook conf conferencepaper conferenceproceeding +config +configs const contributortype covid @@ -107,6 +114,7 @@ csc cscfi cscusername csi +csrf ctrl cts curation @@ -144,6 +152,7 @@ defaultmember demultiplexed demultiplexing demux +dependabot descriptiontype designdescription destructure @@ -151,6 +160,7 @@ dev devcontainer divehi dnase +dockerfile docstrings doi doiinfo @@ -171,6 +181,8 @@ entrezid entrezlink enum env +enviroment +envs epigenetics eppn eslint @@ -226,6 +238,7 @@ geolocationpolygon geolocations geoprofiles gff +gh github githubusercontent givenname @@ -249,7 +262,10 @@ html http https identifiertype +identitypython ido +idp +ietf igbo igsn iix @@ -269,6 +285,7 @@ js json jsoncontent jwk +jwtconnect kalaallisut kallisto kanuri @@ -277,6 +294,7 @@ katanga keyfile kinyarwanda kirundi +koalaman komi kubernetes kwanyama @@ -328,7 +346,9 @@ methylation methylcytidine mf mgiseq +middleware middlewares +mimetype mingaplength minion miniseq @@ -339,6 +359,7 @@ mirna miseq mkdir mnase +mockauth modindex mol moltype @@ -386,12 +407,14 @@ objecttypes occitan oecd oidc +oidcrp ojibwe ol oligo omim oneof ontologies +openapi openid orcid orgtrack @@ -399,6 +422,7 @@ oromo oss ossetian outputmanagementplan +ownedby pacbio paleo panjabi @@ -431,6 +455,7 @@ poolmembertype popset pre precedesreadindex +prefill preprint prettierrc prevstepindex @@ -446,6 +471,7 @@ publicationyear pubmed py pycqa +pymongo pyspelling quickstart randompriming @@ -471,10 +497,12 @@ relatedmetadatascheme relationtype relativeorder remoteuseridentifier +repsonses reqs resequencing resourcetypegeneral rnaseq +rojopolis rootreducer ror rrna @@ -519,6 +547,7 @@ sequencetype sequencevariation sff sha 
+shellcheck shona sinhala sllversion @@ -574,7 +603,9 @@ tsonga turkmen twi txt +typose ui +ujson umi unencryptedchecksum uniqueitems @@ -587,11 +618,14 @@ userid userinfo useselector uuid +uuids uyghur +validatad validator vcf venda volapük +vscode wcs westboundlongitude wga @@ -607,6 +641,7 @@ wizardshowsummarystep wizardsteps wizardsubmissionfolderslice wolof +wordlist wxs xl xml diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..7e497c2aa --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,301 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Added + +- templates API #256 + - use `ujson` as default json library +- creating draft Datacite DOI for folders #257 + - created a mock web app, which would act similarly to DataCite REST API + - altered `publish_folder` endpoint so that `extraInfo` containing the DOI data is added upon publishing + - added `datePublished` key to folders which takes in the date/time, when folder is published +- VScode Dev environment #287 + - Add VS Code development container + - Update docker for development +- Docker-compose and docker-compose-tls files changed to use variables from .env file. #301 +- Add folder querying by name #305 + - Add indexing on database initialization + - Add new field text_name to folder collection + - Python scripts for database operations. `mongo_indexes.py` for collections and indexes creation to be run if the database is destroyed and `clean_db.py` script with new functionality to only delete documents from collections + - update github actions +- Add folder querying by date #308 +- Add description to JSON schemas #323 + + - add JSON schema spelling checker to pyspelling github action + - optimise wordlist by adding regex ignore patterns + - added pyspelling to pre-commit hooks (fixed syntax for scripts according to https://github.com/koalaman/shellcheck ) + - enum are sorted alphabetically, with the exception of other and unspecified values which are left at the end of the list + - allow for accession key in `referenceAlignment` & `process sequence` as array, previously all accession keys were converted to `accessionId` which is not correct + - add default `gender` as `unknown` + + +### Changed + +- Refactor auth.py package by removing custom OIDC code and replacing it with https://github.com/IdentityPython/JWTConnect-Python-OidcRP. #315 + - New mandatory ENV `OIDC_URL` + - New optional ENVs `OIDC_SCOPE`, `AUTH_METHOD` + - Added oidcrp dependency +- use node 16+ #345 +- VScode Dev environment #287 + - Adds requirements-dev.in/txt files. Now pip dependencies can be managed with pip-tools + - README updated with tox command, development build instructions, and prettify Dockerfile. 
+- update ENA XML and JSON schemas #299 +- Github actions changed the use of https://git.io/misspell to rojopolis/spellcheck-github-actions #316 +- Separated most of the handlers to own files inside the handlers folder #319 + +### Fixed + +- coveralls report #267 +- typose for functions and tests #279 +- fix spelling mistakes for JSON schemas #323 +- oidcrp does not allow empty values, prefill them in mockauth so front-end can start #333 +- Fix development enviroment #336 + + - Add env vars OIDC_URL and OIDC_URL_TEST to mock auth container + - Adds logging configs for mock auth + - Updates mock auth api's token endpoint with expiration configs + - Adds config .pre-commit-config.yaml file required by pre-commit library + - Redirect url in docker-compose is now default + - Adds logging for doi mock api + +### Removed + +- Removed `Authlib` dependency #315 + +### Deprecated + +- Deprecated ENVs `ISS_URL`, `AUTH_URL`, `AUTH_REFERER`, `JWK_URL` #315 + +## [0.11.0] - 2021-08-31 + +### Changed + +- package updates + +### Added + +- Feature/sort folders #249 +- Include DOI information in the folder schema #246 + + +## [0.10.0] - 2021-08-12 + +### Added + +- add integration tests for misses in dataset, experiment, policy + +### Changed + +- package updates +- EGA XML schemas version:1.8.0 +- refactor analysis and experiment schemas to adhere to XML schema + +### Fixed + +- fix misses for DAC, experiment and policy processing of XML +- fix misses in JSON Schema + +## [0.9.0] - 2021-03-22 + +### Added + +- use dependabot +- support simultaneous sessions + +### Changed + +- Refactor JSON schema Links +- refactor handlers to be more streamlined +- validate patch requests for JSON content +- switch to python 3.8 + +## [0.8.1] - 2021-02-15 + +### Fixed + +- bugfix for error pages #202 + +## [0.8.0] - 2021-02-12 + +### Added + +- TLS support +- use `sub` as alternative to `eppn` to identify users +- `PATCH` for objects and `PUT` for XML objects enabled +- delete folders and objects associated to user on user delete + +### Changed + +- redirect to error pages +- extended integration tests + +### Fixed + +- fix replace on json patch +- general bug and fixes + +## [0.7.1] - 2021-01-19 + +### Fixed + +- hotfix release #176 + + - added check_object_exists to check object exists and fail early with 404 before checking it belongs to user + - refactor and added more check_folder_exists to check folder exists before doing anything + - integration test to check objects are deleted properly + +### Changes + +- check objects and folders exist before any operation +- integration check to see if deleted object or folder are still registered in db + +## [0.7.0] - 2021-01-06 + +### Added + +- CodeQL github action #162 +- `/health` endpoint #173 + +- map `users` to `folders` with `_handle_check_ownedby_user` #158 + - querying for objects is restricted to only the objects that belong to user + - return folders owned by user or published + - added a few db operators some used (aggregate, remove) + - process json patch to mongo query so that there is addition and replace instead of full rewrite of the document causing race condition + - standardise raises description and general improvements and fixes of logs and descriptions + +### Changed +- verify `/publish` endpoint #163 +- restrict endpoints to logged in users #151 +- updated documentation #165 +- switch to using uuids for accession ids #168 +- integration tests and increase unit test coverage #166 + +### Fixed + +- fixes for idp and location headers redirects #150 +- 
fix race condition in db operations #158 +- fix handling of draft deletion by removing redundant deletion #164, #169 and #172 + +## [0.6.1] - 2020-11-23 + +### Added + +- CSRF session #142 + +### Changed + +- refactor draft `/folder` #144 +- refactor gh actions #140 +- patch publish #141 + +### Fixed + +- bugfixes for login redirect #139 + +## [0.6.0] - 2020-10-08 + +### Added + +- authentication with OIDC #133 +- only 3.7 support going further #134 +- more submission actions `ADD` and `MODIFY` #137 + + +## [0.5.3] - 2020-08-21 + +### Changed + +- updated OpenAPI specifications #127 +- python modules, project description and instructions to documentation sources #128 +- added integration tests #129 +- updated documentation #130 + + +## [0.5.2] - 2020-08-14 + +### Fixes + +- fix mimetype for SVG image and package data + +## [0.5.1] - 2020-08-14 + +### Added + +- Add folder POST JSON schema +- Added `/user` endpoint with support for GET, PATCH and DELETE + +### Fixes + +- Dockerfile build fixes #115 +- fix JSON Schema details #117 +- missing env from github actions #119 +- typo fixes #120 +- await responses #122 + + +## [0.5.0] - 2020-08-06 + +### Added + +- Centralized status message handler #83 +- Alert dialog component #81 +- `/folders` endpoint +- `/drafts` endpoint +- JSON validation +- XML better parsing +- Auth middleware +- pagination + +### Changed + +- Improved current naming conventions #82 +- Login flow with new routes for Home & Login #76, #79, #80 +- change from pymongo to motor + +## [0.2.0] - 2020-07-01 + +### Added + +- Added integration tests +- switched to github actions +- added base docs folder +- added more refined XML parsing +- Integration tests added +- Refactor unit tests + +### Changed + +- refactor API endpoints and repsonses + - error using https://tools.ietf.org/html/rfc7807 + - `objects` and `schemas` endpoints added + +## [0.1.0] - 2020-06-08 + +### Added + +- RESTful API for metadata XML files, making it possible to Submit, List and Query files +- Files are also validatad during submission process. 
+ + +[unreleased]: https://github.com/CSCfi/metadata-submitter/compare/v0.11.0...HEAD +[0.11.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.10.0...v0.11.0 +[0.10.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.9.0...v0.10.0 +[0.9.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.8.1...v0.9.0 +[0.8.1]: https://github.com/CSCfi/metadata-submitter/compare/v0.8.0...v0.8.1 +[0.8.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.7.1...v0.8.0 +[0.7.1]: https://github.com/CSCfi/metadata-submitter/compare/v0.7.0...v0.7.1 +[0.7.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.6.1...v0.7.0 +[0.6.1]: https://github.com/CSCfi/metadata-submitter/compare/v0.6.0...v0.6.1 +[0.6.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.5.3...v0.6.0 +[0.5.3]: https://github.com/CSCfi/metadata-submitter/compare/v0.5.2...v0.5.3 +[0.5.2]: https://github.com/CSCfi/metadata-submitter/compare/v0.5.1...v0.5.2 +[0.5.1]: https://github.com/CSCfi/metadata-submitter/compare/v0.5.0...v0.5.1 +[0.5.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.2.0...v0.5.0 +[0.3.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.1.0...v0.2.0 +[0.1.0]: https://github.com/CSCfi/metadata-submitter/releases/tag/v0.1.0 From 9ba731a3b1d4e6e3066125b6ef8eaa1014fd0cdb Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Thu, 10 Feb 2022 09:40:39 +0200 Subject: [PATCH 196/336] remove typos from spellcheck wordlist --- .github/config/.wordlist.txt | 4 ---- CHANGELOG.md | 8 ++++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/.github/config/.wordlist.txt b/.github/config/.wordlist.txt index 6e6dfa3a3..69cc2a43f 100644 --- a/.github/config/.wordlist.txt +++ b/.github/config/.wordlist.txt @@ -181,7 +181,6 @@ entrezid entrezlink enum env -enviroment envs epigenetics eppn @@ -497,7 +496,6 @@ relatedmetadatascheme relationtype relativeorder remoteuseridentifier -repsonses reqs resequencing resourcetypegeneral @@ -603,7 +601,6 @@ tsonga turkmen twi txt -typose ui ujson umi @@ -620,7 +617,6 @@ useselector uuid uuids uyghur -validatad validator vcf venda diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e497c2aa..7cefdde6e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,10 +52,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - coveralls report #267 -- typose for functions and tests #279 +- typoes for functions and tests #279 - fix spelling mistakes for JSON schemas #323 - oidcrp does not allow empty values, prefill them in mockauth so front-end can start #333 -- Fix development enviroment #336 +- Fix development environment #336 - Add env vars OIDC_URL and OIDC_URL_TEST to mock auth container - Adds logging configs for mock auth @@ -271,7 +271,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -- refactor API endpoints and repsonses +- refactor API endpoints and responses - error using https://tools.ietf.org/html/rfc7807 - `objects` and `schemas` endpoints added @@ -280,7 +280,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - RESTful API for metadata XML files, making it possible to Submit, List and Query files -- Files are also validatad during submission process. +- Files are also validated during submission process. 
[unreleased]: https://github.com/CSCfi/metadata-submitter/compare/v0.11.0...HEAD From a464764e2c7bf92d19e6c2e0dbf32c70fa16f584 Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Thu, 10 Feb 2022 09:47:38 +0200 Subject: [PATCH 197/336] fix typo --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7cefdde6e..14057a1c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,7 +52,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - coveralls report #267 -- typoes for functions and tests #279 +- typos for functions and tests #279 - fix spelling mistakes for JSON schemas #323 - oidcrp does not allow empty values, prefill them in mockauth so front-end can start #333 - Fix development environment #336 From c974da739fe3a70d27b7d33bc696e3d100632823 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 26 Jan 2022 13:20:30 +0000 Subject: [PATCH 198/336] Add filename extraction for objects created from file Filename is needed for db entry (fileName and displayTitle) when new object created from file and is assigned to folder. --- metadata_backend/api/handlers/common.py | 10 +++++----- metadata_backend/api/handlers/object.py | 4 ++-- metadata_backend/api/handlers/submission.py | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/metadata_backend/api/handlers/common.py b/metadata_backend/api/handlers/common.py index 43ec14f70..c575c3d58 100644 --- a/metadata_backend/api/handlers/common.py +++ b/metadata_backend/api/handlers/common.py @@ -1,11 +1,10 @@ """Functions shared between handlers.""" import csv import string -from typing import List, Tuple, Dict, Any +from typing import Any, Dict, List, Tuple -from aiohttp import BodyPartReader, web, hdrs, MultipartReader +from aiohttp import BodyPartReader, MultipartReader, hdrs, web from aiohttp.web import Request - from xmlschema import XMLResource from xmlschema.etree import ElementTree @@ -16,7 +15,7 @@ async def multipart_content( req: Request, extract_one: bool = False, expect_xml: bool = False -) -> Tuple[List[Tuple[Any, str]], str]: +) -> Tuple[List[Tuple[Any, str]], str, str]: """Get content(s) and schema type(s) of a multipart request (from either csv or xml format). Note: for multiple files support check: https://docs.aiohttp.org/en/stable/multipart.html#hacking-multipart @@ -48,6 +47,7 @@ async def multipart_content( raise web.HTTPUnsupportedMediaType(reason=reason) if not part: break + filename = part.filename if part.filename else "" if extract_one and (xml_files or csv_files): reason = "Only one file can be sent to this endpoint at a time." 
         LOG.error(reason)
@@ -72,7 +72,7 @@
         raise web.HTTPBadRequest(reason=reason)
 
     # Return extracted content
-    return _get_content_with_type(xml_files, csv_files)
+    return _get_content_with_type(xml_files, csv_files) + (filename,)
 
 
 async def _extract_upload(part: BodyPartReader) -> Tuple[str, str]:
diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py
index 506de11c5..871e776d7 100644
--- a/metadata_backend/api/handlers/object.py
+++ b/metadata_backend/api/handlers/object.py
@@ -109,7 +109,7 @@ async def post_object(self, req: Request) -> Response:
         operator: Union[Operator, XMLOperator]
         if req.content_type == "multipart/form-data":
             _only_xml = False if schema_type in _allowed_csv else True
-            files, cont_type = await multipart_content(req, extract_one=True, expect_xml=_only_xml)
+            files, cont_type, _ = await multipart_content(req, extract_one=True, expect_xml=_only_xml)
             if cont_type == "xml":
                 # from this tuple we only care about the content
                 # files should be of form (content, schema)
@@ -222,7 +222,7 @@ async def put_object(self, req: Request) -> Response:
         content: Union[Dict, str]
         operator: Union[Operator, XMLOperator]
         if req.content_type == "multipart/form-data":
-            files, _ = await multipart_content(req, extract_one=True, expect_xml=True)
+            files, _, _ = await multipart_content(req, extract_one=True, expect_xml=True)
             content, _ = files[0]
             operator = XMLOperator(db_client)
         else:
diff --git a/metadata_backend/api/handlers/submission.py b/metadata_backend/api/handlers/submission.py
index 8d36b9a73..c9c2176e8 100644
--- a/metadata_backend/api/handlers/submission.py
+++ b/metadata_backend/api/handlers/submission.py
@@ -31,7 +31,7 @@ async def submit(self, req: Request) -> Response:
         :raises: HTTPBadRequest if request is missing some parameters or cannot be processed
         :returns: XML-based receipt from submission
         """
-        files, _ = await multipart_content(req, expect_xml=True)
+        files, _, _ = await multipart_content(req, expect_xml=True)
         schema_types = Counter(file[1] for file in files)
         if "submission" not in schema_types:
             reason = "There must be a submission.xml file in submission."
@@ -92,7 +92,7 @@ async def validate(self, req: Request) -> Response:
         :param req: Multipart POST request with submission.xml and files
         :returns: JSON response indicating if validation was successful or not
         """
-        files, _ = await multipart_content(req, extract_one=True, expect_xml=True)
+        files, _, _ = await multipart_content(req, extract_one=True, expect_xml=True)
         xml_content, schema_type = files[0]
         validator = await self._perform_validation(schema_type, xml_content)
         return web.Response(body=validator.resp_body, content_type="application/json")

From 9149bb6026d3cbe50eecf187e1bb8017e20b8987 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Wed, 26 Jan 2022 15:11:38 +0000
Subject: [PATCH 199/336] Add folder patching on object creation

Move responsibility for adding a newly created object to a folder from
the frontend to the object endpoint. The folder id is now a required
query parameter on object POST requests.

Tests updated.
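For illustration, a minimal sketch of the new request shape, assuming the
aiohttp session `sess` and the `objects_url` base path already used by the
integration tests; the helper name below is hypothetical, not part of this
patch:

```python
# Hedged sketch: `sess` and `objects_url` are assumed to come from the
# integration-test setup; `post_object_to_folder` is a hypothetical name.
async def post_object_to_folder(sess, schema, folder_id, request_data):
    """Post a metadata object; the API now attaches it to the folder."""
    async with sess.post(
        f"{objects_url}/{schema}",
        params={"folder": folder_id},  # required; omitting it yields HTTP 400
        data=request_data,
    ) as resp:
        assert resp.status == 201, f"HTTP Status code error, got {resp.status}"
        # single objects return {"accessionId": ...}; CSV uploads return a list
        return await resp.json()
```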
--- metadata_backend/api/handlers/object.py | 68 +++++++++- tests/integration/run_tests.py | 170 ++++++++++-------------- tests/test_handlers.py | 20 +-- 3 files changed, 145 insertions(+), 113 deletions(-) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index 871e776d7..889b69641 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -1,6 +1,6 @@ """Handle HTTP methods for server.""" from math import ceil -from typing import Dict, Union, List, Any, Tuple +from typing import Any, Dict, List, Tuple, Union import ujson from aiohttp import web @@ -101,6 +101,13 @@ async def post_object(self, req: Request) -> Response: """ _allowed_csv = ["sample"] schema_type = req.match_info["schema"] + + folder_id = req.query.get("folder", "") + if not folder_id: + reason = "Folder ID is required query parameter." + raise web.HTTPBadRequest(reason=reason) + patch_params = {"folder": folder_id} + self._check_schema_exists(schema_type) collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type @@ -109,17 +116,18 @@ async def post_object(self, req: Request) -> Response: operator: Union[Operator, XMLOperator] if req.content_type == "multipart/form-data": _only_xml = False if schema_type in _allowed_csv else True - files, cont_type, _ = await multipart_content(req, extract_one=True, expect_xml=_only_xml) + files, cont_type, filename = await multipart_content(req, extract_one=True, expect_xml=_only_xml) if cont_type == "xml": # from this tuple we only care about the content # files should be of form (content, schema) content, _ = files[0] else: - # for CSV files we need to tread this as a list of tuples (content, schema) + # for CSV files we need to treat this as a list of tuples (content, schema) content = files # If multipart request contains XML, XML operator is used. # Else the multipart request is expected to contain CSV file(s) which are converted into JSON. operator = XMLOperator(db_client) if cont_type == "xml" else Operator(db_client) + patch_params.update({"cont_type": cont_type, "title": filename}) else: content = await self._get_data(req) if not req.path.startswith("/drafts"): @@ -147,6 +155,20 @@ async def post_object(self, req: Request) -> Response: location_headers = CIMultiDict(Location=f"{url}/{accession_id}") LOG.info(f"POST object with accesssion ID {accession_id} in schema {collection} was successful.") + # Gathering data for object to be added to folder + if not isinstance(data, List): + ids = [data] + if not patch_params.get("title", None) and isinstance(content, Dict): + try: + patch_params["title"] = ( + content["descriptor"]["studyTitle"] if collection == "study" else content["title"] + ) + except (TypeError, KeyError): + patch_params["title"] = "" + patch = await self.prepare_folder_patch(collection, ids, patch_params) + folder_op = FolderOperator(db_client) + folder_id = await folder_op.update_folder(folder_id, patch) + body = ujson.dumps(data, escape_forward_slashes=False) return web.Response( @@ -283,3 +305,43 @@ async def patch_object(self, req: Request) -> Response: body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) LOG.info(f"PATCH object with accession ID {accession_id} in schema {collection} was successful.") return web.Response(body=body, status=200, content_type="application/json") + + async def prepare_folder_patch(self, schema: str, ids: List, params: Dict[str, str]) -> List: + """Prepare patch operations list. 
+
+        :param schema: schema of objects to be added to the folder
+        :param ids: object IDs
+        :param params: additional data required for db entry
+        :returns: list of patch operations
+        """
+        if not params.get("cont_type", None):
+            submission_type = "Form"
+        else:
+            submission_type = params["cont_type"].upper()
+
+        if schema.startswith("draft"):
+            path = "/drafts/-"
+        else:
+            path = "/metadataObjects/-"
+
+        patch = []
+        patch_ops: Dict[str, Any] = {}
+        for id in ids:
+            patch_ops = {
+                "op": "add",
+                "path": path,
+                "value": {
+                    "accessionId": id["accessionId"],
+                    "schema": schema,
+                    "tags": {
+                        "submissionType": submission_type,
+                        "displayTitle": params["title"],
+                    },
+                },
+            }
+
+            if submission_type != "Form":
+                patch_ops["value"]["tags"]["fileName"] = params["title"]
+            patch.append(patch_ops)
+
+        return patch
diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py
index edc404b95..ba0b82bc7 100644
--- a/tests/integration/run_tests.py
+++ b/tests/integration/run_tests.py
@@ -140,7 +140,7 @@ async def create_request_json_data(schema, filename):
     return request_data
 
 
-async def post_object(sess, schema, filename):
+async def post_object(sess, schema, folder_id, filename):
     """Post one metadata object within session, returns accessionId.
 
     :param sess: HTTP session in which request call is made
     :param filename: name of the file used for testing.
     """
     request_data = await create_request_data(schema, filename)
-    async with sess.post(f"{objects_url}/{schema}", data=request_data) as resp:
+    async with sess.post(f"{objects_url}/{schema}", params={"folder": folder_id}, data=request_data) as resp:
         LOG.debug(f"Adding new object to {schema}, via XML/CSV file {filename}")
         assert resp.status == 201, f"HTTP Status code error, got {resp.status}"
         ans = await resp.json()
         return ans if isinstance(ans, list) else ans["accessionId"], schema
 
 
-async def post_object_expect_status(sess, schema, filename, status):
+async def post_object_expect_status(sess, schema, folder_id, filename, status):
     """Post one metadata object within session, returns accessionId.
 
     :param sess: HTTP session in which request call is made
     :param filename: name of the file used for testing.
     """
     request_data = await create_request_data(schema, filename)
-    async with sess.post(f"{objects_url}/{schema}", data=request_data) as resp:
+    async with sess.post(f"{objects_url}/{schema}", params={"folder": folder_id}, data=request_data) as resp:
         LOG.debug(f"Adding new object to {schema}, via XML/CSV file {filename} and expecting status: {status}")
         assert resp.status == status, f"HTTP Status code error, got {resp.status}"
         if status < 400:
             ans = await resp.json()
             return ans if isinstance(ans, list) else ans["accessionId"], schema
 
 
-async def post_object_json(sess, schema, filename):
+async def post_object_json(sess, schema, folder_id, filename):
     """Post & put one metadata object within session, returns accessionId.
 
     :param sess: HTTP session in which request call is made
     :param filename: name of the file used for testing.
""" request_data = await create_request_json_data(schema, filename) - async with sess.post(f"{objects_url}/{schema}", data=request_data) as resp: + async with sess.post(f"{objects_url}/{schema}", params={"folder": folder_id}, data=request_data) as resp: LOG.debug(f"Adding new object to {schema}, via JSON file {filename}") assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() @@ -198,7 +198,7 @@ async def delete_object(sess, schema, accession_id): assert resp.status == 204, f"HTTP Status code error, got {resp.status}" -async def post_draft(sess, schema, filename): +async def post_draft(sess, schema, folder_id, filename): """Post one draft metadata object within session, returns accessionId. :param sess: HTTP session in which request call is made @@ -206,14 +206,14 @@ async def post_draft(sess, schema, filename): :param filename: name of the file used for testing. """ request_data = await create_request_data(schema, filename) - async with sess.post(f"{drafts_url}/{schema}", data=request_data) as resp: + async with sess.post(f"{drafts_url}/{schema}", params={"folder": folder_id}, data=request_data) as resp: LOG.debug(f"Adding new draft object to {schema}, via XML file {filename}") assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() return ans["accessionId"] -async def post_draft_json(sess, schema, filename): +async def post_draft_json(sess, schema, folder_id, filename): """Post & put one metadata object within session, returns accessionId. :param sess: HTTP session in which request call is made @@ -221,7 +221,7 @@ async def post_draft_json(sess, schema, filename): :param filename: name of the file used for testing. """ request_data = await create_request_json_data(schema, filename) - async with sess.post(f"{drafts_url}/{schema}", data=request_data) as resp: + async with sess.post(f"{drafts_url}/{schema}", params={"folder": folder_id}, data=request_data) as resp: LOG.debug(f"Adding new draft object to {schema}, via JSON file {filename}") assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() @@ -516,11 +516,7 @@ async def test_crud_works(sess, schema, filename, folder_id): :param filename: name of the file used for testing :param folder_id: id of the folder used to group submission """ - accession_id = await post_object(sess, schema, filename) - patch_object = [ - {"op": "add", "path": "/metadataObjects/-", "value": {"accessionId": accession_id[0], "schema": schema}} - ] - await patch_folder(sess, folder_id, patch_object) + accession_id = await post_object(sess, schema, folder_id, filename) async with sess.get(f"{objects_url}/{schema}/{accession_id[0]}") as resp: LOG.debug(f"Checking that {accession_id[0]} JSON is in {schema}") assert resp.status == 200, f"HTTP Status code error, got {resp.status}" @@ -556,15 +552,11 @@ async def test_csv(sess, folder_id): """ _schema = "sample" _filename = "EGAformat.csv" - accession_id = await post_object(sess, _schema, _filename) + accession_id = await post_object(sess, _schema, folder_id, _filename) # there are 3 rows and we expected to get 3rd assert len(accession_id[0]) == 3, f"expected nb of CSV entries does not match, we got: {len(accession_id)}" _first_csv_row_id = accession_id[0][0]["accessionId"] - patch_object = [ - {"op": "add", "path": "/metadataObjects/-", "value": {"accessionId": _first_csv_row_id, "schema": _schema}} - ] - await patch_folder(sess, folder_id, patch_object) async with 
sess.get(f"{objects_url}/{_schema}/{_first_csv_row_id}") as resp: LOG.debug(f"Checking that {_first_csv_row_id} JSON is in {_schema}") assert resp.status == 200, f"HTTP Status code error, got {resp.status}" @@ -582,11 +574,11 @@ async def test_csv(sess, folder_id): _filename = "empty.csv" # status should be 400 - await post_object_expect_status(sess, _schema, _filename, 400) + await post_object_expect_status(sess, _schema, folder_id, _filename, 400) _filename = "EGA_sample_w_issue.csv" # status should be 201 but we expect 3 rows, as the CSV has 4 rows one of which is empty - accession_id = await post_object_expect_status(sess, _schema, _filename, 201) + accession_id = await post_object_expect_status(sess, _schema, folder_id, _filename, 201) assert len(accession_id[0]) == 3, f"expected nb of CSV entries does not match, we got: {len(accession_id)}" @@ -600,11 +592,7 @@ async def test_put_objects(sess, folder_id): :param sess: HTTP session in which request call is made :param folder_id: id of the folder used to group submission """ - accession_id = await post_object(sess, "study", "SRP000539.xml") - patch_object = [ - {"op": "add", "path": "/metadataObjects/-", "value": {"accessionId": accession_id[0], "schema": "study"}} - ] - await patch_folder(sess, folder_id, patch_object) + accession_id = await post_object(sess, "study", folder_id, "SRP000539.xml") await put_object_json(sess, "study", accession_id[0], "SRP000539.json") await put_object_xml(sess, "study", accession_id[0], "SRP000539_put.xml") @@ -622,11 +610,7 @@ async def test_crud_drafts_works(sess, schema, orginal_file, update_file, folder :param update_file: name of the file used for updating object. :param folder_id: id of the folder used to group submission objects """ - draft_id = await post_draft_json(sess, schema, orginal_file) - patch_draft_data = [ - {"op": "add", "path": "/drafts/-", "value": {"accessionId": draft_id, "schema": f"draft-{schema}"}} - ] - await patch_folder(sess, folder_id, patch_draft_data) + draft_id = await post_draft_json(sess, schema, folder_id, orginal_file) accession_id = await put_draft(sess, schema, draft_id, update_file) async with sess.get(f"{drafts_url}/{schema}/{accession_id}") as resp: LOG.debug(f"Checking that {accession_id} JSON is in {schema}") @@ -657,11 +641,7 @@ async def test_patch_drafts_works(sess, schema, orginal_file, update_file, folde :param update_file: name of the file used for updating object. 
:param folder_id: id of the folder used to group submission objects """ - draft_id = await post_draft_json(sess, schema, orginal_file) - patch_draft_data = [ - {"op": "add", "path": "/drafts/-", "value": {"accessionId": draft_id, "schema": f"draft-{schema}"}} - ] - await patch_folder(sess, folder_id, patch_draft_data) + draft_id = await post_draft_json(sess, schema, folder_id, orginal_file) accession_id = await patch_draft(sess, schema, draft_id, update_file) async with sess.get(f"{drafts_url}/{schema}/{accession_id}") as resp: LOG.debug(f"Checking that {accession_id} JSON is in {schema}") @@ -682,13 +662,9 @@ async def test_querying_works(sess, folder_id): :param sess: HTTP session in which request call is made :param folder_id: id of the folder used to group submission objects """ - files = await asyncio.gather(*[post_object(sess, schema, filename) for schema, filename in test_xml_files]) - - for accession_id, schema in files: - patch_folder_obj = [ - {"op": "add", "path": "/metadataObjects/-", "value": {"accessionId": accession_id, "schema": schema}} - ] - await patch_folder(sess, folder_id, patch_folder_obj) + files = await asyncio.gather( + *[post_object(sess, schema, folder_id, filename) for schema, filename in test_xml_files] + ) queries = { "study": [ @@ -740,13 +716,7 @@ async def test_getting_all_objects_from_schema_works(sess, folder_id): :param folder_id: id of the folder used to group submission objects """ # Add objects - files = await asyncio.gather(*[post_object(sess, "study", "SRP000539.xml") for _ in range(13)]) - - for accession_id, schema in files: - patch_folder_obj = [ - {"op": "add", "path": "/metadataObjects/-", "value": {"accessionId": accession_id, "schema": schema}} - ] - await patch_folder(sess, folder_id, patch_folder_obj) + files = await asyncio.gather(*[post_object(sess, "study", folder_id, "SRP000539.xml") for _ in range(13)]) # Test default values async with sess.get(f"{objects_url}/study") as resp: @@ -791,11 +761,7 @@ async def test_crud_folders_works(sess): assert resp.status == 200, f"HTTP Status code error, got {resp.status}" # Create draft from test XML file and patch the draft into the newly created folder - draft_id = await post_draft(sess, "sample", "SRS001433.xml") - patch_add_draft = [ - {"op": "add", "path": "/drafts/-", "value": [{"accessionId": draft_id, "schema": "draft-sample"}]} - ] - folder_id = await patch_folder(sess, folder_id, patch_add_draft) + draft_id = await post_draft(sess, "sample", folder_id, "SRS001433.xml") async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was patched") res = await resp.json() @@ -803,32 +769,43 @@ async def test_crud_folders_works(sess): assert res["name"] == folder_data["name"], "expected folder name does not match" assert res["description"] == folder_data["description"], "folder description content mismatch" assert res["published"] is False, "folder is published, expected False" - assert res["drafts"] == [{"accessionId": draft_id, "schema": "draft-sample"}], "folder drafts content mismatch" + assert res["drafts"] == [ + { + "accessionId": draft_id, + "schema": "draft-sample", + "tags": {"submissionType": "XML", "displayTitle": "SRS001433.xml", "fileName": "SRS001433.xml"}, + } + ], "folder drafts content mismatch" assert res["metadataObjects"] == [], "there are objects in folder, expected empty" # Get the draft from the collection within this session and post it to objects collection draft_data = await get_draft(sess, "sample", draft_id) - async with 
sess.post(f"{objects_url}/sample", data=draft_data) as resp: + async with sess.post(f"{objects_url}/sample", params={"folder": folder_id}, data=draft_data) as resp: LOG.debug("Adding draft to actual objects") assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() assert ans["accessionId"] != draft_id, "draft id does not match expected" accession_id = ans["accessionId"] - # Patch folder so that original draft becomes an object in the folder - patch_folder_move_draft = [ - {"op": "add", "path": "/metadataObjects/-", "value": [{"accessionId": accession_id, "schema": "sample"}]}, - ] - folder_id = await patch_folder(sess, folder_id, patch_folder_move_draft) async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was patched") res = await resp.json() assert res["folderId"] == folder_id, "expected folder id does not match" assert res["published"] is False, "folder is published, expected False" assert "datePublished" not in res.keys() - assert res["drafts"] == [{"accessionId": draft_id, "schema": "draft-sample"}], "folder drafts content mismatch" + assert res["drafts"] == [ + { + "accessionId": draft_id, + "schema": "draft-sample", + "tags": {"submissionType": "XML", "displayTitle": "SRS001433.xml", "fileName": "SRS001433.xml"}, + } + ], "folder drafts content mismatch" assert res["metadataObjects"] == [ - {"accessionId": accession_id, "schema": "sample"} + { + "accessionId": accession_id, + "schema": "sample", + "tags": {"submissionType": "Form", "displayTitle": "HapMap sample from Homo sapiens"}, + } ], "folder metadataObjects content mismatch" # Publish the folder @@ -845,7 +822,11 @@ async def test_crud_folders_works(sess): assert "extraInfo" in res.keys() assert res["drafts"] == [], "there are drafts in folder, expected empty" assert res["metadataObjects"] == [ - {"accessionId": accession_id, "schema": "sample"} + { + "accessionId": accession_id, + "schema": "sample", + "tags": {"submissionType": "Form", "displayTitle": "HapMap sample from Homo sapiens"}, + } ], "folder metadataObjects content mismatch" # Delete folder @@ -869,11 +850,7 @@ async def test_crud_folders_works_no_publish(sess): assert resp.status == 200, f"HTTP Status code error, got {resp.status}" # Create draft from test XML file and patch the draft into the newly created folder - draft_id = await post_draft(sess, "sample", "SRS001433.xml") - patch_add_draft = [ - {"op": "add", "path": "/drafts/-", "value": [{"accessionId": draft_id, "schema": "draft-sample"}]} - ] - folder_id = await patch_folder(sess, folder_id, patch_add_draft) + draft_id = await post_draft(sess, "sample", folder_id, "SRS001433.xml") async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was patched") res = await resp.json() @@ -881,31 +858,42 @@ async def test_crud_folders_works_no_publish(sess): assert res["name"] == folder_data["name"], "expected folder name does not match" assert res["description"] == folder_data["description"], "folder description content mismatch" assert res["published"] is False, "folder is published, expected False" - assert res["drafts"] == [{"accessionId": draft_id, "schema": "draft-sample"}], "folder drafts content mismatch" + assert res["drafts"] == [ + { + "accessionId": draft_id, + "schema": "draft-sample", + "tags": {"submissionType": "XML", "displayTitle": "SRS001433.xml", "fileName": "SRS001433.xml"}, + } + ], "folder drafts content mismatch" assert res["metadataObjects"] == [], "there 
are objects in folder, expected empty" # Get the draft from the collection within this session and post it to objects collection draft = await get_draft(sess, "sample", draft_id) - async with sess.post(f"{objects_url}/sample", data=draft) as resp: + async with sess.post(f"{objects_url}/sample", params={"folder": folder_id}, data=draft) as resp: LOG.debug("Adding draft to actual objects") assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() assert ans["accessionId"] != draft_id, "draft id does not match expected" accession_id = ans["accessionId"] - # Patch folder so that original draft becomes an object in the folder - patch_folder_move_draft = [ - {"op": "add", "path": "/metadataObjects/-", "value": [{"accessionId": accession_id, "schema": "sample"}]}, - ] - folder_id = await patch_folder(sess, folder_id, patch_folder_move_draft) async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was patched") res = await resp.json() assert res["folderId"] == folder_id, "expected folder id does not match" assert res["published"] is False, "folder is published, expected False" - assert res["drafts"] == [{"accessionId": draft_id, "schema": "draft-sample"}], "folder drafts content mismatch" + assert res["drafts"] == [ + { + "accessionId": draft_id, + "schema": "draft-sample", + "tags": {"submissionType": "XML", "displayTitle": "SRS001433.xml", "fileName": "SRS001433.xml"}, + } + ], "folder drafts content mismatch" assert res["metadataObjects"] == [ - {"accessionId": accession_id, "schema": "sample"} + { + "accessionId": accession_id, + "schema": "sample", + "tags": {"submissionType": "Form", "displayTitle": "HapMap sample from Homo sapiens"}, + } ], "folder metadataObjects content mismatch" # Delete folder @@ -1309,32 +1297,14 @@ async def test_get_folders_objects(sess, folder_id: str): :param sess: HTTP session in which request call is made :param folder_id: id of the folder used to group submission objects """ - accession_id = await post_object_json(sess, "study", "SRP000539.json") - patch_add_object = [ - {"op": "add", "path": "/metadataObjects/-", "value": {"accessionId": accession_id, "schema": "study"}} - ] - await patch_folder(sess, folder_id, patch_add_object) - async with sess.get(f"{folders_url}") as resp: - LOG.debug(f"Reading folder {folder_id}") - assert resp.status == 200, f"HTTP Status code error, got {resp.status}" - response = await resp.json() - assert len(response["folders"]) == 1 - assert response["folders"][0]["metadataObjects"][0]["accessionId"] == accession_id - assert "tags" not in response["folders"][0]["metadataObjects"][0] - patch_add_more_object = [ - { - "op": "add", - "path": "/metadataObjects/0/tags", - "value": {"submissionType": "Form"}, - } - ] - await patch_folder(sess, folder_id, patch_add_more_object) + accession_id = await post_object_json(sess, "study", folder_id, "SRP000539.json") async with sess.get(f"{folders_url}") as resp: LOG.debug(f"Reading folder {folder_id}") assert resp.status == 200, f"HTTP Status code error, got {resp.status}" response = await resp.json() assert len(response["folders"]) == 1 assert response["folders"][0]["metadataObjects"][0]["accessionId"] == accession_id + assert "tags" in response["folders"][0]["metadataObjects"][0] assert response["folders"][0]["metadataObjects"][0]["tags"]["submissionType"] == "Form" patch_change_tags_object = [ diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 2156a586b..0f6244a66 100644 --- 
a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -387,7 +387,7 @@ async def test_submit_object_works(self): """Test that submission is handled, XMLOperator is called.""" files = [("study", "SRP000539.xml")] data = self.create_submission_data(files) - response = await self.client.post("/objects/study", data=data) + response = await self.client.post("/objects/study", params={"folder": "some id"}, data=data) self.assertEqual(response.status, 201) self.assertIn(self.test_ega_string, await response.text()) self.MockedXMLOperator().create_metadata_object.assert_called_once() @@ -399,7 +399,7 @@ async def test_submit_object_works_with_json(self): "alias": "GSE10966", "descriptor": {"studyTitle": "Highly", "studyType": "Other"}, } - response = await self.client.post("/objects/study", json=json_req) + response = await self.client.post("/objects/study", params={"folder": "some id"}, json=json_req) self.assertEqual(response.status, 201) self.assertIn(self.test_ega_string, await response.text()) self.MockedOperator().create_metadata_object.assert_called_once() @@ -407,7 +407,7 @@ async def test_submit_object_works_with_json(self): async def test_submit_object_missing_field_json(self): """Test that JSON has missing property.""" json_req = {"centerName": "GEO", "alias": "GSE10966"} - response = await self.client.post("/objects/study", json=json_req) + response = await self.client.post("/objects/study", params={"folder": "some id"}, json=json_req) reason = "Provided input does not seem correct because: ''descriptor' is a required property'" self.assertEqual(response.status, 400) self.assertIn(reason, await response.text()) @@ -419,7 +419,7 @@ async def test_submit_object_bad_field_json(self): "alias": "GSE10966", "descriptor": {"studyTitle": "Highly", "studyType": "ceva"}, } - response = await self.client.post("/objects/study", json=json_req) + response = await self.client.post("/objects/study", params={"folder": "some id"}, json=json_req) reason = "Provided input does not seem correct for field: 'descriptor'" self.assertEqual(response.status, 400) self.assertIn(reason, await response.text()) @@ -431,7 +431,7 @@ async def test_post_object_bad_json(self): "alias": "GSE10966", "descriptor": {"studyTitle": "Highly", "studyType": "Other"}, } - response = await self.client.post("/objects/study", data=json_req) + response = await self.client.post("/objects/study", params={"folder": "some id"}, data=json_req) reason = "JSON is not correctly formatted. 
See: Expecting value: line 1 column 1" self.assertEqual(response.status, 400) self.assertIn(reason, await response.text()) @@ -442,7 +442,7 @@ async def test_post_object_works_with_csv(self): data = self.create_submission_data(files) file_content = self.get_file_data("sample", "EGAformat.csv") self.MockedCSVParser().parse.return_value = [{}, {}, {}] - response = await self.client.post("/objects/sample", data=data) + response = await self.client.post("/objects/sample", params={"folder": "some id"}, data=data) json_resp = await response.json() self.assertEqual(response.status, 201) self.assertEqual(self.test_ega_string, json_resp[0]["accessionId"]) @@ -460,7 +460,7 @@ async def test_post_objet_error_with_empty(self): """Test multipart request post fails when no objects are parsed.""" files = [("sample", "empty.csv")] data = self.create_submission_data(files) - response = await self.client.post("/objects/sample", data=data) + response = await self.client.post("/objects/sample", params={"folder": "some id"}, data=data) json_resp = await response.json() self.assertEqual(response.status, 400) self.assertEqual(json_resp["detail"], "Request data seems empty.") @@ -495,7 +495,7 @@ async def test_submit_draft_works_with_json(self): "alias": "GSE10966", "descriptor": {"studyTitle": "Highly", "studyType": "Other"}, } - response = await self.client.post("/drafts/study", json=json_req) + response = await self.client.post("/drafts/study", params={"folder": "some id"}, json=json_req) self.assertEqual(response.status, 201) self.assertIn(self.test_ega_string, await response.text()) self.MockedOperator().create_metadata_object.assert_called_once() @@ -544,7 +544,7 @@ async def test_submit_object_fails_with_too_many_files(self): """Test that sending two files to endpoint results failure.""" files = [("study", "SRP000539.xml"), ("study", "SRP000539_copy.xml")] data = self.create_submission_data(files) - response = await self.client.post("/objects/study", data=data) + response = await self.client.post("/objects/study", params={"folder": "some id"}, data=data) reason = "Only one file can be sent to this endpoint at a time." self.assertEqual(response.status, 400) self.assertIn(reason, await response.text()) @@ -616,7 +616,7 @@ async def test_operations_fail_for_wrong_schema_type(self): json_get_resp = await get_resp.json() self.assertIn("Specified schema", json_get_resp["detail"]) - post_rep = await self.client.post("/objects/bad_scehma_name") + post_rep = await self.client.post("/objects/bad_scehma_name", params={"folder": "some id"}) self.assertEqual(post_rep.status, 404) post_json_rep = await post_rep.json() self.assertIn("Specified schema", post_json_rep["detail"]) From 04ecce3e844cd756a6c0995431e3feb1fbf0c50c Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 26 Jan 2022 16:04:28 +0000 Subject: [PATCH 200/336] Update swagger doc --- docs/specification.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/specification.yml b/docs/specification.yml index 48e5e69b6..8ddd4a6fe 100644 --- a/docs/specification.yml +++ b/docs/specification.yml @@ -293,6 +293,11 @@ paths: - Submission summary: Submit data to a specific schema parameters: + - in: query + name: folder + schema: + type: string + description: The folder ID where object belongs to. - name: schema in: path description: Name of the Metadata schema. 
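With the `folder` query parameter documented above, a successful object POST
also patches the parent folder. A minimal sketch of the JSON Patch list that
`prepare_folder_patch` (PATCH 199) builds for one sample submitted as XML; the
accession id value is an illustrative placeholder, not a real identifier:

```python
# Illustrative folder patch for one XML-submitted sample; the accessionId
# below is a placeholder.
folder_patch = [
    {
        "op": "add",
        "path": "/metadataObjects/-",  # "/drafts/-" when the schema is a draft
        "value": {
            "accessionId": "c6fcd3f2a4e944dd8a3c7d4d4a9ae17d",  # placeholder
            "schema": "sample",
            "tags": {
                "submissionType": "XML",
                "displayTitle": "SRS001433.xml",
                # fileName is included only for XML/CSV (non-Form) submissions
                "fileName": "SRS001433.xml",
            },
        },
    }
]
```

This mirrors the tag shape the integration tests assert for folder drafts and
metadata objects after posting `SRS001433.xml`.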
From 9a78733513a2220b9243e49a5fc9812dfe2a7100 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Wed, 26 Jan 2022 15:15:17 +0000
Subject: [PATCH 201/336] Add mypy linting checker for VSCode

---
 .devcontainer/devcontainer.json | 8 ++++++++
 requirements-dev.in             | 1 +
 requirements-dev.txt            | 11 ++++++++++-
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 13140fc03..225e54305 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -23,6 +23,14 @@
     "python.linting.pylintEnabled": true,
     "python.linting.enabled": true,
     "python.pythonPath": "/usr/local/bin/python",
+    "python.linting.mypyEnabled": true,
+    "python.linting.mypyArgs": [
+        "--ignore-missing-imports",
+        "--follow-imports=silent",
+        "--show-column-numbers",
+        "--strict",
+        "--exclude tests"
+    ],
   },
   "extensions": [
     "ms-python.python",
diff --git a/requirements-dev.in b/requirements-dev.in
index 2181f099b..eff2402df 100644
--- a/requirements-dev.in
+++ b/requirements-dev.in
@@ -2,6 +2,7 @@ aiofiles # to run integration tests
 black
 certifi
 flake8
+mypy
 pip-tools # pip dependencies management
 pre-commit
 tox
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 2437f3593..968887641 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -30,8 +30,12 @@ identify==2.3.6
     # via pre-commit
 mccabe==0.6.1
     # via flake8
+mypy==0.931
+    # via -r requirements-dev.in
 mypy-extensions==0.4.3
-    # via black
+    # via
+    #   black
+    #   mypy
 nodeenv==1.6.0
     # via pre-commit
 packaging==21.2
@@ -71,9 +75,14 @@ toml==0.10.2
 tomli==1.2.2
     # via
     #   black
+    #   mypy
     #   pep517
 tox==3.24.5
     # via -r requirements-dev.in
+typing-extensions==4.0.0
+    # via
+    #   black
+    #   mypy
 virtualenv==20.10.0
     # via
     #   pre-commit

From d557d922dde4741aa9366bdee4c3050bdc998b81 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Wed, 9 Feb 2022 08:09:36 +0000
Subject: [PATCH 202/336] Extract title of object for folder patch

---
 metadata_backend/api/handlers/object.py   | 38 +++++++++--------------
 metadata_backend/api/operators.py         | 24 ++++++++------
 metadata_backend/helpers/schema_loader.py |  1 +
 tests/test_handlers.py                    |  8 ++---
 tests/test_operators.py                   | 23 +++++---------
 5 files changed, 41 insertions(+), 53 deletions(-)

diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py
index 889b69641..9905fed04 100644
--- a/metadata_backend/api/handlers/object.py
+++ b/metadata_backend/api/handlers/object.py
@@ -104,9 +104,9 @@ async def post_object(self, req: Request) -> Response:
 
         folder_id = req.query.get("folder", "")
         if not folder_id:
-            reason = "Folder ID is required query parameter."
+            reason = "Folder is a required query parameter. Please provide the ID of the folder the object belongs to."
             raise web.HTTPBadRequest(reason=reason)
-        patch_params = {"folder": folder_id}
+        patch_params = {}
 
         self._check_schema_exists(schema_type)
         collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type
@@ -127,7 +127,7 @@ async def post_object(self, req: Request) -> Response:
             # If multipart request contains XML, XML operator is used.
             # Else the multipart request is expected to contain CSV file(s) which are converted into JSON.
operator = XMLOperator(db_client) if cont_type == "xml" else Operator(db_client) - patch_params.update({"cont_type": cont_type, "title": filename}) + patch_params = {"cont_type": cont_type, "filename": filename} else: content = await self._get_data(req) if not req.path.startswith("/drafts"): @@ -141,33 +141,25 @@ async def post_object(self, req: Request) -> Response: LOG.debug(f"Inserting multiple objects for {schema_type}.") ids: List[Dict[str, str]] = [] for item in content: - accession_id = await operator.create_metadata_object(collection, item[0]) - ids.append({"accessionId": accession_id}) + accession_id, title = await operator.create_metadata_object(collection, item[0]) + ids.append({"accessionId": accession_id, "title": title}) LOG.info(f"POST object with accesssion ID {accession_id} in schema {collection} was successful.") # we format like this to make it consistent with the response from /submit endpoint - data = [dict(item, **{"schema": schema_type}) for item in ids] + data = [dict({"accessionId": item["accessionId"]}, **{"schema": schema_type}) for item in ids] # we take the first result if we get multiple location_headers = CIMultiDict(Location=f"{url}/{data[0]['accessionId']}") else: - accession_id = await operator.create_metadata_object(collection, content) + accession_id, title = await operator.create_metadata_object(collection, content) data = {"accessionId": accession_id} - location_headers = CIMultiDict(Location=f"{url}/{accession_id}") LOG.info(f"POST object with accesssion ID {accession_id} in schema {collection} was successful.") # Gathering data for object to be added to folder if not isinstance(data, List): - ids = [data] - if not patch_params.get("title", None) and isinstance(content, Dict): - try: - patch_params["title"] = ( - content["descriptor"]["studyTitle"] if collection == "study" else content["title"] - ) - except (TypeError, KeyError): - patch_params["title"] = "" - patch = await self.prepare_folder_patch(collection, ids, patch_params) + ids = [dict(data, **{"title": title})] folder_op = FolderOperator(db_client) - folder_id = await folder_op.update_folder(folder_id, patch) + patch = await self.prepare_folder_patch_new_object(collection, ids, patch_params) + await folder_op.update_folder(folder_id, patch) body = ujson.dumps(data, escape_forward_slashes=False) @@ -306,8 +298,8 @@ async def patch_object(self, req: Request) -> Response: LOG.info(f"PATCH object with accession ID {accession_id} in schema {collection} was successful.") return web.Response(body=body, status=200, content_type="application/json") - async def prepare_folder_patch(self, schema: str, ids: List, params: Dict[str, str]) -> List: - """Prepare patch operations list. + async def prepare_folder_patch_new_object(self, schema: str, ids: List, params: Dict[str, str]) -> List: + """Prepare patch operations list for adding an object or objects to a folder. 
:param schema: schema of objects to be added to the folder :param ids: object IDs @@ -335,13 +327,11 @@ async def prepare_folder_patch(self, schema: str, ids: List, params: Dict[str, s "schema": schema, "tags": { "submissionType": submission_type, - "displayTitle": params["title"], + "displayTitle": id["title"], }, }, } - if submission_type != "Form": - patch_ops["value"]["tags"]["fileName"] = params["title"] + patch_ops["value"]["tags"]["fileName"] = params["filename"] patch.append(patch_ops) - return patch diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 669e8551c..9dc7e8ee4 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -39,7 +39,7 @@ def __init__(self, db_name: str, content_type: str, db_client: AsyncIOMotorClien self.db_service = DBService(db_name, db_client) self.content_type = content_type - async def create_metadata_object(self, schema_type: str, data: Union[Dict, str]) -> str: + async def create_metadata_object(self, schema_type: str, data: Union[Dict, str]) -> Tuple[str, str]: """Create new metadata object to database. Data formatting and addition step for JSON or XML must be implemented @@ -49,9 +49,9 @@ async def create_metadata_object(self, schema_type: str, data: Union[Dict, str]) :param data: Data to be saved to database. :returns: Accession id for the object inserted to database """ - accession_id = await self._format_data_to_create_and_add_to_db(schema_type, data) + accession_id, title = await self._format_data_to_create_and_add_to_db(schema_type, data) LOG.info(f"Inserting object with schema {schema_type} to database succeeded with accession id: {accession_id}") - return accession_id + return accession_id, title async def replace_metadata_object(self, schema_type: str, accession_id: str, data: Union[Dict, str]) -> str: """Replace metadata object from database. @@ -127,7 +127,7 @@ async def delete_metadata_object(self, schema_type: str, accession_id: str) -> s LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - async def _insert_formatted_object_to_db(self, schema_type: str, data: Dict) -> str: + async def _insert_formatted_object_to_db(self, schema_type: str, data: Dict) -> Tuple[str, str]: """Insert formatted metadata object to database. :param schema_type: Schema type of the object to insert. @@ -142,7 +142,11 @@ async def _insert_formatted_object_to_db(self, schema_type: str, data: Dict) -> LOG.error(reason) raise web.HTTPBadRequest(reason=reason) if insert_success: - return data["accessionId"] + try: + title = data["descriptor"]["studyTitle"] if schema_type == "study" else data["title"] + except (TypeError, KeyError): + title = "" + return data["accessionId"], title else: reason = "Inserting object to database failed for some reason." LOG.error(reason) @@ -249,7 +253,7 @@ async def check_exists(self, schema_type: str, accession_id: str) -> None: raise web.HTTPNotFound(reason=reason) @abstractmethod - async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Any) -> str: + async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Any) -> Tuple[str, str]: """Format and add data to database. Must be implemented by subclass. 
@@ -380,7 +384,7 @@ async def query_metadata_database( ) return data, page_num, page_size, total_objects[0]["total"] - async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Dict) -> str: + async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Dict) -> Tuple[str, str]: """Format JSON metadata object and add it to db. Adds necessary additional information to object before adding to db. @@ -513,7 +517,7 @@ def __init__(self, db_client: AsyncIOMotorClient) -> None: """ super().__init__(mongo_database, "text/xml", db_client) - async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: str) -> str: + async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: str) -> Tuple[str, str]: """Format XML metadata object and add it to db. XML is validated, then parsed to JSON, which is added to database. @@ -527,10 +531,10 @@ async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: str # remove `draft-` from schema type schema = schema_type[6:] if schema_type.startswith("draft") else schema_type data_as_json = XMLToJSONParser().parse(schema, data) - accession_id = await Operator(db_client)._format_data_to_create_and_add_to_db(schema_type, data_as_json) + accession_id, title = await Operator(db_client)._format_data_to_create_and_add_to_db(schema_type, data_as_json) LOG.debug(f"XMLOperator formatted data for xml-{schema_type} to add to DB") return await self._insert_formatted_object_to_db( - f"xml-{schema_type}", {"accessionId": accession_id, "content": data} + f"xml-{schema_type}", {"accessionId": accession_id, "title": title, "content": data} ) async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accession_id: str, data: str) -> str: diff --git a/metadata_backend/helpers/schema_loader.py b/metadata_backend/helpers/schema_loader.py index a30a6ee25..42f9492a7 100644 --- a/metadata_backend/helpers/schema_loader.py +++ b/metadata_backend/helpers/schema_loader.py @@ -42,6 +42,7 @@ def _identify_file(self, schema_type: str) -> Path: for file in [x for x in self.path.iterdir()]: if schema_type in file.name and file.name.endswith(self.loader_type): schema_file = file + break if not schema_file: raise SchemaNotFoundException diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 0f6244a66..994a7f0aa 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -1,7 +1,7 @@ """Test API endpoints from handlers module.""" from pathlib import Path -from unittest.mock import patch, call +from unittest.mock import call, patch from aiohttp import FormData from aiohttp.test_utils import AioHTTPTestCase, make_mocked_coro @@ -153,7 +153,7 @@ async def fake_xmloperator_read_metadata_object(self, schema_type, accession_id) async def fake_xmloperator_create_metadata_object(self, schema_type, content): """Fake create operation to return mocked accessionId.""" - return self.test_ega_string + return self.test_ega_string, "title" async def fake_xmloperator_replace_metadata_object(self, schema_type, accession_id, content): """Fake replace operation to return mocked accessionId.""" @@ -161,7 +161,7 @@ async def fake_xmloperator_replace_metadata_object(self, schema_type, accession_ async def fake_operator_create_metadata_object(self, schema_type, content): """Fake create operation to return mocked accessionId.""" - return self.test_ega_string + return self.test_ega_string, "title" async def fake_operator_update_metadata_object(self, schema_type, accession_id, content): """Fake update operation to 
return mocked accessionId.""" @@ -443,6 +443,7 @@ async def test_post_object_works_with_csv(self): file_content = self.get_file_data("sample", "EGAformat.csv") self.MockedCSVParser().parse.return_value = [{}, {}, {}] response = await self.client.post("/objects/sample", params={"folder": "some id"}, data=data) + print("=== RESP ===", await response.text()) json_resp = await response.json() self.assertEqual(response.status, 201) self.assertEqual(self.test_ega_string, json_resp[0]["accessionId"]) @@ -549,7 +550,6 @@ async def test_submit_object_fails_with_too_many_files(self): self.assertEqual(response.status, 400) self.assertIn(reason, await response.text()) - # handle_check_ownedby_user.return_value = True async def test_get_object(self): """Test that accessionId returns correct JSON object.""" url = f"/objects/study/{self.query_accessionId}" diff --git a/tests/test_operators.py b/tests/test_operators.py index 20ca13851..773f615ac 100644 --- a/tests/test_operators.py +++ b/tests/test_operators.py @@ -2,22 +2,15 @@ import datetime import re import unittest +from unittest import IsolatedAsyncioTestCase +from unittest.mock import MagicMock, call, patch from uuid import uuid4 -from unittest.mock import MagicMock, patch, call from aiohttp.web import HTTPBadRequest, HTTPNotFound, HTTPUnprocessableEntity -from unittest import IsolatedAsyncioTestCase - +from metadata_backend.api.operators import FolderOperator, Operator, UserOperator, XMLOperator from multidict import MultiDict, MultiDictProxy from pymongo.errors import ConnectionFailure -from metadata_backend.api.operators import ( - FolderOperator, - Operator, - XMLOperator, - UserOperator, -) - class AsyncIterator: """Async iterator based on range.""" @@ -191,7 +184,7 @@ async def test_json_create_passes_and_returns_accessionId(self): "descriptor": {"studyTitle": "Highly", "studyType": "Other"}, } operator.db_service.create.return_value = True - accession = await operator.create_metadata_object("study", data) + accession, _ = await operator.create_metadata_object("study", data) operator.db_service.create.assert_called_once() self.assertEqual(accession, self.accession_id) @@ -264,10 +257,10 @@ async def test_xml_create_passes_and_returns_accessionId(self): operator.db_service.create.return_value = True with patch( ("metadata_backend.api.operators.Operator._format_data_to_create_and_add_to_db"), - return_value=self.accession_id, + return_value=(self.accession_id, "title"), ): with patch("metadata_backend.api.operators.XMLToJSONParser"): - accession = await operator.create_metadata_object("study", "") + accession, _ = await operator.create_metadata_object("study", "") operator.db_service.create.assert_called_once() self.assertEqual(accession, self.accession_id) @@ -375,7 +368,7 @@ async def test_correct_data_is_set_to_xml_when_creating(self): xml_data = "" with patch( ("metadata_backend.api.operators.Operator._format_data_to_create_and_add_to_db"), - return_value=self.accession_id, + return_value=(self.accession_id, "title"), ): with patch( ("metadata_backend.api.operators.XMLOperator._insert_formatted_object_to_db"), @@ -384,7 +377,7 @@ async def test_correct_data_is_set_to_xml_when_creating(self): with patch("metadata_backend.api.operators.XMLToJSONParser"): acc = await (operator._format_data_to_create_and_add_to_db("study", xml_data)) m_insert.assert_called_once_with( - "xml-study", {"accessionId": self.accession_id, "content": xml_data} + "xml-study", {"accessionId": self.accession_id, "title": "title", "content": xml_data} ) 
self.assertEqual(acc, self.accession_id) From 8c23e050b215bc09e10116bd035cd25f2b241ec0 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 9 Feb 2022 08:29:11 +0000 Subject: [PATCH 203/336] Add CSV to submission type check --- metadata_backend/api/handlers/folder.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/metadata_backend/api/handlers/folder.py b/metadata_backend/api/handlers/folder.py index cc5910fc9..ffabad1c6 100644 --- a/metadata_backend/api/handlers/folder.py +++ b/metadata_backend/api/handlers/folder.py @@ -14,9 +14,9 @@ from ...helpers.doi import DOIHandler from ...helpers.logger import LOG from ...helpers.validator import JSONValidator -from .restapi import RESTAPIHandler from ..middlewares import get_session from ..operators import FolderOperator, Operator, UserOperator +from .restapi import RESTAPIHandler class FolderAPIHandler(RESTAPIHandler): @@ -26,7 +26,7 @@ def _check_patch_folder(self, patch_ops: Any) -> None: """Check patch operations in request are valid. We check that ``metadataObjects`` and ``drafts`` have ``_required_values``. - For tags we check that the ``submissionType`` takes either ``XML`` or + For tags we check that the ``submissionType`` takes either ``CSV``, ``XML`` or ``Form`` as values. :param patch_ops: JSON patch request :raises: HTTPBadRequest if request does not fullfil one of requirements @@ -41,8 +41,12 @@ def _check_patch_folder(self, patch_ops: Any) -> None: for op in patch_ops: if _tags.match(op["path"]): LOG.info(f"{op['op']} on tags in folder") - if "submissionType" in op["value"].keys() and op["value"]["submissionType"] not in ["XML", "Form"]: - reason = "submissionType is restricted to either 'XML' or 'Form' values." + if "submissionType" in op["value"].keys() and op["value"]["submissionType"] not in [ + "XML", + "CSV", + "Form", + ]: + reason = "submissionType is restricted to either 'CSV', 'XML' or 'Form' values." 
LOG.error(reason) raise web.HTTPBadRequest(reason=reason) pass From 4c23156f28fec18c6a6cabcaed6fd33c3d67b937 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 9 Feb 2022 15:10:00 +0000 Subject: [PATCH 204/336] Add patching of folder after object update and replace --- metadata_backend/api/handlers/object.py | 59 +++++++++++++++++++-- metadata_backend/api/handlers/submission.py | 3 +- metadata_backend/api/handlers/template.py | 4 +- metadata_backend/api/operators.py | 32 +++++++---- metadata_backend/helpers/parser.py | 6 ++- tests/integration/run_tests.py | 24 +++++++-- tests/test_handlers.py | 4 +- tests/test_operators.py | 4 +- 8 files changed, 110 insertions(+), 26 deletions(-) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index 9905fed04..5d521e028 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -235,8 +235,9 @@ async def put_object(self, req: Request) -> Response: db_client = req.app["db_client"] content: Union[Dict, str] operator: Union[Operator, XMLOperator] + filename = "" if req.content_type == "multipart/form-data": - files, _, _ = await multipart_content(req, extract_one=True, expect_xml=True) + files, _, filename = await multipart_content(req, extract_one=True, expect_xml=True) content, _ = files[0] operator = XMLOperator(db_client) else: @@ -250,8 +251,17 @@ async def put_object(self, req: Request) -> Response: await operator.check_exists(collection, accession_id) await self._handle_check_ownedby_user(req, collection, accession_id) + folder_op = FolderOperator(db_client) + exists, folder_id, published = await folder_op.check_object_in_folder(collection, accession_id) + if exists: + if published: + reason = "Published objects cannot be updated." + LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) - accession_id = await operator.replace_metadata_object(collection, accession_id, content) + accession_id, title = await operator.replace_metadata_object(collection, accession_id, content) + patch = await self.prepare_folder_patch_update_object(collection, accession_id, title, filename) + await folder_op.update_folder(folder_id, patch) body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) LOG.info(f"PUT object with accession ID {accession_id} in schema {collection} was successful.") @@ -285,7 +295,7 @@ async def patch_object(self, req: Request) -> Response: await self._handle_check_ownedby_user(req, collection, accession_id) folder_op = FolderOperator(db_client) - exists, _, published = await folder_op.check_object_in_folder(collection, accession_id) + exists, folder_id, published = await folder_op.check_object_in_folder(collection, accession_id) if exists: if published: reason = "Published objects cannot be updated." 
@@ -294,6 +304,14 @@ async def patch_object(self, req: Request) -> Response:
 
         accession_id = await operator.update_metadata_object(collection, accession_id, content)
 
+        # If the object's title changed, the change is also propagated to the folder
+        try:
+            title = content["descriptor"]["studyTitle"] if collection == "study" else content["title"]
+            patch = await self.prepare_folder_patch_update_object(collection, accession_id, title)
+            await folder_op.update_folder(folder_id, patch)
+        except (TypeError, KeyError):
+            pass
+
         body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False)
         LOG.info(f"PATCH object with accession ID {accession_id} in schema {collection} was successful.")
         return web.Response(body=body, status=200, content_type="application/json")
@@ -335,3 +353,38 @@ async def prepare_folder_patch_new_object(self, schema: str, ids: List, params:
             patch_ops["value"]["tags"]["fileName"] = params["filename"]
             patch.append(patch_ops)
         return patch
+
+    async def prepare_folder_patch_update_object(
+        self, schema: str, accession_id: str, title: str, filename: str = ""
+    ) -> List:
+        """Prepare patch operation for updating object's title in a folder.
+
+        :param schema: schema of object to be updated
+        :param accession_id: object ID
+        :param title: title to be updated
+        :returns: list with one patch operation
+        """
+        if schema.startswith("draft"):
+            path = "/drafts"
+        else:
+            path = "/metadataObjects"
+
+        patch_op = {
+            "op": "replace",
+            "match": {path.replace("/", ""): {"$elemMatch": {"schema": schema, "accessionId": accession_id}}},
+        }
+        if not filename:
+            patch_op.update(
+                {
+                    "path": f"{path}/$/tags/displayTitle",
+                    "value": title,
+                }
+            )
+        else:
+            patch_op.update(
+                {
+                    "path": f"{path}/$/tags",
+                    "value": {"submissionType": "XML", "fileName": filename, "displayTitle": title},
+                }
+            )
+        return [patch_op]
diff --git a/metadata_backend/api/handlers/submission.py b/metadata_backend/api/handlers/submission.py
index c9c2176e8..e8386ca50 100644
--- a/metadata_backend/api/handlers/submission.py
+++ b/metadata_backend/api/handlers/submission.py
@@ -128,8 +128,9 @@ async def _execute_action(self, schema: str, content: str, db_client: AsyncIOMot
         :returns: Dict containing specific action that was completed
         """
         if action == "add":
+            accession_id, _ = await XMLOperator(db_client).create_metadata_object(schema, content)
             result = {
-                "accessionId": await XMLOperator(db_client).create_metadata_object(schema, content),
+                "accessionId": accession_id,
                 "schema": schema,
             }
             LOG.debug(f"added some content in {schema} ...")
diff --git a/metadata_backend/api/handlers/template.py b/metadata_backend/api/handlers/template.py
index c2bccc2cc..105b5b74e 100644
--- a/metadata_backend/api/handlers/template.py
+++ b/metadata_backend/api/handlers/template.py
@@ -69,7 +69,7 @@ async def post_template(self, req: Request) -> Response:
                     reason = f"template key is missing from request body for element: {num}."
                     LOG.error(reason)
                     raise web.HTTPBadRequest(reason=reason)
-                accession_id = await operator.create_metadata_object(collection, tmpl["template"])
+                accession_id, _ = await operator.create_metadata_object(collection, tmpl["template"])
                 data = [{"accessionId": accession_id, "schema": collection}]
                 if "tags" in tmpl:
                     data[0]["tags"] = tmpl["tags"]
@@ -82,7 +82,7 @@ async def post_template(self, req: Request) -> Response:
                 reason = "template key is missing from request body."
LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - accession_id = await operator.create_metadata_object(collection, content["template"]) + accession_id, _ = await operator.create_metadata_object(collection, content["template"]) data = [{"accessionId": accession_id, "schema": collection}] if "tags" in content: data[0]["tags"] = content["tags"] diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 9dc7e8ee4..3a434cdca 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -53,7 +53,9 @@ async def create_metadata_object(self, schema_type: str, data: Union[Dict, str]) LOG.info(f"Inserting object with schema {schema_type} to database succeeded with accession id: {accession_id}") return accession_id, title - async def replace_metadata_object(self, schema_type: str, accession_id: str, data: Union[Dict, str]) -> str: + async def replace_metadata_object( + self, schema_type: str, accession_id: str, data: Union[Dict, str] + ) -> Tuple[str, str]: """Replace metadata object from database. Data formatting and addition step for JSON or XML must be implemented @@ -64,9 +66,9 @@ async def replace_metadata_object(self, schema_type: str, accession_id: str, dat :param data: Data to be saved to database. :returns: Accession id for the object replaced to database """ - await self._format_data_to_replace_and_add_to_db(schema_type, accession_id, data) + accession_id, title = await self._format_data_to_replace_and_add_to_db(schema_type, accession_id, data) LOG.info(f"Replacing object with schema {schema_type} to database succeeded with accession id: {accession_id}") - return accession_id + return accession_id, title async def update_metadata_object(self, schema_type: str, accession_id: str, data: Union[Dict, str]) -> str: """Update metadata object from database. @@ -143,7 +145,7 @@ async def _insert_formatted_object_to_db(self, schema_type: str, data: Dict) -> raise web.HTTPBadRequest(reason=reason) if insert_success: try: - title = data["descriptor"]["studyTitle"] if schema_type == "study" else data["title"] + title = data["descriptor"]["studyTitle"] if schema_type in ["study", "draft-study"] else data["title"] except (TypeError, KeyError): title = "" return data["accessionId"], title @@ -152,7 +154,7 @@ async def _insert_formatted_object_to_db(self, schema_type: str, data: Dict) -> LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - async def _replace_object_from_db(self, schema_type: str, accession_id: str, data: Dict) -> str: + async def _replace_object_from_db(self, schema_type: str, accession_id: str, data: Dict) -> Tuple[str, str]: """Replace formatted metadata object in database. :param schema_type: Schema type of the object to replace. @@ -173,7 +175,11 @@ async def _replace_object_from_db(self, schema_type: str, accession_id: str, dat LOG.error(reason) raise web.HTTPBadRequest(reason=reason) if replace_success: - return accession_id + try: + title = data["descriptor"]["studyTitle"] if schema_type in ["study", "draft-study"] else data["title"] + except (TypeError, KeyError): + title = "" + return accession_id, title else: reason = "Replacing object to database failed for some reason." 
LOG.error(reason) @@ -260,7 +266,9 @@ async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Any """ @abstractmethod - async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accession_id: str, data: Any) -> str: + async def _format_data_to_replace_and_add_to_db( + self, schema_type: str, accession_id: str, data: Any + ) -> Tuple[str, str]: """Format and replace data in database. Must be implemented by subclass. @@ -406,7 +414,9 @@ async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Dic LOG.debug(f"Operator formatted data for {schema_type} to add to DB.") return await self._insert_formatted_object_to_db(schema_type, data) - async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accession_id: str, data: Dict) -> str: + async def _format_data_to_replace_and_add_to_db( + self, schema_type: str, accession_id: str, data: Dict + ) -> Tuple[str, str]: """Format JSON metadata object and replace it in db. Replace information in object before adding to db. @@ -537,7 +547,9 @@ async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: str f"xml-{schema_type}", {"accessionId": accession_id, "title": title, "content": data} ) - async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accession_id: str, data: str) -> str: + async def _format_data_to_replace_and_add_to_db( + self, schema_type: str, accession_id: str, data: str + ) -> Tuple[str, str]: """Format XML metadata object and add it to db. XML is validated, then parsed to JSON, which is added to database. @@ -552,7 +564,7 @@ async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accessio # remove `draft-` from schema type schema = schema_type[6:] if schema_type.startswith("draft") else schema_type data_as_json = XMLToJSONParser().parse(schema, data) - accession_id = await Operator(db_client)._format_data_to_replace_and_add_to_db( + accession_id, title = await Operator(db_client)._format_data_to_replace_and_add_to_db( schema_type, accession_id, data_as_json ) LOG.debug(f"XMLOperator formatted data for xml-{schema_type} to add to DB") diff --git a/metadata_backend/helpers/parser.py b/metadata_backend/helpers/parser.py index 2cd415bdd..c0fa09841 100644 --- a/metadata_backend/helpers/parser.py +++ b/metadata_backend/helpers/parser.py @@ -1,17 +1,17 @@ """Tool to parse XML and CSV files to JSON.""" -import re import csv +import re from io import StringIO from typing import Any, Dict, List, Optional, Type, Union from aiohttp import web +from pymongo import UpdateOne from xmlschema import XMLSchema, XMLSchemaConverter, XMLSchemaException, XsdElement, XsdType from .logger import LOG from .schema_loader import SchemaNotFoundException, XMLSchemaLoader from .validator import JSONValidator, XMLValidator -from pymongo import UpdateOne class MetadataXMLConverter(XMLSchemaConverter): @@ -457,6 +457,8 @@ def jsonpatch_mongo(identifier: Dict, json_patch: List[Dict[str, Any]]) -> List: queries.append(UpdateOne(identifier, {"$set": {op["path"][1:].replace("/", "."): op["value"]}})) elif op["op"] == "replace": path = op["path"][1:-2] if op["path"].endswith("/-") else op["path"][1:].replace("/", ".") + if op.get("match", None): + identifier.update(op["match"]) queries.append(UpdateOne(identifier, {"$set": {path: op["value"]}})) return queries diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index ba0b82bc7..b1607f85b 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -773,7 
+773,11 @@ async def test_crud_folders_works(sess): { "accessionId": draft_id, "schema": "draft-sample", - "tags": {"submissionType": "XML", "displayTitle": "SRS001433.xml", "fileName": "SRS001433.xml"}, + "tags": { + "submissionType": "XML", + "displayTitle": "HapMap sample from Homo sapiens", + "fileName": "SRS001433.xml", + }, } ], "folder drafts content mismatch" assert res["metadataObjects"] == [], "there are objects in folder, expected empty" @@ -797,7 +801,11 @@ async def test_crud_folders_works(sess): { "accessionId": draft_id, "schema": "draft-sample", - "tags": {"submissionType": "XML", "displayTitle": "SRS001433.xml", "fileName": "SRS001433.xml"}, + "tags": { + "submissionType": "XML", + "displayTitle": "HapMap sample from Homo sapiens", + "fileName": "SRS001433.xml", + }, } ], "folder drafts content mismatch" assert res["metadataObjects"] == [ @@ -862,7 +870,11 @@ async def test_crud_folders_works_no_publish(sess): { "accessionId": draft_id, "schema": "draft-sample", - "tags": {"submissionType": "XML", "displayTitle": "SRS001433.xml", "fileName": "SRS001433.xml"}, + "tags": { + "submissionType": "XML", + "displayTitle": "HapMap sample from Homo sapiens", + "fileName": "SRS001433.xml", + }, } ], "folder drafts content mismatch" assert res["metadataObjects"] == [], "there are objects in folder, expected empty" @@ -885,7 +897,11 @@ async def test_crud_folders_works_no_publish(sess): { "accessionId": draft_id, "schema": "draft-sample", - "tags": {"submissionType": "XML", "displayTitle": "SRS001433.xml", "fileName": "SRS001433.xml"}, + "tags": { + "submissionType": "XML", + "displayTitle": "HapMap sample from Homo sapiens", + "fileName": "SRS001433.xml", + }, } ], "folder drafts content mismatch" assert res["metadataObjects"] == [ diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 994a7f0aa..90a3028b7 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -157,7 +157,7 @@ async def fake_xmloperator_create_metadata_object(self, schema_type, content): async def fake_xmloperator_replace_metadata_object(self, schema_type, accession_id, content): """Fake replace operation to return mocked accessionId.""" - return self.test_ega_string + return self.test_ega_string, "title" async def fake_operator_create_metadata_object(self, schema_type, content): """Fake create operation to return mocked accessionId.""" @@ -169,7 +169,7 @@ async def fake_operator_update_metadata_object(self, schema_type, accession_id, async def fake_operator_replace_metadata_object(self, schema_type, accession_id, content): """Fake replace operation to return mocked accessionId.""" - return self.test_ega_string + return self.test_ega_string, "title" async def fake_operator_delete_metadata_object(self, schema_type, accession_id): """Fake delete operation to await successful operation indicator.""" diff --git a/tests/test_operators.py b/tests/test_operators.py index 773f615ac..aa7296e79 100644 --- a/tests/test_operators.py +++ b/tests/test_operators.py @@ -198,7 +198,7 @@ async def test_json_replace_passes_and_returns_accessionId(self): operator = Operator(self.client) operator.db_service.exists.return_value = True operator.db_service.replace.return_value = True - accession = await operator.replace_metadata_object("study", self.accession_id, data) + accession, _ = await operator.replace_metadata_object("study", self.accession_id, data) operator.db_service.replace.assert_called_once() self.assertEqual(accession, self.accession_id) @@ -388,7 +388,7 @@ async def 
test_correct_data_is_set_to_xml_when_replacing(self):
         xml_data = ""
         with patch(
             "metadata_backend.api.operators.Operator._format_data_to_replace_and_add_to_db",
-            return_value=self.accession_id,
+            return_value=(self.accession_id, "title"),
         ):
             with patch(
                 "metadata_backend.api.operators.XMLOperator._replace_object_from_db",

From 77d3ddbd440f466ae8173b502097ed3d346bbb7f Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Thu, 10 Feb 2022 14:52:22 +0200
Subject: [PATCH 205/336] Update integration tests with folder check for metadata and draft objects

---
 tests/integration/run_tests.py | 73 ++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py
index b1607f85b..24d30fd1c 100644
--- a/tests/integration/run_tests.py
+++ b/tests/integration/run_tests.py
@@ -503,6 +503,54 @@ async def delete_user(sess, user_id):
         assert resp.status == 404, f"HTTP Status code error, got {resp.status}"
 
 
+def extract_folders_object(res, accession_id, draft):
+    """Extract object from folder metadataObjects with provided accessionId.
+
+    :param res: JSON parsed response from folder query request
+    :param accession_id: accession ID of reviewed object
+    :returns: dict of object entry in folder
+    """
+    object = "drafts" if draft else "metadataObjects"
+    actual_res = next(obj for obj in res[object] if obj["accessionId"] == accession_id)
+    return actual_res
+
+
+async def check_folders_object_patch(sess, folder_id, schema, accession_id, title, filename, draft=False):
+    """Check that draft or metadata object is added correctly to folder.
+
+    Get draft or metadata object from the folder and assert with data
+    returned from object endpoint itself.
+
+    :param sess: HTTP session in which request call is made
+    :param folder_id: id of the folder
+    :param schema: name of the schema (folder) used for testing
+    :param accession_id: accession ID of reviewed object
+    :param title: title of reviewed object
+    :param filename: name of the file used for inserting data
+    :param draft: indication of object draft status, default False
+    """
+    sub_type = "Form" if filename.split(".")[-1] == "json" else filename.split(".")[-1].upper()
+    async with sess.get(f"{folders_url}/{folder_id}") as resp:
+        res = await resp.json()
+        try:
+            actual = extract_folders_object(res, accession_id, draft)
+            expected = {
+                "accessionId": accession_id,
+                "schema": schema if not draft else f"draft-{schema}",
+                "tags": {
+                    "submissionType": sub_type,
+                    "displayTitle": title,
+                    "fileName": filename,
+                },
+            }
+            if sub_type == "Form":
+                del expected["tags"]["fileName"]
+            assert actual == expected, "actual and expected data did not match"
+        except StopIteration:
+            pass
+    return schema
+
+
 # === Integration tests ===
 async def test_crud_works(sess, schema, filename, folder_id):
     """Test REST api POST, GET and DELETE reqs.
@@ -520,6 +568,9 @@ async def test_crud_works(sess, schema, filename, folder_id):
     async with sess.get(f"{objects_url}/{schema}/{accession_id[0]}") as resp:
         LOG.debug(f"Checking that {accession_id[0]} JSON is in {schema}")
         assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
+        res = await resp.json()
+        title = res["descriptor"].get("studyTitle", "") if schema == "study" else res.get("title", "")
+        await check_folders_object_patch(sess, folder_id, schema, accession_id[0], title, filename)
     async with sess.get(f"{objects_url}/{schema}/{accession_id[0]}?format=xml") as resp:
         LOG.debug(f"Checking that {accession_id[0]} XML is in {schema}")
         assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
@@ -560,6 +611,9 @@ async def test_csv(sess, folder_id):
     async with sess.get(f"{objects_url}/{_schema}/{_first_csv_row_id}") as resp:
         LOG.debug(f"Checking that {_first_csv_row_id} JSON is in {_schema}")
         assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
+        res = await resp.json()
+        title = res.get("title", "")
+        await check_folders_object_patch(sess, folder_id, _schema, accession_id, title, _filename)
 
     await delete_object(sess, _schema, _first_csv_row_id)
     async with sess.get(f"{objects_url}/{_schema}/{_first_csv_row_id}") as resp:
@@ -595,6 +649,14 @@ async def test_put_objects(sess, folder_id):
     accession_id = await post_object(sess, "study", folder_id, "SRP000539.xml")
     await put_object_json(sess, "study", accession_id[0], "SRP000539.json")
     await put_object_xml(sess, "study", accession_id[0], "SRP000539_put.xml")
+    await check_folders_object_patch(
+        sess,
+        folder_id,
+        "study",
+        accession_id[0],
+        "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing",
+        "SRP000539_put.xml",
+    )
 
 
 async def test_crud_drafts_works(sess, schema, orginal_file, update_file, folder_id):
@@ -611,10 +673,19 @@ async def test_crud_drafts_works(sess, schema, orginal_file, update_file, folder
     :param folder_id: id of the folder used to group submission objects
     """
     draft_id = await post_draft_json(sess, schema, folder_id, orginal_file)
+    async with sess.get(f"{drafts_url}/{schema}/{draft_id}") as resp:
+        assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
+        res = await resp.json()
+        title = res["descriptor"]["studyTitle"] if schema == "study" else res.get("title", "")
+        await check_folders_object_patch(sess, folder_id, schema, draft_id, title, orginal_file, draft=True)
+
     accession_id = await put_draft(sess, schema, draft_id, update_file)
     async with sess.get(f"{drafts_url}/{schema}/{accession_id}") as resp:
         LOG.debug(f"Checking that {accession_id} JSON is in {schema}")
         assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
+        res = await resp.json()
+        title = res["descriptor"]["studyTitle"] if schema == "study" else res.get("title", "")
+        await check_folders_object_patch(sess, folder_id, schema, accession_id, title, update_file, draft=True)
 
     await delete_draft(sess, schema, accession_id)
     async with sess.get(f"{drafts_url}/{schema}/{accession_id}") as resp:
@@ -646,9 +717,11 @@ async def test_patch_drafts_works(sess, schema, orginal_file, update_file, folde
     async with sess.get(f"{drafts_url}/{schema}/{accession_id}") as resp:
         LOG.debug(f"Checking that {accession_id} JSON is in {schema}")
         res = await resp.json()
+        title = res["descriptor"]["studyTitle"] if schema == "study" else res.get("title", None)
         assert res["centerName"] == "GEOM", "object centerName content mismatch"
         assert res["alias"] == "GSE10968", "object alias content mismatch"
         assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
+        await check_folders_object_patch(sess, folder_id, schema, accession_id, title, update_file, draft=True)
 
     await delete_draft(sess, schema, accession_id)
     async with sess.get(f"{drafts_url}/{schema}/{accession_id}") as resp:
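The folder checks added above exercise the patch flow introduced in PATCH 204:
`prepare_folder_patch_update_object` emits a JSON-patch-style operation that carries an
extra `match` key, and `jsonpatch_mongo` folds that key into the MongoDB filter so the
positional `$` in the path addresses the matched array element. A condensed,
self-contained sketch of that translation (the accession and folder IDs are placeholders):

    from pymongo import UpdateOne

    # A patch operation like the one prepare_folder_patch_update_object builds for a draft:
    patch_op = {
        "op": "replace",
        "path": "/drafts/$/tags/displayTitle",
        "value": "New display title",
        "match": {"drafts": {"$elemMatch": {"schema": "draft-sample", "accessionId": "EDAG0000000001"}}},
    }

    # Condensed from jsonpatch_mongo: merge "match" into the document filter and turn
    # the JSON pointer into dot notation, so "$" updates only the matched element.
    identifier = {"folderId": "FOL00000001"}
    if patch_op.get("match", None):
        identifier.update(patch_op["match"])
    path = patch_op["path"][1:].replace("/", ".")  # -> "drafts.$.tags.displayTitle"
    query = UpdateOne(identifier, {"$set": {path: patch_op["value"]}})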
"object alias content mismatch" assert resp.status == 200, f"HTTP Status code error, got {resp.status}" + await check_folders_object_patch(sess, folder_id, schema, accession_id, title, update_file, draft=True) await delete_draft(sess, schema, accession_id) async with sess.get(f"{drafts_url}/{schema}/{accession_id}") as resp: From f17496a7e94f0ba6c18ff2c6f6ad133bac3838e3 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Fri, 11 Feb 2022 17:20:15 +0000 Subject: [PATCH 206/336] Add 'CSV' as accepted submission type Update spelling wordlist. --- .github/config/.wordlist.txt | 3 ++- metadata_backend/helpers/schemas/folders.json | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/config/.wordlist.txt b/.github/config/.wordlist.txt index 69cc2a43f..690da339c 100644 --- a/.github/config/.wordlist.txt +++ b/.github/config/.wordlist.txt @@ -115,6 +115,7 @@ cscfi cscusername csi csrf +csv ctrl cts curation @@ -651,4 +652,4 @@ xsd yaml yml za -zhuang \ No newline at end of file +zhuang diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index a95844d62..62fae4501 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -998,6 +998,7 @@ "title": "Type of submission", "enum": [ "XML", + "CSV", "Form" ] } @@ -1036,6 +1037,7 @@ "title": "Type of submission", "enum": [ "XML", + "CSV", "Form" ] } @@ -1047,4 +1049,4 @@ } }, "additionalProperties": false -} \ No newline at end of file +} From 48fb0950191d361ddf75612aa0dc732b4842c783 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Mon, 14 Feb 2022 07:30:04 +0000 Subject: [PATCH 207/336] Update changelog --- CHANGELOG.md | 126 +++++++++++++++++++++++++-------------------------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 14057a1c1..060510f8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,9 +9,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- templates API #256 +- Add patching of folders after object save and update operations + - Object is added or updated to folder(submission) where it belongs with it's accession ID, schema, submission type, title and filename in the case of CSV and XML upload + - Adds configuration for mypy linting to VScode devcontainer setup +- Templates API #256 - use `ujson` as default json library -- creating draft Datacite DOI for folders #257 +- Creating draft Datacite DOI for folders #257 - created a mock web app, which would act similarly to DataCite REST API - altered `publish_folder` endpoint so that `extraInfo` containing the DOI data is added upon publishing - added `datePublished` key to folders which takes in the date/time, when folder is published @@ -26,7 +29,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - update github actions - Add folder querying by date #308 - Add description to JSON schemas #323 - - add JSON schema spelling checker to pyspelling github action - optimise wordlist by adding regex ignore patterns - added pyspelling to pre-commit hooks (fixed syntax for scripts according to https://github.com/koalaman/shellcheck ) @@ -41,22 +43,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - New mandatory ENV `OIDC_URL` - New optional ENVs `OIDC_SCOPE`, `AUTH_METHOD` - Added oidcrp dependency -- use node 16+ #345 +- Use node 16+ #345 - VScode Dev environment #287 - Adds requirements-dev.in/txt 
files. Now pip dependencies can be managed with pip-tools - README updated with tox command, development build instructions, and prettify Dockerfile. -- update ENA XML and JSON schemas #299 +- Update ENA XML and JSON schemas #299 - Github actions changed the use of https://git.io/misspell to rojopolis/spellcheck-github-actions #316 - Separated most of the handlers to own files inside the handlers folder #319 ### Fixed -- coveralls report #267 -- typos for functions and tests #279 -- fix spelling mistakes for JSON schemas #323 -- oidcrp does not allow empty values, prefill them in mockauth so front-end can start #333 +- Coveralls report #267 +- Typos for functions and tests #279 +- Fix spelling mistakes for JSON schemas #323 +- Oidcrp does not allow empty values, prefill them in mockauth so front-end can start #333 - Fix development environment #336 - - Add env vars OIDC_URL and OIDC_URL_TEST to mock auth container - Adds logging configs for mock auth - Updates mock auth api's token endpoint with expiration configs @@ -76,7 +77,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -- package updates +- Package updates ### Added @@ -88,72 +89,71 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- add integration tests for misses in dataset, experiment, policy +- Add integration tests for misses in dataset, experiment, policy ### Changed -- package updates +- Package updates - EGA XML schemas version:1.8.0 -- refactor analysis and experiment schemas to adhere to XML schema +- Refactor analysis and experiment schemas to adhere to XML schema ### Fixed -- fix misses for DAC, experiment and policy processing of XML -- fix misses in JSON Schema +- Fix misses for DAC, experiment and policy processing of XML +- Fix misses in JSON Schema ## [0.9.0] - 2021-03-22 ### Added -- use dependabot -- support simultaneous sessions +- Use dependabot +- Support simultaneous sessions ### Changed - Refactor JSON schema Links -- refactor handlers to be more streamlined -- validate patch requests for JSON content -- switch to python 3.8 +- Refactor handlers to be more streamlined +- Validate patch requests for JSON content +- Switch to python 3.8 ## [0.8.1] - 2021-02-15 ### Fixed -- bugfix for error pages #202 +- Bugfix for error pages #202 ## [0.8.0] - 2021-02-12 ### Added - TLS support -- use `sub` as alternative to `eppn` to identify users +- Use `sub` as alternative to `eppn` to identify users - `PATCH` for objects and `PUT` for XML objects enabled -- delete folders and objects associated to user on user delete +- Delete folders and objects associated to user on user delete ### Changed -- redirect to error pages -- extended integration tests +- Redirect to error pages +- Extended integration tests ### Fixed -- fix replace on json patch -- general bug and fixes +- Fix replace on json patch +- General bug and fixes ## [0.7.1] - 2021-01-19 ### Fixed -- hotfix release #176 - +- Hotfix release #176 - added check_object_exists to check object exists and fail early with 404 before checking it belongs to user - refactor and added more check_folder_exists to check folder exists before doing anything - integration test to check objects are deleted properly ### Changes -- check objects and folders exist before any operation -- integration check to see if deleted object or folder are still registered in db +- Check objects and folders exist before any operation +- Integration check to see if deleted object or folder are still registered 
in db ## [0.7.0] - 2021-01-06 @@ -162,7 +162,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - CodeQL github action #162 - `/health` endpoint #173 -- map `users` to `folders` with `_handle_check_ownedby_user` #158 +- Map `users` to `folders` with `_handle_check_ownedby_user` #158 - querying for objects is restricted to only the objects that belong to user - return folders owned by user or published - added a few db operators some used (aggregate, remove) @@ -170,17 +170,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - standardise raises description and general improvements and fixes of logs and descriptions ### Changed -- verify `/publish` endpoint #163 -- restrict endpoints to logged in users #151 -- updated documentation #165 -- switch to using uuids for accession ids #168 -- integration tests and increase unit test coverage #166 +- Verify `/publish` endpoint #163 +- Restrict endpoints to logged in users #151 +- Updated documentation #165 +- Switch to using uuids for accession ids #168 +- Integration tests and increase unit test coverage #166 ### Fixed -- fixes for idp and location headers redirects #150 -- fix race condition in db operations #158 -- fix handling of draft deletion by removing redundant deletion #164, #169 and #172 +- Fixes for idp and location headers redirects #150 +- Fix race condition in db operations #158 +- Fix handling of draft deletion by removing redundant deletion #164, #169 and #172 ## [0.6.1] - 2020-11-23 @@ -190,38 +190,38 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -- refactor draft `/folder` #144 -- refactor gh actions #140 -- patch publish #141 +- Refactor draft `/folder` #144 +- Refactor gh actions #140 +- Patch publish #141 ### Fixed -- bugfixes for login redirect #139 +- Bugfixes for login redirect #139 ## [0.6.0] - 2020-10-08 ### Added -- authentication with OIDC #133 -- only 3.7 support going further #134 -- more submission actions `ADD` and `MODIFY` #137 +- Authentication with OIDC #133 +- Only 3.7 support going further #134 +- More submission actions `ADD` and `MODIFY` #137 ## [0.5.3] - 2020-08-21 ### Changed -- updated OpenAPI specifications #127 -- python modules, project description and instructions to documentation sources #128 -- added integration tests #129 -- updated documentation #130 +- Updated OpenAPI specifications #127 +- Python modules, project description and instructions to documentation sources #128 +- Added integration tests #129 +- Updated documentation #130 ## [0.5.2] - 2020-08-14 ### Fixes -- fix mimetype for SVG image and package data +- Fix mimetype for SVG image and package data ## [0.5.1] - 2020-08-14 @@ -233,10 +233,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixes - Dockerfile build fixes #115 -- fix JSON Schema details #117 -- missing env from github actions #119 -- typo fixes #120 -- await responses #122 +- Fix JSON Schema details #117 +- Missing env from github actions #119 +- Typo fixes #120 +- Await responses #122 ## [0.5.0] - 2020-08-06 @@ -250,28 +250,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - JSON validation - XML better parsing - Auth middleware -- pagination +- Pagination ### Changed - Improved current naming conventions #82 - Login flow with new routes for Home & Login #76, #79, #80 -- change from pymongo to motor +- Change from pymongo to motor ## [0.2.0] - 2020-07-01 ### Added - Added 
integration tests -- switched to github actions -- added base docs folder -- added more refined XML parsing +- Switched to github actions +- Added base docs folder +- Added more refined XML parsing - Integration tests added - Refactor unit tests ### Changed -- refactor API endpoints and responses +- Refactor API endpoints and responses - error using https://tools.ietf.org/html/rfc7807 - `objects` and `schemas` endpoints added From b8a48606f9aa58c87c40bd969d5e5ec86e98ada1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Feb 2022 09:06:13 +0000 Subject: [PATCH 208/336] Bump pytest from 7.0.0 to 7.0.1 Bumps [pytest](https://github.com/pytest-dev/pytest) from 7.0.0 to 7.0.1. - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/7.0.0...7.0.1) --- updated-dependencies: - dependency-name: pytest dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ef36e8f41..cb4a29078 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ packages=find_packages(exclude=["tests"]), install_requires=requirements, extras_require={ - "test": ["coverage==6.3.1", "pytest==7.0.0", "pytest-cov==3.0.0", "tox==3.24.5"], + "test": ["coverage==6.3.1", "pytest==7.0.1", "pytest-cov==3.0.0", "tox==3.24.5"], "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"], }, package_data={ From 2f414654affb3eab0a3c6fc0711c1a268a048399 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Feb 2022 09:06:54 +0000 Subject: [PATCH 209/336] Bump pip-tools from 6.5.0 to 6.5.1 Bumps [pip-tools](https://github.com/jazzband/pip-tools) from 6.5.0 to 6.5.1. - [Release notes](https://github.com/jazzband/pip-tools/releases) - [Changelog](https://github.com/jazzband/pip-tools/blob/master/CHANGELOG.md) - [Commits](https://github.com/jazzband/pip-tools/compare/6.5.0...6.5.1) --- updated-dependencies: - dependency-name: pip-tools dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements-dev.txt | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 968887641..2556a46a1 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -6,7 +6,7 @@ # aiofiles==0.8.0 # via -r requirements-dev.in -backports.entry-points-selectable==1.1.1 +backports-entry-points-selectable==1.1.1 # via virtualenv black==22.1.0 # via -r requirements-dev.in @@ -44,7 +44,7 @@ pathspec==0.9.0 # via black pep517==0.12.0 # via pip-tools -pip-tools==6.5.0 +pip-tools==6.5.1 # via -r requirements-dev.in platformdirs==2.4.0 # via @@ -80,9 +80,7 @@ tomli==1.2.2 tox==3.24.5 # via -r requirements-dev.in typing-extensions==4.0.0 - # via - # black - # mypy + # via mypy virtualenv==20.10.0 # via # pre-commit From e9b98b383b0d0b7c657a33ad29d4885fa31a03d2 Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Tue, 15 Feb 2022 12:43:01 +0200 Subject: [PATCH 210/336] fix rebase conflicts --- .github/config/.wordlist.txt | 1 + metadata_backend/api/handlers/object.py | 8 ++++++++ tests/test_operators.py | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/config/.wordlist.txt b/.github/config/.wordlist.txt index 690da339c..2ec2c9618 100644 --- a/.github/config/.wordlist.txt +++ b/.github/config/.wordlist.txt @@ -463,6 +463,7 @@ primaryid probeset processedreads processingtype +projectId promethion proteinclusters protfam diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index ddd1e4829..80a3c0ba2 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -252,6 +252,14 @@ async def put_object(self, req: Request) -> Response: await self._handle_check_ownership(req, collection, accession_id) + folder_op = FolderOperator(db_client) + exists, folder_id, published = await folder_op.check_object_in_folder(collection, accession_id) + if exists: + if published: + reason = "Published objects cannot be updated." 
+ LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + accession_id, title = await operator.replace_metadata_object(collection, accession_id, content) patch = await self.prepare_folder_patch_update_object(collection, accession_id, title, filename) await folder_op.update_folder(folder_id, patch) diff --git a/tests/test_operators.py b/tests/test_operators.py index ee1555b49..722f4d1a1 100644 --- a/tests/test_operators.py +++ b/tests/test_operators.py @@ -8,7 +8,6 @@ from aiohttp.web import HTTPBadRequest, HTTPNotFound, HTTPUnprocessableEntity from aiohttp.test_utils import make_mocked_coro -from unittest import IsolatedAsyncioTestCase from multidict import MultiDict, MultiDictProxy from pymongo.errors import ConnectionFailure, OperationFailure @@ -22,6 +21,7 @@ from .mockups import get_request_with_fernet + class AsyncIterator: """Async iterator based on range.""" From fea7e1a594c96c61ce0bad04daca0bb90ebbd0f0 Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Tue, 15 Feb 2022 12:58:19 +0200 Subject: [PATCH 211/336] deprecate user ownership functions --- metadata_backend/api/handlers/folder.py | 6 -- metadata_backend/api/handlers/template.py | 20 +--- metadata_backend/api/operators.py | 110 +++++++++++----------- tests/test_operators.py | 93 +++++++++--------- 4 files changed, 107 insertions(+), 122 deletions(-) diff --git a/metadata_backend/api/handlers/folder.py b/metadata_backend/api/handlers/folder.py index c52daf10c..6e8277ed2 100644 --- a/metadata_backend/api/handlers/folder.py +++ b/metadata_backend/api/handlers/folder.py @@ -196,8 +196,6 @@ async def post_folder(self, req: Request) -> Response: operator = FolderOperator(db_client) folder = await operator.create_folder(content) - # await user_op.assign_objects(current_user, "folders", [folder]) - body = ujson.dumps({"folderId": folder}, escape_forward_slashes=False) url = f"{req.scheme}://{req.host}{req.path}" @@ -336,9 +334,5 @@ async def delete_folder(self, req: Request) -> Response: _folder_id = await operator.delete_folder(folder_id) - # user_op = UserOperator(db_client) - # current_user = get_session(req)["user_info"] - # await user_op.remove_objects(current_user, "folders", [folder_id]) - LOG.info(f"DELETE folder with ID {_folder_id} was successful.") return web.Response(status=204) diff --git a/metadata_backend/api/handlers/template.py b/metadata_backend/api/handlers/template.py index e0cb97b67..b623d756b 100644 --- a/metadata_backend/api/handlers/template.py +++ b/metadata_backend/api/handlers/template.py @@ -87,10 +87,6 @@ async def post_template(self, req: Request) -> Response: LOG.error(reason) raise web.HTTPBadRequest(reason=reason) accession_id, _ = await operator.create_metadata_object(collection, tmpl["template"]) - data = [{"accessionId": accession_id, "schema": collection}] - if "tags" in tmpl: - data[0]["tags"] = tmpl["tags"] - # await user_op.assign_objects(current_user, "templates", data) tmpl_list.append({"accessionId": accession_id}) body = ujson.dumps(tmpl_list, escape_forward_slashes=False) @@ -99,11 +95,9 @@ async def post_template(self, req: Request) -> Response: reason = "template key is missing from request body." 
LOG.error(reason) raise web.HTTPBadRequest(reason=reason) + # Move projectId to template key, so that it is saved in mongo + content["template"]["projectId"] = content["projectId"] accession_id, _ = await operator.create_metadata_object(collection, content["template"]) - data = [{"accessionId": accession_id, "schema": collection}] - if "tags" in content: - data[0]["tags"] = content["tags"] - # await user_op.assign_objects(current_user, "templates", data) body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) @@ -164,16 +158,6 @@ async def delete_template(self, req: Request) -> Response: await self._handle_check_ownership(req, collection, accession_id) - # user_op = UserOperator(db_client) - # current_user = get_session(req)["user_info"] - # check_user = await user_op.check_user_has_doc(collection, current_user, accession_id) - # if check_user: - # await user_op.remove_objects(current_user, "templates", [accession_id]) - # else: - # reason = "This template does not seem to belong to any user." - # LOG.error(reason) - # raise web.HTTPUnprocessableEntity(reason=reason) - accession_id = await Operator(db_client).delete_metadata_object(collection, accession_id) LOG.info(f"DELETE template with accession ID {accession_id} in schema {collection} was successful.") diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index f71ed61ba..556384d38 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -1096,60 +1096,62 @@ async def update_user(self, user_id: str, patch: List) -> str: LOG.info(f"Updating user with id {user_id} to database succeeded.") return user_id - async def assign_objects(self, user_id: str, collection: str, object_ids: List) -> None: - """Assing object to user. - - An object can be folder(s) or templates(s). - - :param user_id: ID of user to update - :param collection: collection where to remove the id from - :param object_ids: ID or list of IDs of folder(s) to assign - :raises: HTTPBadRequest if assigning templates/folders to user was not successful - returns: None - """ - try: - await self._check_user_exists(user_id) - assign_success = await self.db_service.append( - "user", user_id, {collection: {"$each": object_ids, "$position": 0}} - ) - except (ConnectionFailure, OperationFailure) as error: - reason = f"Error happened while getting user: {error}" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - if not assign_success: - reason = "Assigning objects to user failed." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - LOG.info(f"Assigning {object_ids} from {user_id} succeeded.") - - async def remove_objects(self, user_id: str, collection: str, object_ids: List) -> None: - """Remove object from user. - - An object can be folder(s) or template(s). 
- - :param user_id: ID of user to update - :param collection: collection where to remove the id from - :param object_ids: ID or list of IDs of folder(s) to remove - :raises: HTTPBadRequest if db connection fails - returns: None - """ - remove_content: Dict - try: - await self._check_user_exists(user_id) - for obj in object_ids: - if collection == "templates": - remove_content = {"templates": {"accessionId": obj}} - else: - remove_content = {"folders": obj} - await self.db_service.remove("user", user_id, remove_content) - except (ConnectionFailure, OperationFailure) as error: - reason = f"Error happened while removing objects from user: {error}" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - LOG.info(f"Removing {object_ids} from {user_id} succeeded.") + # DEPRECATED + # async def assign_objects(self, user_id: str, collection: str, object_ids: List) -> None: + # """Assing object to user. + + # An object can be folder(s) or templates(s). + + # :param user_id: ID of user to update + # :param collection: collection where to remove the id from + # :param object_ids: ID or list of IDs of folder(s) to assign + # :raises: HTTPBadRequest if assigning templates/folders to user was not successful + # returns: None + # """ + # try: + # await self._check_user_exists(user_id) + # assign_success = await self.db_service.append( + # "user", user_id, {collection: {"$each": object_ids, "$position": 0}} + # ) + # except (ConnectionFailure, OperationFailure) as error: + # reason = f"Error happened while getting user: {error}" + # LOG.error(reason) + # raise web.HTTPBadRequest(reason=reason) + + # if not assign_success: + # reason = "Assigning objects to user failed." + # LOG.error(reason) + # raise web.HTTPBadRequest(reason=reason) + + # LOG.info(f"Assigning {object_ids} from {user_id} succeeded.") + + # DEPRECATED + # async def remove_objects(self, user_id: str, collection: str, object_ids: List) -> None: + # """Remove object from user. + + # An object can be folder(s) or template(s). + + # :param user_id: ID of user to update + # :param collection: collection where to remove the id from + # :param object_ids: ID or list of IDs of folder(s) to remove + # :raises: HTTPBadRequest if db connection fails + # returns: None + # """ + # remove_content: Dict + # try: + # await self._check_user_exists(user_id) + # for obj in object_ids: + # if collection == "templates": + # remove_content = {"templates": {"accessionId": obj}} + # else: + # remove_content = {"folders": obj} + # await self.db_service.remove("user", user_id, remove_content) + # except (ConnectionFailure, OperationFailure) as error: + # reason = f"Error happened while removing objects from user: {error}" + # LOG.error(reason) + # raise web.HTTPBadRequest(reason=reason) + + # LOG.info(f"Removing {object_ids} from {user_id} succeeded.") async def delete_user(self, user_id: str) -> str: """Delete user object from database. 
diff --git a/tests/test_operators.py b/tests/test_operators.py index 722f4d1a1..95adb8bf3 100644 --- a/tests/test_operators.py +++ b/tests/test_operators.py @@ -1079,50 +1079,55 @@ async def test_deleting_user_fails(self): with self.assertRaises(HTTPBadRequest): await operator.delete_user(self.user_id) - async def test_user_objects_remove_passes(self): - """Test remove objects method for users works.""" - operator = UserOperator(self.client) - operator.db_service.exists.return_value = True - operator.db_service.remove.return_value = self.test_user - await operator.remove_objects(self.user_generated_id, "study", ["id"]) - operator.db_service.exists.assert_called_once() - operator.db_service.remove.assert_called_once() - self.assertEqual(len(operator.db_service.remove.mock_calls), 1) - - async def test_user_objects_remove_fails(self): - """Test remove objects method for users fails.""" - operator = UserOperator(self.client) - operator.db_service.exists.return_value = True - operator.db_service.remove.side_effect = ConnectionFailure - with self.assertRaises(HTTPBadRequest): - await operator.remove_objects(self.user_generated_id, "study", ["id"]) - - async def test_user_objects_append_passes(self): - """Test append objects method for users works.""" - operator = UserOperator(self.client) - operator.db_service.exists.return_value = True - operator.db_service.append.return_value = self.test_user - await operator.assign_objects(self.user_generated_id, "study", []) - operator.db_service.exists.assert_called_once() - operator.db_service.append.assert_called_once() - self.assertEqual(len(operator.db_service.append.mock_calls), 1) - - async def test_user_objects_append_on_result_fails(self): - """Test append objects method for users fails on db response validation.""" - operator = UserOperator(self.client) - operator.db_service.exists.return_value = True - operator.db_service.append.return_value = False - with self.assertRaises(HTTPBadRequest): - await operator.assign_objects(self.user_generated_id, "study", []) - operator.db_service.exists.assert_called_once() - operator.db_service.append.assert_called_once() - - async def test_user_objects_assing_fails(self): - """Test append objects method for users fails.""" - operator = UserOperator(self.client) - operator.db_service.exists.side_effect = ConnectionFailure - with self.assertRaises(HTTPBadRequest): - await operator.assign_objects(self.user_generated_id, "study", []) + # DEPRECATED + # async def test_user_objects_remove_passes(self): + # """Test remove objects method for users works.""" + # operator = UserOperator(self.client) + # operator.db_service.exists.return_value = True + # operator.db_service.remove.return_value = self.test_user + # await operator.remove_objects(self.user_generated_id, "study", ["id"]) + # operator.db_service.exists.assert_called_once() + # operator.db_service.remove.assert_called_once() + # self.assertEqual(len(operator.db_service.remove.mock_calls), 1) + + # DEPRECATED + # async def test_user_objects_remove_fails(self): + # """Test remove objects method for users fails.""" + # operator = UserOperator(self.client) + # operator.db_service.exists.return_value = True + # operator.db_service.remove.side_effect = ConnectionFailure + # with self.assertRaises(HTTPBadRequest): + # await operator.remove_objects(self.user_generated_id, "study", ["id"]) + + # DEPRECATED + # async def test_user_objects_append_passes(self): + # """Test append objects method for users works.""" + # operator = UserOperator(self.client) + # 
operator.db_service.exists.return_value = True + # operator.db_service.append.return_value = self.test_user + # await operator.assign_objects(self.user_generated_id, "study", []) + # operator.db_service.exists.assert_called_once() + # operator.db_service.append.assert_called_once() + # self.assertEqual(len(operator.db_service.append.mock_calls), 1) + + # DEPRECATED + # async def test_user_objects_append_on_result_fails(self): + # """Test append objects method for users fails on db response validation.""" + # operator = UserOperator(self.client) + # operator.db_service.exists.return_value = True + # operator.db_service.append.return_value = False + # with self.assertRaises(HTTPBadRequest): + # await operator.assign_objects(self.user_generated_id, "study", []) + # operator.db_service.exists.assert_called_once() + # operator.db_service.append.assert_called_once() + + # DEPRECATED + # async def test_user_objects_assing_fails(self): + # """Test append objects method for users fails.""" + # operator = UserOperator(self.client) + # operator.db_service.exists.side_effect = ConnectionFailure + # with self.assertRaises(HTTPBadRequest): + # await operator.assign_objects(self.user_generated_id, "study", []) async def test_check_user_has_project_passes(self): """Test check user has project and doesn't raise an exception.""" From 4f4453f76d58298543f3d1f16d166cf41145cac4 Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Tue, 15 Feb 2022 13:35:15 +0200 Subject: [PATCH 212/336] update changelog --- CHANGELOG.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 060510f8c..f6954a2fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,7 +35,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - enum are sorted alphabetically, with the exception of other and unspecified values which are left at the end of the list - allow for accession key in `referenceAlignment` & `process sequence` as array, previously all accession keys were converted to `accessionId` which is not correct - add default `gender` as `unknown` - +- Project ownership #346 --DRAFT + - deprecated `folders` and `templates` keys from `GET /users/current` + - added new collection `project` + - added new key `projects` to `user` + - added new key `projectId` to `folder` and `template-*` + - new mandatory `/userinfo` value from AAI at login time `sdSubmitProjects` + - user is redirected to `/noproject` if key is empty or missing + - new mandatory query parameter `projectId` in `GET /folders` + - new mandatory JSON key `projectId` in `POST /folders` and `POST /templates` + - UNDECIDED: new endpoint `GET /templates` to replace `GET /users/current` `{"templates":[...]}` + - UNDECIDED: new endpoint `GET /project/{projectId}` to replace `GET /users/current` `{"templates":[...]}` + - WARNING: breaking change that requires fresh database, because "project" is new information that did not exist before, and it can't be migrated to existing user-owned hierarchy ### Changed From bc71bd86757ad27db12018bfb6a6184c4743c425 Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Tue, 15 Feb 2022 13:45:51 +0200 Subject: [PATCH 213/336] make project check in auth more reliable --- metadata_backend/api/auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata_backend/api/auth.py b/metadata_backend/api/auth.py index 4fc1f11d9..4a7c3de33 100644 --- a/metadata_backend/api/auth.py +++ b/metadata_backend/api/auth.py @@ -118,7 +118,7 @@ async def 
callback(self, req: Request) -> Response: raise web.HTTPBadRequest(reason="Invalid OIDC callback.") # If user has no project affiliations, they will be redirected to an instructions page - if "sdSubmitProjects" not in session["userinfo"]: + if not session["userinfo"].get("sdSubmitProjects", ""): LOG.error("user has no project affiliations") response = web.HTTPSeeOther(f"{self.redirect}/noproject") return response From 7c5612e107e97e239e8bc0682e7109217162b3ae Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Tue, 15 Feb 2022 13:49:23 +0200 Subject: [PATCH 214/336] update wordlist --- .github/config/.wordlist.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/config/.wordlist.txt b/.github/config/.wordlist.txt index 2ec2c9618..98f2884d4 100644 --- a/.github/config/.wordlist.txt +++ b/.github/config/.wordlist.txt @@ -389,6 +389,7 @@ nlmcatalog noindex nominallength nominalsdev +noproject northboundlatitude novaseq npm @@ -534,6 +535,7 @@ schemeuri scientificname sda sdev +sdSubmitProjects se secondaryid sectionname From 00cec8fb4b6042c8c339066651a6ec670b99b3cc Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Wed, 16 Feb 2022 11:22:00 +0200 Subject: [PATCH 215/336] update integration tests and code respectively, deprecate some features --- metadata_backend/api/handlers/template.py | 59 +++-- metadata_backend/api/handlers/user.py | 131 ++++------ metadata_backend/api/operators.py | 2 +- tests/integration/run_tests.py | 295 +++++++++++----------- tests/test_handlers.py | 146 +++++------ 5 files changed, 310 insertions(+), 323 deletions(-) diff --git a/metadata_backend/api/handlers/template.py b/metadata_backend/api/handlers/template.py index b623d756b..5eb528077 100644 --- a/metadata_backend/api/handlers/template.py +++ b/metadata_backend/api/handlers/template.py @@ -57,26 +57,9 @@ async def post_template(self, req: Request) -> Response: db_client = req.app["db_client"] content = await self._get_data(req) - # No schema validation, so must check that project is set - if "projectId" not in content: - reason = "projectId is a mandatory POST key" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - # Check that project exists + # Operators project_op = ProjectOperator(db_client) - await project_op._check_project_exists(content["projectId"]) - - # Check that user is affiliated with project user_op = UserOperator(db_client) - current_user = get_session(req)["user_info"] - user = await user_op.read_user(current_user) - user_has_project = await user_op.check_user_has_project(content["projectId"], user["userId"]) - if not user_has_project: - reason = f"user {user['userId']} is not affiliated with project {content['projectId']}" - LOG.error(reason) - raise web.HTTPUnauthorized(reason=reason) - operator = Operator(db_client) if isinstance(content, list): @@ -86,6 +69,26 @@ async def post_template(self, req: Request) -> Response: reason = f"template key is missing from request body for element: {num}." 
LOG.error(reason) raise web.HTTPBadRequest(reason=reason) + + # No schema validation, so must check that project is set + if "projectId" not in tmpl: + reason = "projectId is a mandatory POST key" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + # Check that project exists and user is affiliated with it + await project_op._check_project_exists(tmpl["projectId"]) + current_user = get_session(req)["user_info"] + user = await user_op.read_user(current_user) + user_has_project = await user_op.check_user_has_project(tmpl["projectId"], user["userId"]) + if not user_has_project: + reason = f"user {user['userId']} is not affiliated with project {tmpl['projectId']}" + LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + + # Process template + # Move projectId to template structure, so that it is saved in mongo + tmpl["template"]["projectId"] = tmpl["projectId"] accession_id, _ = await operator.create_metadata_object(collection, tmpl["template"]) tmpl_list.append({"accessionId": accession_id}) @@ -95,7 +98,25 @@ async def post_template(self, req: Request) -> Response: reason = "template key is missing from request body." LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - # Move projectId to template key, so that it is saved in mongo + + # No schema validation, so must check that project is set + if "projectId" not in content: + reason = "projectId is a mandatory POST key" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + # Check that project exists and user is affiliated with it + await project_op._check_project_exists(content["projectId"]) + current_user = get_session(req)["user_info"] + user = await user_op.read_user(current_user) + user_has_project = await user_op.check_user_has_project(content["projectId"], user["userId"]) + if not user_has_project: + reason = f"user {user['userId']} is not affiliated with project {content['projectId']}" + LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + + # Process template + # Move projectId to template structure, so that it is saved in mongo content["template"]["projectId"] = content["projectId"] accession_id, _ = await operator.create_metadata_object(collection, content["template"]) diff --git a/metadata_backend/api/handlers/user.py b/metadata_backend/api/handlers/user.py index e77ce3d3d..bf62166a5 100644 --- a/metadata_backend/api/handlers/user.py +++ b/metadata_backend/api/handlers/user.py @@ -1,18 +1,16 @@ """Handle HTTP methods for server.""" import re -from math import ceil -from typing import Any, Dict, Tuple +from typing import Any import ujson from aiohttp import web from aiohttp.web import Request, Response -from multidict import CIMultiDict from ...conf.conf import aai_config from ...helpers.logger import LOG from .restapi import RESTAPIHandler from ..middlewares import decrypt_cookie, get_session -from ..operators import FolderOperator, Operator, UserOperator +from ..operators import UserOperator class UserAPIHandler(RESTAPIHandler): @@ -85,25 +83,14 @@ async def get_user(self, req: Request) -> Response: current_user = get_session(req)["user_info"] - item_type = req.query.get("items", "").lower() - if item_type: - # Return only list of templates or list of folder IDs owned by the user - result, link_headers = await self._get_user_items(req, current_user, item_type) - return web.Response( - body=ujson.dumps(result, escape_forward_slashes=False), - status=200, - headers=link_headers, - content_type="application/json", - ) - else: - # Return whole user object if templates or folders 
are not specified in query
-            db_client = req.app["db_client"]
-            operator = UserOperator(db_client)
-            user = await operator.read_user(current_user)
-            LOG.info(f"GET user with ID {user_id} was successful.")
-            return web.Response(
-                body=ujson.dumps(user, escape_forward_slashes=False), status=200, content_type="application/json"
-            )
+        # Return whole user object
+        db_client = req.app["db_client"]
+        operator = UserOperator(db_client)
+        user = await operator.read_user(current_user)
+        LOG.info(f"GET user with ID {user_id} was successful.")
+        return web.Response(
+            body=ujson.dumps(user, escape_forward_slashes=False), status=200, content_type="application/json"
+        )

     async def patch_user(self, req: Request) -> Response:
         """Update user object with a specific user ID.
@@ -143,21 +130,8 @@ async def delete_user(self, req: Request) -> Response:
             raise web.HTTPUnauthorized(reason="Only current user deletion is allowed")
         db_client = req.app["db_client"]
         operator = UserOperator(db_client)
-        fold_ops = FolderOperator(db_client)
-        obj_ops = Operator(db_client)
         current_user = get_session(req)["user_info"]
-        user = await operator.read_user(current_user)
-
-        for folder_id in user["folders"]:
-            _folder = await fold_ops.read_folder(folder_id)
-            if "published" in _folder and not _folder["published"]:
-                for obj in _folder["drafts"] + _folder["metadataObjects"]:
-                    await obj_ops.delete_metadata_object(obj["schema"], obj["accessionId"])
-            await fold_ops.delete_folder(folder_id)
-
-        for tmpl in user["templates"]:
-            await obj_ops.delete_metadata_object(tmpl["schema"], tmpl["accessionId"])

         await operator.delete_user(current_user)
         LOG.info(f"DELETE user with ID {current_user} was successful.")
@@ -177,45 +151,46 @@ async def delete_user(self, req: Request) -> Response:
         LOG.debug("Logged out user ")
         raise response

-    async def _get_user_items(self, req: Request, user: Dict, item_type: str) -> Tuple[Dict, CIMultiDict[str]]:
-        """Get draft templates owned by the user with pagination values.
-
-        :param req: GET request
-        :param user: User object
-        :param item_type: Name of the items ("templates" or "folders")
-        :raises: HTTPUnauthorized if not current user
-        :returns: Paginated list of user draft templates and link header
-        """
-        # Check item_type parameter is not faulty
-        if item_type not in ["templates", "folders"]:
-            reason = f"{item_type} is a faulty item parameter.
Should be either folders or templates" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - page = self._get_page_param(req, "page", 1) - per_page = self._get_page_param(req, "per_page", 5) - - db_client = req.app["db_client"] - operator = UserOperator(db_client) - user_id = req.match_info["userId"] - - query = {"userId": user} - - items, total_items = await operator.filter_user(query, item_type, page, per_page) - LOG.info(f"GET user with ID {user_id} was successful.") - - result = { - "page": { - "page": page, - "size": per_page, - "totalPages": ceil(total_items / per_page), - "total" + item_type.title(): total_items, - }, - item_type: items, - } - - url = f"{req.scheme}://{req.host}{req.path}" - link_headers = await self._header_links(url, page, per_page, total_items) - LOG.debug(f"Pagination header links: {link_headers}") - LOG.info(f"Querying for user's {item_type} resulted in {total_items} {item_type}") - return result, link_headers + # DEPRECATED + # async def _get_user_items(self, req: Request, user: Dict, item_type: str) -> Tuple[Dict, CIMultiDict[str]]: + # """Get draft templates owned by the user with pagination values. + + # :param req: GET request + # :param user: User object + # :param item_type: Name of the items ("templates" or "folders") + # :raises: HTTPUnauthorized if not current user + # :returns: Paginated list of user draft templates and link header + # """ + # # Check item_type parameter is not faulty + # if item_type not in ["templates", "folders"]: + # reason = f"{item_type} is a faulty item parameter. Should be either folders or templates" + # LOG.error(reason) + # raise web.HTTPBadRequest(reason=reason) + + # page = self._get_page_param(req, "page", 1) + # per_page = self._get_page_param(req, "per_page", 5) + + # db_client = req.app["db_client"] + # operator = UserOperator(db_client) + # user_id = req.match_info["userId"] + + # query = {"userId": user} + + # items, total_items = await operator.filter_user(query, item_type, page, per_page) + # LOG.info(f"GET user with ID {user_id} was successful.") + + # result = { + # "page": { + # "page": page, + # "size": per_page, + # "totalPages": ceil(total_items / per_page), + # "total" + item_type.title(): total_items, + # }, + # item_type: items, + # } + + # url = f"{req.scheme}://{req.host}{req.path}" + # link_headers = await self._header_links(url, page, per_page, total_items) + # LOG.debug(f"Pagination header links: {link_headers}") + # LOG.info(f"Querying for user's {item_type} resulted in {total_items} {item_type}") + # return result, link_headers diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 556384d38..ca5b096f2 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -945,7 +945,7 @@ async def check_user_has_doc(self, req: web.Request, collection: str, user_id: s project_id = "" if collection.startswith("template"): object_operator = Operator(db_client) - project_id = await object_operator.get_object_project("template", accession_id) + project_id = await object_operator.get_object_project(collection, accession_id) elif collection == "folders": folder_operator = FolderOperator(db_client) project_id = await folder_operator.get_folder_project(accession_id) diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index 24d30fd1c..5548bab5f 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -93,6 +93,18 @@ async def login(sess, sub, given, family): LOG.debug("Doing mock user 
login") +async def get_user_data(sess): + """Get current logged in user's data model. + + :param sess: HTTP session in which request call is made + """ + async with sess.get(f"{base_url}/users/current") as resp: + LOG.debug("Get userdata") + ans = await resp.json() + assert resp.status == 200, f"HTTP Status code error {resp.status} {ans}" + return ans + + async def create_request_data(schema, filename): """Create request data from pairs of schemas and filenames. @@ -319,18 +331,26 @@ async def delete_draft(sess, schema, draft_id): assert resp.status == 204, f"HTTP Status code error, got {resp.status}" -async def post_template_json(sess, schema, filename): +async def post_template_json(sess, schema, filename, project_id): """Post one metadata object within session, returns accessionId. :param sess: HTTP session in which request call is made :param schema: name of the schema (folder) used for testing :param filename: name of the file used for testing. + :param project_id: id of the project the folder belongs to """ request_data = await create_request_json_data(schema, filename) + request_data = json.loads(request_data) + if type(request_data) is list: + for rd in request_data: + rd["projectId"] = project_id + else: + request_data["projectId"] = project_id + request_data = json.dumps(request_data) async with sess.post(f"{templates_url}/{schema}", data=request_data) as resp: LOG.debug(f"Adding new template object to {schema}, via JSON file {filename}") - assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() + assert resp.status == 201, f"HTTP Status code error, got {resp.status}" if isinstance(ans, list): return ans else: @@ -351,15 +371,19 @@ async def get_template(sess, schema, template_id): return json.dumps(ans) -async def patch_template(sess, schema, template_id, update_filename): +async def patch_template(sess, schema, template_id, update_filename, project_id): """Patch one metadata object within session, return accessionId. :param sess: HTTP session in which request call is made :param schema: name of the schema (folder) used for testing :param template_id: id of the draft :param update_filename: name of the file used to use for updating data. 
+    :param project_id: id of the project the template belongs to
     """
     request_data = await create_request_json_data(schema, update_filename)
+    request_data = json.loads(request_data)
+    request_data["projectId"] = project_id
+    request_data = json.dumps(request_data)
     async with sess.patch(f"{templates_url}/{schema}/{template_id}", data=request_data) as resp:
         LOG.debug(f"Update draft object in {schema}")
         assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
@@ -798,7 +822,7 @@ async def test_getting_all_objects_from_schema_works(sess, folder_id):
         assert ans["page"]["page"] == 1
         assert ans["page"]["size"] == 10
         assert ans["page"]["totalPages"] == 2
-        assert ans["page"]["totalObjects"] == 14
+        assert ans["page"]["totalObjects"] == 16
         assert len(ans["objects"]) == 10

     # Test with custom pagination values
@@ -807,8 +831,8 @@
         ans = await resp.json()
         assert ans["page"]["page"] == 2
         assert ans["page"]["size"] == 3
-        assert ans["page"]["totalPages"] == 5
+        assert ans["page"]["totalPages"] == 6
-        assert ans["page"]["totalObjects"] == 14
+        assert ans["page"]["totalObjects"] == 16
         assert len(ans["objects"]) == 3

     # Test with wrong pagination values
@@ -821,13 +845,14 @@
     await asyncio.gather(*[delete_object(sess, "study", accession_id) for accession_id, _ in files])


-async def test_crud_folders_works(sess):
+async def test_crud_folders_works(sess, project_id: str):
     """Test folders REST api POST, GET, PATCH, PUBLISH and DELETE reqs.

     :param sess: HTTP session in which request call is made
+    :param project_id: id of the project the folder belongs to
     """
     # Create new folder and check its creation succeeded
-    folder_data = {"name": "Mock Folder", "description": "Mock Base folder to folder ops"}
+    folder_data = {"name": "Mock Folder", "description": "Mock Base folder to folder ops", "projectId": project_id}
     folder_id = await post_folder(sess, folder_data)
     async with sess.get(f"{folders_url}/{folder_id}") as resp:
         LOG.debug(f"Checking that folder {folder_id} was created")
@@ -918,13 +943,14 @@ async def test_crud_folders_works(sess):
     assert resp.status == 404, f"HTTP Status code error, got {resp.status}"


-async def test_crud_folders_works_no_publish(sess):
+async def test_crud_folders_works_no_publish(sess, project_id):
     """Test folders REST api POST, GET, PATCH, PUBLISH and DELETE reqs.

     :param sess: HTTP session in which request call is made
+    :param project_id: id of the project the folder belongs to
     """
     # Create new folder and check its creation succeeded
-    folder_data = {"name": "Mock Unpublished folder", "description": "test umpublished folder"}
+    folder_data = {"name": "Mock Unpublished folder", "description": "test unpublished folder", "projectId": project_id}
     folder_id = await post_folder(sess, folder_data)
     async with sess.get(f"{folders_url}/{folder_id}") as resp:
         LOG.debug(f"Checking that folder {folder_id} was created")
@@ -998,13 +1024,14 @@ async def test_crud_folders_works_no_publish(sess):
     assert expected_true, "folder still exists at user"


-async def test_adding_doi_info_to_folder_works(sess):
+async def test_adding_doi_info_to_folder_works(sess, project_id):
    """Test that proper DOI info can be added to folder and bad DOI info cannot be.
:param sess: HTTP session in which request call is made + :param project_id: id of the project the folder belongs to """ # Create new folder and check its creation succeeded - folder_data = {"name": "DOI Folder", "description": "Mock Base folder for adding DOI info"} + folder_data = {"name": "DOI Folder", "description": "Mock Base folder for adding DOI info", "projectId": project_id} folder_id = await post_folder(sess, folder_data) async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was created") @@ -1054,13 +1081,14 @@ async def test_adding_doi_info_to_folder_works(sess): assert resp.status == 404, f"HTTP Status code error, got {resp.status}" -async def test_getting_paginated_folders(sess): +async def test_getting_paginated_folders(sess, project_id): """Check that /folders returns folders with correct paginations. :param sess: HTTP session in which request call is made + :param project_id: id of the project the folder belongs to """ # Test default values - async with sess.get(f"{folders_url}") as resp: + async with sess.get(f"{folders_url}?projectId={project_id}") as resp: # The folders received here are from previous # tests where the folders were not deleted assert resp.status == 200 @@ -1068,21 +1096,21 @@ async def test_getting_paginated_folders(sess): assert ans["page"]["page"] == 1 assert ans["page"]["size"] == 5 assert ans["page"]["totalPages"] == 2 - assert ans["page"]["totalFolders"] == 6 + assert ans["page"]["totalFolders"] == 7 assert len(ans["folders"]) == 5 # Test with custom pagination values - async with sess.get(f"{folders_url}?page=2&per_page=3") as resp: + async with sess.get(f"{folders_url}?page=2&per_page=3&projectId={project_id}") as resp: assert resp.status == 200 ans = await resp.json() assert ans["page"]["page"] == 2 assert ans["page"]["size"] == 3 - assert ans["page"]["totalPages"] == 2 - assert ans["page"]["totalFolders"] == 6 + assert ans["page"]["totalPages"] == 3 + assert ans["page"]["totalFolders"] == 7 assert len(ans["folders"]) == 3 # Test querying only published folders - async with sess.get(f"{folders_url}?published=true") as resp: + async with sess.get(f"{folders_url}?published=true&projectId={project_id}") as resp: assert resp.status == 200 ans = await resp.json() assert ans["page"]["page"] == 1 @@ -1092,51 +1120,52 @@ async def test_getting_paginated_folders(sess): assert len(ans["folders"]) == 1 # Test querying only draft folders - async with sess.get(f"{folders_url}?published=false") as resp: + async with sess.get(f"{folders_url}?published=false&projectId={project_id}") as resp: assert resp.status == 200 ans = await resp.json() assert ans["page"]["page"] == 1 assert ans["page"]["size"] == 5 - assert ans["page"]["totalPages"] == 1 - assert ans["page"]["totalFolders"] == 5 + assert ans["page"]["totalPages"] == 2 + assert ans["page"]["totalFolders"] == 6 assert len(ans["folders"]) == 5 # Test with wrong pagination values - async with sess.get(f"{folders_url}?page=-1") as resp: + async with sess.get(f"{folders_url}?page=-1&projectId={project_id}") as resp: assert resp.status == 400 - async with sess.get(f"{folders_url}?per_page=0") as resp: + async with sess.get(f"{folders_url}?per_page=0&projectId={project_id}") as resp: assert resp.status == 400 - async with sess.get(f"{folders_url}?published=asdf") as resp: + async with sess.get(f"{folders_url}?published=asdf&projectId={project_id}") as resp: assert resp.status == 400 -async def test_getting_folders_filtered_by_name(sess): +async def 
test_getting_folders_filtered_by_name(sess, project_id): """Check that /folders returns folders filtered by name. :param sess: HTTP session in which request call is made + :param project_id: id of the project the folder belongs to """ names = [" filter new ", "_filter_", "-filter-", "_extra-", "_2021special_"] folders = [] for name in names: - folder_data = {"name": f"Test{name}name", "description": "Test filtering name"} + folder_data = {"name": f"Test{name}name", "description": "Test filtering name", "projectId": project_id} folders.append(await post_folder(sess, folder_data)) - async with sess.get(f"{folders_url}?name=filter") as resp: + async with sess.get(f"{folders_url}?name=filter&projectId={project_id}") as resp: ans = await resp.json() assert resp.status == 200, f"HTTP Status code error {resp.status} {ans}" assert ans["page"]["totalFolders"] == 3, f'Shold be 3 returned {ans["page"]["totalFolders"]}' - async with sess.get(f"{folders_url}?name=extra") as resp: + async with sess.get(f"{folders_url}?name=extra&projectId={project_id}") as resp: ans = await resp.json() assert resp.status == 200, f"HTTP Status code error {resp.status} {ans}" assert ans["page"]["totalFolders"] == 1 - async with sess.get(f"{folders_url}?name=2021 special") as resp: + async with sess.get(f"{folders_url}?name=2021 special&projectId={project_id}") as resp: assert resp.status == 200 ans = await resp.json() assert ans["page"]["totalFolders"] == 0 - async with sess.get(f"{folders_url}?name=new extra") as resp: + async with sess.get(f"{folders_url}?name=new extra&projectId={project_id}") as resp: assert resp.status == 200 ans = await resp.json() assert ans["page"]["totalFolders"] == 2 @@ -1145,10 +1174,11 @@ async def test_getting_folders_filtered_by_name(sess): await delete_folder(sess, folder) -async def test_getting_folders_filtered_by_date_created(sess): +async def test_getting_folders_filtered_by_date_created(sess, project_id): """Check that /folders returns folders filtered by date created. 
:param sess: HTTP session in which request call is made + :param project_id: id of the project the folder belongs to """ async with sess.get(f"{users_url}/current") as resp: ans = await resp.json() @@ -1165,10 +1195,13 @@ async def test_getting_folders_filtered_by_date_created(sess): "name": f"Test date {stamp}", "description": "Test filtering date", "dateCreated": datetime.strptime(stamp, format).timestamp(), + "projectId": project_id, } folders.append(await create_folder(folder_data, user)) - async with sess.get(f"{folders_url}?date_created_start=2015-01-01&date_created_end=2015-12-31") as resp: + async with sess.get( + f"{folders_url}?date_created_start=2015-01-01&date_created_end=2015-12-31&projectId={project_id}" + ) as resp: ans = await resp.json() assert resp.status == 200, f"returned status {resp.status}, error {ans}" assert ans["page"]["totalFolders"] == 2, f'Shold be 2 returned {ans["page"]["totalFolders"]}' @@ -1181,10 +1214,13 @@ async def test_getting_folders_filtered_by_date_created(sess): "name": f"Test date {stamp}", "description": "Test filtering date", "dateCreated": datetime.strptime(stamp, format).timestamp(), + "projectId": project_id, } folders.append(await create_folder(folder_data, user)) - async with sess.get(f"{folders_url}?date_created_start=2013-02-01&date_created_end=2013-03-30") as resp: + async with sess.get( + f"{folders_url}?date_created_start=2013-02-01&date_created_end=2013-03-30&projectId={project_id}" + ) as resp: ans = await resp.json() assert resp.status == 200, f"returned status {resp.status}, error {ans}" assert ans["page"]["totalFolders"] == 2, f'Shold be 2 returned {ans["page"]["totalFolders"]}' @@ -1202,16 +1238,21 @@ async def test_getting_folders_filtered_by_date_created(sess): "name": f"Test date {stamp}", "description": "Test filtering date", "dateCreated": datetime.strptime(stamp, format).timestamp(), + "projectId": project_id, } folders.append(await create_folder(folder_data, user)) - async with sess.get(f"{folders_url}?date_created_start=2012-01-15&date_created_end=2012-01-15") as resp: + async with sess.get( + f"{folders_url}?date_created_start=2012-01-15&date_created_end=2012-01-15&projectId={project_id}" + ) as resp: ans = await resp.json() assert resp.status == 200, f"returned status {resp.status}, error {ans}" assert ans["page"]["totalFolders"] == 2, f'Shold be 2 returned {ans["page"]["totalFolders"]}' # Test parameters date_created_... and name together - async with sess.get(f"{folders_url}?name=2013&date_created_start=2012-01-01&date_created_end=2016-12-31") as resp: + async with sess.get( + f"{folders_url}?name=2013&date_created_start=2012-01-01&date_created_end=2016-12-31&projectId={project_id}" + ) as resp: ans = await resp.json() assert resp.status == 200, f"returned status {resp.status}, error {ans}" assert ans["page"]["totalFolders"] == 4, f'Shold be 4 returned {ans["page"]["totalFolders"]}' @@ -1220,142 +1261,76 @@ async def test_getting_folders_filtered_by_date_created(sess): await delete_folder(sess, folder) -async def test_getting_user_items(sess): - """Test querying user's templates or folders in the user object with GET user request. 
- - :param sess: HTTP session in which request call is made - """ - # Get real user ID - async with sess.get(f"{users_url}/{user_id}") as resp: - LOG.debug(f"Reading user {user_id}") - assert resp.status == 200, f"HTTP Status code error, got {resp.status}" - - # Add template to user - template_id = await post_template_json(sess, "study", "SRP000539_template.json") - - # Test querying for list of user draft templates - async with sess.get(f"{users_url}/{user_id}?items=templates") as resp: - LOG.debug(f"Reading user {user_id} templates") - assert resp.status == 200, f"HTTP Status code error, got {resp.status}" - ans = await resp.json() - assert ans["page"]["page"] == 1 - assert ans["page"]["size"] == 5 - assert ans["page"]["totalPages"] == 1 - assert ans["page"]["totalTemplates"] == 1 - assert len(ans["templates"]) == 1 - - async with sess.get(f"{users_url}/{user_id}?items=templates&per_page=3") as resp: - LOG.debug(f"Reading user {user_id} templates") - assert resp.status == 200, f"HTTP Status code error, got {resp.status}" - ans = await resp.json() - assert ans["page"]["page"] == 1 - assert ans["page"]["size"] == 3 - assert len(ans["templates"]) == 1 - - await delete_template(sess, "study", template_id) # Future tests will assume the templates key is empty - - # Test querying for the list of folder IDs - async with sess.get(f"{users_url}/{user_id}?items=folders") as resp: - LOG.debug(f"Reading user {user_id} folder list") - assert resp.status == 200, f"HTTP Status code error, got {resp.status}" - ans = await resp.json() - assert ans["page"]["page"] == 1 - assert ans["page"]["size"] == 5 - assert ans["page"]["totalPages"] == 2 - assert ans["page"]["totalFolders"] == 6 - assert len(ans["folders"]) == 5 - - # Test the same with a bad query param - async with sess.get(f"{users_url}/{user_id}?items=bad") as resp: - LOG.debug(f"Reading user {user_id} but with faulty item descriptor") - assert resp.status == 400, f"HTTP Status code error, got {resp.status}" - - -async def test_crud_users_works(sess): +async def test_crud_users_works(sess, project_id): """Test users REST api GET, PATCH and DELETE reqs. 
:param sess: HTTP session in which request call is made + :param project_id: id of the project the folder belongs to """ # Check user exists in database (requires an user object to be mocked) async with sess.get(f"{users_url}/{user_id}") as resp: LOG.debug(f"Reading user {user_id}") assert resp.status == 200, f"HTTP Status code error, got {resp.status}" - response = await resp.json() - real_user_id = response["userId"] # Add user to session and create a patch to add folder to user - folder_not_published = {"name": "Mock User Folder", "description": "Mock folder for testing users"} + folder_not_published = { + "name": "Mock User Folder", + "description": "Mock folder for testing users", + "projectId": project_id, + } folder_id = await post_folder(sess, folder_not_published) - async with sess.get(f"{users_url}/{user_id}") as resp: + async with sess.get(f"{folders_url}/{folder_id}?projectId={project_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was added") res = await resp.json() - assert res["userId"] == real_user_id, "user id does not match" - assert res["name"] == f"{test_user_given} {test_user_family}", "user name mismatch" - assert res["templates"] == [], "user templates content mismatch" - assert folder_id in res["folders"], "folder added missing mismatch" + assert res["name"] == folder_not_published["name"] + assert res["projectId"] == folder_not_published["projectId"] - folder_published = {"name": "Another test Folder", "description": "Test published folder does not get deleted"} + folder_published = { + "name": "Another test Folder", + "description": "Test published folder does not get deleted", + "projectId": project_id, + } publish_folder_id = await post_folder(sess, folder_published) await publish_folder(sess, publish_folder_id) - async with sess.get(f"{folders_url}/{publish_folder_id}") as resp: + async with sess.get(f"{folders_url}/{publish_folder_id}?projectId={project_id}") as resp: LOG.debug(f"Checking that folder {publish_folder_id} was published") res = await resp.json() assert res["published"] is True, "folder is not published, expected True" - folder_not_published = {"name": "Delete Folder", "description": "Mock folder to delete while testing users"} + folder_not_published = { + "name": "Delete Folder", + "description": "Mock folder to delete while testing users", + "projectId": project_id, + } delete_folder_id = await post_folder(sess, folder_not_published) - patch_delete_folder = [{"op": "add", "path": "/folders/-", "value": [delete_folder_id]}] - - await patch_user(sess, user_id, real_user_id, patch_delete_folder) - async with sess.get(f"{users_url}/{user_id}") as resp: + async with sess.get(f"{folders_url}/{delete_folder_id}?projectId={project_id}") as resp: LOG.debug(f"Checking that folder {delete_folder_id} was added") res = await resp.json() - assert delete_folder_id in res["folders"], "deleted folder added does not exists" + assert res["name"] == folder_not_published["name"] + assert res["projectId"] == folder_not_published["projectId"] await delete_folder(sess, delete_folder_id) - async with sess.get(f"{users_url}/{user_id}") as resp: + async with sess.get(f"{folders_url}/{delete_folder_id}?projectId={project_id}") as resp: LOG.debug(f"Checking that folder {delete_folder_id} was deleted") - res = await resp.json() - assert delete_folder_id not in res["folders"], "delete folder still exists at user" + assert resp.status == 404 - template_id = await post_template_json(sess, "study", "SRP000539_template.json") - await patch_template(sess, "study", 
template_id, "patch.json") - async with sess.get(f"{users_url}/{user_id}") as resp: + template_id = await post_template_json(sess, "study", "SRP000539_template.json", project_id) + await patch_template(sess, "study", template_id, "patch.json", project_id) + async with sess.get(f"{templates_url}/study/{template_id}") as resp: LOG.debug(f"Checking that template: {template_id} was added") res = await resp.json() - assert res["templates"][0]["accessionId"] == template_id, "added template does not exists" - assert "tags" not in res["templates"][0] - - patch_change_tags_object = [ - { - "op": "add", - "path": "/templates/0/tags", - "value": {"displayTitle": "Test"}, - } - ] - await patch_user(sess, user_id, real_user_id, patch_change_tags_object) - - async with sess.get(f"{users_url}/{user_id}") as resp: - LOG.debug(f"Checking that template: {template_id} was added") - res = await resp.json() - assert res["templates"][0]["accessionId"] == template_id, "added template does not exists" - assert res["templates"][0]["tags"]["displayTitle"] == "Test" + assert res["accessionId"] == template_id + assert res["projectId"] == project_id + assert res["identifiers"]["primaryId"] == "SRP000539" await delete_template(sess, "study", template_id) + async with sess.get(f"{templates_url}/study/{template_id}") as resp: + LOG.debug(f"Checking that template {template_id} was deleted") + assert resp.status == 404 - async with sess.get(f"{users_url}/{user_id}") as resp: - LOG.debug(f"Checking that template {template_id} was added") - res = await resp.json() - assert len(res["templates"]) == 0, "template was not deleted from users" - - template_ids = await post_template_json(sess, "study", "SRP000539_list.json") + template_ids = await post_template_json(sess, "study", "SRP000539_list.json", project_id) assert len(template_ids) == 2, "templates could not be added as batch" - async with sess.get(f"{users_url}/{user_id}") as resp: - LOG.debug(f"Checking that template {template_id} was added") - res = await resp.json() - assert res["templates"][1]["tags"]["submissionType"] == "Form" - # Delete user await delete_user(sess, user_id) # 401 means API is innacessible thus session ended @@ -1365,29 +1340,32 @@ async def test_crud_users_works(sess): assert resp.status == 401, f"HTTP Status code error, got {resp.status}" -async def test_get_folders(sess, folder_id: str): +async def test_get_folders(sess, folder_id: str, project_id: str): """Test folders REST api GET . :param sess: HTTP session in which request call is made :param folder_id: id of the folder used to group submission objects + :param project_id: id of the project the folder belongs to """ - async with sess.get(f"{folders_url}") as resp: + async with sess.get(f"{folders_url}?projectId={project_id}") as resp: LOG.debug(f"Reading folder {folder_id}") assert resp.status == 200, f"HTTP Status code error, got {resp.status}" response = await resp.json() + LOG.error(response) assert len(response["folders"]) == 1 assert response["page"] == {"page": 1, "size": 5, "totalPages": 1, "totalFolders": 1} assert response["folders"][0]["folderId"] == folder_id -async def test_get_folders_objects(sess, folder_id: str): +async def test_get_folders_objects(sess, folder_id: str, project_id: str): """Test folders REST api GET with objects. 
:param sess: HTTP session in which request call is made :param folder_id: id of the folder used to group submission objects + :param project_id: id of the project the folder belongs to """ accession_id = await post_object_json(sess, "study", folder_id, "SRP000539.json") - async with sess.get(f"{folders_url}") as resp: + async with sess.get(f"{folders_url}?projectId={project_id}") as resp: LOG.debug(f"Reading folder {folder_id}") assert resp.status == 200, f"HTTP Status code error, got {resp.status}" response = await resp.json() @@ -1404,7 +1382,7 @@ async def test_get_folders_objects(sess, folder_id: str): } ] await patch_folder(sess, folder_id, patch_change_tags_object) - async with sess.get(f"{folders_url}") as resp: + async with sess.get(f"{folders_url}?projectId={project_id}") as resp: LOG.debug(f"Reading folder {folder_id}") assert resp.status == 200, f"HTTP Status code error, got {resp.status}" response = await resp.json() @@ -1516,6 +1494,8 @@ async def main(): LOG.debug("=== Login other mock user ===") await login(sess, other_test_user, other_test_user_given, other_test_user_family) + user_data = await get_user_data(sess) + project_id = user_data["projects"][0]["projectId"] # Test add, modify, validate and release action with submissions # added to validate that objects belong to a specific user @@ -1523,21 +1503,25 @@ async def main(): submission_folder = { "name": "submission test 1", "description": "submission test folder 1", + "projectId": project_id, } submission_folder_id = await post_folder(sess, submission_folder) - await test_get_folders(sess, submission_folder_id) - await test_get_folders_objects(sess, submission_folder_id) + await test_get_folders(sess, submission_folder_id, project_id) + await test_get_folders_objects(sess, submission_folder_id, project_id) await test_submissions_work(sess, submission_folder_id) async with aiohttp.ClientSession() as sess: LOG.debug("=== Login mock user ===") await login(sess, test_user, test_user_given, test_user_family) + user_data = await get_user_data(sess) + project_id = user_data["projects"][0]["projectId"] # Test adding and getting objects LOG.debug("=== Testing basic CRUD operations ===") basic_folder = { "name": "basic test", "description": "basic test folder", + "projectId": project_id, } basic_folder_id = await post_folder(sess, basic_folder) @@ -1550,6 +1534,7 @@ async def main(): put_object_folder = { "name": "test put object", "description": "put object test folder", + "projectId": project_id, } put_object_folder = await post_folder(sess, put_object_folder) @@ -1560,6 +1545,7 @@ async def main(): draft_folder = { "name": "basic test draft", "description": "basic test draft folder", + "projectId": project_id, } draft_folder_id = await post_folder(sess, draft_folder) await asyncio.gather( @@ -1579,6 +1565,7 @@ async def main(): query_folder = { "name": "basic test query", "description": "basic test query folder", + "projectId": project_id, } query_folder_id = await post_folder(sess, query_folder) await test_querying_works(sess, query_folder_id) @@ -1588,32 +1575,34 @@ async def main(): pagination_folder = { "name": "basic test pagination", "description": "basic test pagination folder", + "projectId": project_id, } pagination_folder_id = await post_folder(sess, pagination_folder) await test_getting_all_objects_from_schema_works(sess, pagination_folder_id) # Test creating, reading, updating and deleting folders LOG.debug("=== Testing basic CRUD folder operations ===") - await test_crud_folders_works(sess) - await 
test_crud_folders_works_no_publish(sess) - await test_adding_doi_info_to_folder_works(sess) + await test_crud_folders_works(sess, project_id) + await test_crud_folders_works_no_publish(sess, project_id) + await test_adding_doi_info_to_folder_works(sess, project_id) # Test getting a list of folders and draft templates owned by the user LOG.debug("=== Testing getting folders, draft folders and draft templates with pagination ===") - await test_getting_paginated_folders(sess) - await test_getting_user_items(sess) + await test_getting_paginated_folders(sess, project_id) + # await test_getting_user_items(sess, project_id) # DEPRECATED LOG.debug("=== Testing getting folders filtered with name and date created ===") - await test_getting_folders_filtered_by_name(sess) + await test_getting_folders_filtered_by_name(sess, project_id) # too much of a hassle to make test work with tls db connection in github # must be improven in next integration test iteration if not TLS: - await test_getting_folders_filtered_by_date_created(sess) + await test_getting_folders_filtered_by_date_created(sess, project_id) # Test add, modify, validate and release action with submissions LOG.debug("=== Testing actions within submissions ===") submission_folder = { "name": "submission test", "description": "submission test folder", + "projectId": project_id, } submission_folder_id = await post_folder(sess, submission_folder) await test_submissions_work(sess, submission_folder_id) @@ -1625,7 +1614,7 @@ async def main(): # Test reading, updating and deleting users # this needs to be done last as it deletes users LOG.debug("=== Testing basic CRUD user operations ===") - await test_crud_users_works(sess) + await test_crud_users_works(sess, project_id) # Remove the remaining user in the test database async with aiohttp.ClientSession() as sess: diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 9c167cdc6..b82d9daee 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -661,20 +661,22 @@ async def setUpAsync(self): self.patch_useroperator = patch(class_useroperator, **self.useroperator_config, spec=True) self.MockedUserOperator = self.patch_useroperator.start() - class_folderoperator = "metadata_backend.api.handlers.user.FolderOperator" - self.patch_folderoperator = patch(class_folderoperator, **self.folderoperator_config, spec=True) - self.MockedFolderOperator = self.patch_folderoperator.start() + # DEPRECATED + # class_folderoperator = "metadata_backend.api.handlers.user.FolderOperator" + # self.patch_folderoperator = patch(class_folderoperator, **self.folderoperator_config, spec=True) + # self.MockedFolderOperator = self.patch_folderoperator.start() - class_operator = "metadata_backend.api.handlers.user.Operator" - self.patch_operator = patch(class_operator, **self.operator_config, spec=True) - self.MockedOperator = self.patch_operator.start() + # class_operator = "metadata_backend.api.handlers.user.Operator" + # self.patch_operator = patch(class_operator, **self.operator_config, spec=True) + # self.MockedOperator = self.patch_operator.start() async def tearDownAsync(self): """Cleanup mocked stuff.""" await super().tearDownAsync() self.patch_useroperator.stop() - self.patch_folderoperator.stop() - self.patch_operator.stop() + # DEPRECATED + # self.patch_folderoperator.stop() + # self.patch_operator.stop() async def test_get_user_works(self): """Test user object is returned when correct user id is given.""" @@ -684,76 +686,76 @@ async def test_get_user_works(self): json_resp = await response.json() 
self.assertEqual(self.test_user, json_resp) - async def test_get_user_drafts_with_no_drafts(self): - """Test getting user drafts when user has no drafts.""" - response = await self.client.get("/users/current?items=templates") - self.assertEqual(response.status, 200) - self.MockedUserOperator().filter_user.assert_called_once() - json_resp = await response.json() - result = { - "page": { - "page": 1, - "size": 5, - "totalPages": 0, - "totalTemplates": 0, - }, - "templates": [], - } - self.assertEqual(json_resp, result) - - async def test_get_user_templates_with_1_template(self): - """Test getting user templates when user has 1 draft.""" - user = self.test_user - user["templates"].append(self.metadata_json) - self.MockedUserOperator().filter_user.return_value = (user["templates"], 1) - response = await self.client.get("/users/current?items=templates") - self.assertEqual(response.status, 200) - self.MockedUserOperator().filter_user.assert_called_once() - json_resp = await response.json() - result = { - "page": { - "page": 1, - "size": 5, - "totalPages": 1, - "totalTemplates": 1, - }, - "templates": [self.metadata_json], - } - self.assertEqual(json_resp, result) - - async def test_get_user_folder_list(self): - """Test get user with folders url returns a folder ID.""" - self.MockedUserOperator().filter_user.return_value = (self.test_user["folders"], 1) - response = await self.client.get("/users/current?items=folders") - self.assertEqual(response.status, 200) - self.MockedUserOperator().filter_user.assert_called_once() - json_resp = await response.json() - result = { - "page": { - "page": 1, - "size": 5, - "totalPages": 1, - "totalFolders": 1, - }, - "folders": ["FOL12345678"], - } - self.assertEqual(json_resp, result) - - async def test_get_user_items_with_bad_param(self): - """Test that error is raised if items parameter in query is not templates or folders.""" - response = await self.client.get("/users/current?items=wrong_thing") - self.assertEqual(response.status, 400) - json_resp = await response.json() - self.assertEqual( - json_resp["detail"], "wrong_thing is a faulty item parameter. 
Should be either folders or templates" - ) + # DEPRECATED + # async def test_get_user_drafts_with_no_drafts(self): + # """Test getting user drafts when user has no drafts.""" + # response = await self.client.get("/users/current?items=templates") + # self.assertEqual(response.status, 200) + # self.MockedUserOperator().filter_user.assert_called_once() + # json_resp = await response.json() + # result = { + # "page": { + # "page": 1, + # "size": 5, + # "totalPages": 0, + # "totalTemplates": 0, + # }, + # "templates": [], + # } + # self.assertEqual(json_resp, result) + + # async def test_get_user_templates_with_1_template(self): + # """Test getting user templates when user has 1 draft.""" + # user = self.test_user + # user["templates"].append(self.metadata_json) + # self.MockedUserOperator().filter_user.return_value = (user["templates"], 1) + # response = await self.client.get("/users/current?items=templates") + # self.assertEqual(response.status, 200) + # self.MockedUserOperator().filter_user.assert_called_once() + # json_resp = await response.json() + # result = { + # "page": { + # "page": 1, + # "size": 5, + # "totalPages": 1, + # "totalTemplates": 1, + # }, + # "templates": [self.metadata_json], + # } + # self.assertEqual(json_resp, result) + + # async def test_get_user_folder_list(self): + # """Test get user with folders url returns a folder ID.""" + # self.MockedUserOperator().filter_user.return_value = (self.test_user["folders"], 1) + # response = await self.client.get("/users/current?items=folders") + # self.assertEqual(response.status, 200) + # self.MockedUserOperator().filter_user.assert_called_once() + # json_resp = await response.json() + # result = { + # "page": { + # "page": 1, + # "size": 5, + # "totalPages": 1, + # "totalFolders": 1, + # }, + # "folders": ["FOL12345678"], + # } + # self.assertEqual(json_resp, result) + + # async def test_get_user_items_with_bad_param(self): + # """Test that error is raised if items parameter in query is not templates or folders.""" + # response = await self.client.get("/users/current?items=wrong_thing") + # self.assertEqual(response.status, 400) + # json_resp = await response.json() + # self.assertEqual( + # json_resp["detail"], "wrong_thing is a faulty item parameter. 
Should be either folders or templates" + # ) async def test_user_deletion_is_called(self): """Test that user object would be deleted.""" self.MockedUserOperator().read_user.return_value = self.test_user self.MockedUserOperator().delete_user.return_value = None await self.client.delete("/users/current") - self.MockedUserOperator().read_user.assert_called_once() self.MockedUserOperator().delete_user.assert_called_once() async def test_update_user_fails_with_wrong_key(self): From 99e03a037b885ada76ee957fc57d5dae0c7aeea1 Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Wed, 16 Feb 2022 11:36:25 +0200 Subject: [PATCH 216/336] remove some deprecated code, add deprecation flags to uncertain bits of code --- CHANGELOG.md | 1 + metadata_backend/api/handlers/object.py | 2 +- metadata_backend/api/handlers/restapi.py | 15 +---- metadata_backend/api/handlers/user.py | 48 +-------------- metadata_backend/api/operators.py | 57 ------------------ tests/integration/run_tests.py | 1 - tests/test_handlers.py | 77 ------------------------ tests/test_operators.py | 50 --------------- 8 files changed, 5 insertions(+), 246 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6954a2fb..d55ea96f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - add default `gender` as `unknown` - Project ownership #346 --DRAFT - deprecated `folders` and `templates` keys from `GET /users/current` + - as a side effect, deprecated `items` query param from the same endpoint - added new collection `project` - added new key `projects` to `user` - added new key `projectId` to `folder` and `template-*` diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index 80a3c0ba2..18bb525d1 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -33,7 +33,7 @@ async def _handle_query(self, req: Request) -> Response: per_page = self._get_page_param(req, "per_page", 10) db_client = req.app["db_client"] - filter_list = await self._handle_user_objects_collection(req, collection) + filter_list = await self._handle_user_objects_collection(req, collection) # DEPRECATED, what to do? data, page_num, page_size, total_objects = await Operator(db_client).query_metadata_database( collection, req.query, page, per_page, filter_list ) diff --git a/metadata_backend/api/handlers/restapi.py b/metadata_backend/api/handlers/restapi.py index 71e3a48bc..33777b8a9 100644 --- a/metadata_backend/api/handlers/restapi.py +++ b/metadata_backend/api/handlers/restapi.py @@ -124,6 +124,7 @@ async def _get_collection_objects( yield result + # DEPRECATED, what to do? async def _handle_user_objects_collection(self, req: Request, collection: str) -> List: """Retrieve list of objects accession ids belonging to user in collection. @@ -145,20 +146,6 @@ async def _handle_user_objects_collection(self, req: Request, collection: str) - return dt - async def _filter_by_user(self, req: Request, collection: str, seq: List) -> AsyncGenerator: - """For a list of objects check if these are owned by a user. - - This can be called using a partial from functools. - - :param req: HTTP request - :param collection: collection or schema of document - :param seq: list of folders - :returns: AsyncGenerator - """ - for el in seq: - if await self._handle_check_ownership(req, collection, el["accessionId"]): - yield el - async def _get_data(self, req: Request) -> Dict: """Get the data content from a request. 
diff --git a/metadata_backend/api/handlers/user.py b/metadata_backend/api/handlers/user.py index bf62166a5..b706c7c94 100644 --- a/metadata_backend/api/handlers/user.py +++ b/metadata_backend/api/handlers/user.py @@ -48,12 +48,12 @@ def _check_patch_user(self, patch_ops: Any) -> None: reason = f"{op['op']} on {op['path']} is not allowed." LOG.error(reason) raise web.HTTPUnauthorized(reason=reason) - if op["path"] == "/folders/-": + if op["path"] == "/folders/-": # DEPRECATED, what to do? if not (isinstance(op["value"], str) or isinstance(op["value"], list)): reason = "We only accept string folder IDs." LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - if op["path"] == "/templates/-": + if op["path"] == "/templates/-": # DEPRECATED, what to do? _ops = op["value"] if isinstance(op["value"], list) else [op["value"]] for item in _ops: if not all(key in item.keys() for key in _required_values): @@ -150,47 +150,3 @@ async def delete_user(self, req: Request) -> Response: ) LOG.debug("Logged out user ") raise response - - # DEPRECATED - # async def _get_user_items(self, req: Request, user: Dict, item_type: str) -> Tuple[Dict, CIMultiDict[str]]: - # """Get draft templates owned by the user with pagination values. - - # :param req: GET request - # :param user: User object - # :param item_type: Name of the items ("templates" or "folders") - # :raises: HTTPUnauthorized if not current user - # :returns: Paginated list of user draft templates and link header - # """ - # # Check item_type parameter is not faulty - # if item_type not in ["templates", "folders"]: - # reason = f"{item_type} is a faulty item parameter. Should be either folders or templates" - # LOG.error(reason) - # raise web.HTTPBadRequest(reason=reason) - - # page = self._get_page_param(req, "page", 1) - # per_page = self._get_page_param(req, "per_page", 5) - - # db_client = req.app["db_client"] - # operator = UserOperator(db_client) - # user_id = req.match_info["userId"] - - # query = {"userId": user} - - # items, total_items = await operator.filter_user(query, item_type, page, per_page) - # LOG.info(f"GET user with ID {user_id} was successful.") - - # result = { - # "page": { - # "page": page, - # "size": per_page, - # "totalPages": ceil(total_items / per_page), - # "total" + item_type.title(): total_items, - # }, - # item_type: items, - # } - - # url = f"{req.scheme}://{req.host}{req.path}" - # link_headers = await self._header_links(url, page, per_page, total_items) - # LOG.debug(f"Pagination header links: {link_headers}") - # LOG.info(f"Querying for user's {item_type} resulted in {total_items} {item_type}") - # return result, link_headers diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index ca5b096f2..573b3e28b 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -1096,63 +1096,6 @@ async def update_user(self, user_id: str, patch: List) -> str: LOG.info(f"Updating user with id {user_id} to database succeeded.") return user_id - # DEPRECATED - # async def assign_objects(self, user_id: str, collection: str, object_ids: List) -> None: - # """Assing object to user. - - # An object can be folder(s) or templates(s). 
- - # :param user_id: ID of user to update - # :param collection: collection where to remove the id from - # :param object_ids: ID or list of IDs of folder(s) to assign - # :raises: HTTPBadRequest if assigning templates/folders to user was not successful - # returns: None - # """ - # try: - # await self._check_user_exists(user_id) - # assign_success = await self.db_service.append( - # "user", user_id, {collection: {"$each": object_ids, "$position": 0}} - # ) - # except (ConnectionFailure, OperationFailure) as error: - # reason = f"Error happened while getting user: {error}" - # LOG.error(reason) - # raise web.HTTPBadRequest(reason=reason) - - # if not assign_success: - # reason = "Assigning objects to user failed." - # LOG.error(reason) - # raise web.HTTPBadRequest(reason=reason) - - # LOG.info(f"Assigning {object_ids} from {user_id} succeeded.") - - # DEPRECATED - # async def remove_objects(self, user_id: str, collection: str, object_ids: List) -> None: - # """Remove object from user. - - # An object can be folder(s) or template(s). - - # :param user_id: ID of user to update - # :param collection: collection where to remove the id from - # :param object_ids: ID or list of IDs of folder(s) to remove - # :raises: HTTPBadRequest if db connection fails - # returns: None - # """ - # remove_content: Dict - # try: - # await self._check_user_exists(user_id) - # for obj in object_ids: - # if collection == "templates": - # remove_content = {"templates": {"accessionId": obj}} - # else: - # remove_content = {"folders": obj} - # await self.db_service.remove("user", user_id, remove_content) - # except (ConnectionFailure, OperationFailure) as error: - # reason = f"Error happened while removing objects from user: {error}" - # LOG.error(reason) - # raise web.HTTPBadRequest(reason=reason) - - # LOG.info(f"Removing {object_ids} from {user_id} succeeded.") - async def delete_user(self, user_id: str) -> str: """Delete user object from database. 
diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index 5548bab5f..cf3d040c7 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -1589,7 +1589,6 @@ async def main(): # Test getting a list of folders and draft templates owned by the user LOG.debug("=== Testing getting folders, draft folders and draft templates with pagination ===") await test_getting_paginated_folders(sess, project_id) - # await test_getting_user_items(sess, project_id) # DEPRECATED LOG.debug("=== Testing getting folders filtered with name and date created ===") await test_getting_folders_filtered_by_name(sess, project_id) # too much of a hassle to make test work with tls db connection in github diff --git a/tests/test_handlers.py b/tests/test_handlers.py index b82d9daee..f8b65ea46 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -661,22 +661,10 @@ async def setUpAsync(self): self.patch_useroperator = patch(class_useroperator, **self.useroperator_config, spec=True) self.MockedUserOperator = self.patch_useroperator.start() - # DEPRECATED - # class_folderoperator = "metadata_backend.api.handlers.user.FolderOperator" - # self.patch_folderoperator = patch(class_folderoperator, **self.folderoperator_config, spec=True) - # self.MockedFolderOperator = self.patch_folderoperator.start() - - # class_operator = "metadata_backend.api.handlers.user.Operator" - # self.patch_operator = patch(class_operator, **self.operator_config, spec=True) - # self.MockedOperator = self.patch_operator.start() - async def tearDownAsync(self): """Cleanup mocked stuff.""" await super().tearDownAsync() self.patch_useroperator.stop() - # DEPRECATED - # self.patch_folderoperator.stop() - # self.patch_operator.stop() async def test_get_user_works(self): """Test user object is returned when correct user id is given.""" @@ -686,71 +674,6 @@ async def test_get_user_works(self): json_resp = await response.json() self.assertEqual(self.test_user, json_resp) - # DEPRECATED - # async def test_get_user_drafts_with_no_drafts(self): - # """Test getting user drafts when user has no drafts.""" - # response = await self.client.get("/users/current?items=templates") - # self.assertEqual(response.status, 200) - # self.MockedUserOperator().filter_user.assert_called_once() - # json_resp = await response.json() - # result = { - # "page": { - # "page": 1, - # "size": 5, - # "totalPages": 0, - # "totalTemplates": 0, - # }, - # "templates": [], - # } - # self.assertEqual(json_resp, result) - - # async def test_get_user_templates_with_1_template(self): - # """Test getting user templates when user has 1 draft.""" - # user = self.test_user - # user["templates"].append(self.metadata_json) - # self.MockedUserOperator().filter_user.return_value = (user["templates"], 1) - # response = await self.client.get("/users/current?items=templates") - # self.assertEqual(response.status, 200) - # self.MockedUserOperator().filter_user.assert_called_once() - # json_resp = await response.json() - # result = { - # "page": { - # "page": 1, - # "size": 5, - # "totalPages": 1, - # "totalTemplates": 1, - # }, - # "templates": [self.metadata_json], - # } - # self.assertEqual(json_resp, result) - - # async def test_get_user_folder_list(self): - # """Test get user with folders url returns a folder ID.""" - # self.MockedUserOperator().filter_user.return_value = (self.test_user["folders"], 1) - # response = await self.client.get("/users/current?items=folders") - # self.assertEqual(response.status, 200) - # 
self.MockedUserOperator().filter_user.assert_called_once() - # json_resp = await response.json() - # result = { - # "page": { - # "page": 1, - # "size": 5, - # "totalPages": 1, - # "totalFolders": 1, - # }, - # "folders": ["FOL12345678"], - # } - # self.assertEqual(json_resp, result) - - # async def test_get_user_items_with_bad_param(self): - # """Test that error is raised if items parameter in query is not templates or folders.""" - # response = await self.client.get("/users/current?items=wrong_thing") - # self.assertEqual(response.status, 400) - # json_resp = await response.json() - # self.assertEqual( - # json_resp["detail"], "wrong_thing is a faulty item parameter. Should be either folders or templates" - # ) - async def test_user_deletion_is_called(self): """Test that user object would be deleted.""" self.MockedUserOperator().read_user.return_value = self.test_user diff --git a/tests/test_operators.py b/tests/test_operators.py index 95adb8bf3..1f2bb0192 100644 --- a/tests/test_operators.py +++ b/tests/test_operators.py @@ -1079,56 +1079,6 @@ async def test_deleting_user_fails(self): with self.assertRaises(HTTPBadRequest): await operator.delete_user(self.user_id) - # DEPRECATED - # async def test_user_objects_remove_passes(self): - # """Test remove objects method for users works.""" - # operator = UserOperator(self.client) - # operator.db_service.exists.return_value = True - # operator.db_service.remove.return_value = self.test_user - # await operator.remove_objects(self.user_generated_id, "study", ["id"]) - # operator.db_service.exists.assert_called_once() - # operator.db_service.remove.assert_called_once() - # self.assertEqual(len(operator.db_service.remove.mock_calls), 1) - - # DEPRECATED - # async def test_user_objects_remove_fails(self): - # """Test remove objects method for users fails.""" - # operator = UserOperator(self.client) - # operator.db_service.exists.return_value = True - # operator.db_service.remove.side_effect = ConnectionFailure - # with self.assertRaises(HTTPBadRequest): - # await operator.remove_objects(self.user_generated_id, "study", ["id"]) - - # DEPRECATED - # async def test_user_objects_append_passes(self): - # """Test append objects method for users works.""" - # operator = UserOperator(self.client) - # operator.db_service.exists.return_value = True - # operator.db_service.append.return_value = self.test_user - # await operator.assign_objects(self.user_generated_id, "study", []) - # operator.db_service.exists.assert_called_once() - # operator.db_service.append.assert_called_once() - # self.assertEqual(len(operator.db_service.append.mock_calls), 1) - - # DEPRECATED - # async def test_user_objects_append_on_result_fails(self): - # """Test append objects method for users fails on db response validation.""" - # operator = UserOperator(self.client) - # operator.db_service.exists.return_value = True - # operator.db_service.append.return_value = False - # with self.assertRaises(HTTPBadRequest): - # await operator.assign_objects(self.user_generated_id, "study", []) - # operator.db_service.exists.assert_called_once() - # operator.db_service.append.assert_called_once() - - # DEPRECATED - # async def test_user_objects_assing_fails(self): - # """Test append objects method for users fails.""" - # operator = UserOperator(self.client) - # operator.db_service.exists.side_effect = ConnectionFailure - # with self.assertRaises(HTTPBadRequest): - # await operator.assign_objects(self.user_generated_id, "study", []) - async def test_check_user_has_project_passes(self): """Test 
check user has project and doesn't raise an exception.""" operator = UserOperator(self.client) From 6830c2e60d50f97acea48c33b6f2c010b9676c7b Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Wed, 16 Feb 2022 11:54:54 +0200 Subject: [PATCH 217/336] fix typo and update part of api spec --- CHANGELOG.md | 2 +- docs/specification.yml | 49 ++++++++++++++++++------------------------ 2 files changed, 22 insertions(+), 29 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d55ea96f7..e7c9cc433 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,7 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - add default `gender` as `unknown` - Project ownership #346 --DRAFT - deprecated `folders` and `templates` keys from `GET /users/current` - - as a side effect, deprecated `items` query param from the same endpoint + - as a side effect, deprecated `items` query parameter from the same endpoint - added new collection `project` - added new key `projects` to `user` - added new key `projectId` to `folder` and `template-*` diff --git a/docs/specification.yml b/docs/specification.yml index 8ddd4a6fe..4557106ab 100644 --- a/docs/specification.yml +++ b/docs/specification.yml @@ -890,6 +890,12 @@ paths: - Query summary: List of folders available for the user. parameters: + - in: query + name: projectId + schema: + type: string + description: ID of the project the folder belongs to + required: true - in: query name: page schema: @@ -959,11 +965,14 @@ paths: required: - name - description + - projectId properties: name: type: string description: type: string + projectId: + type: string responses: 201: description: OK @@ -1158,11 +1167,6 @@ paths: schema: type: string description: Results per page - - in: query - name: items - schema: - type: string - description: Item type name responses: 200: description: OK @@ -1443,6 +1447,7 @@ components: type: object required: - folderId + - projectId - name - description - published @@ -1455,6 +1460,9 @@ components: folderId: type: string description: Folder id + projectId: + type: string + description: Project ID this folder belongs to name: type: string description: Folder name @@ -1622,8 +1630,7 @@ components: required: - userId - name - - drafts - - folders + - projects additionalProperties: false properties: userId: @@ -1632,35 +1639,21 @@ components: name: type: string description: User's Name - drafts: + projects: type: array items: type: object required: - - accessionId - - schema + - projectId + - projectNumber additionalProperties: false properties: - accessionId: + projectId: type: string - description: Accession id generated to identify an object - schema: + description: Internal accession ID for project + projectNumber: type: string - description: type of schema this Accession ID relates to and was added in submit - tags: - type: object - description: Different tags to describe the object. 
- additionalProperties: true - properties: - submissionType: - type: string - description: Type of submission - enum: ["XML", "Form"] - folders: - type: array - items: - type: string - description: Folder Id + description: Human friendly project number received from AAI UserUpdated: type: object required: From dee3e223dc54e4b99ec0a22c9bb07362b72c740d Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 26 Jan 2022 15:15:17 +0000 Subject: [PATCH 218/336] Add mypy linting checker for VSCode --- .devcontainer/devcontainer.json | 8 ++++++++ requirements-dev.in | 1 + requirements-dev.txt | 11 ++++++++++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 13140fc03..225e54305 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -23,6 +23,14 @@ "python.linting.pylintEnabled": true, "python.linting.enabled": true, "python.pythonPath": "/usr/local/bin/python", + "python.linting.mypyEnabled": true, + "python.linting.mypyArgs": [ + "--ignore-missing-imports", + "--follow-imports=silent", + "--show-column-numbers", + "--strict", + "--exclude tests" + ], }, "extensions": [ "ms-python.python", diff --git a/requirements-dev.in b/requirements-dev.in index 2181f099b..eff2402df 100644 --- a/requirements-dev.in +++ b/requirements-dev.in @@ -2,6 +2,7 @@ aiofiles # to run integration tests black certifi flake8 +mypy pip-tools # pip depedencies management pre-commit tox diff --git a/requirements-dev.txt b/requirements-dev.txt index 2437f3593..968887641 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -30,8 +30,12 @@ identify==2.3.6 # via pre-commit mccabe==0.6.1 # via flake8 +mypy==0.931 + # via -r requirements-dev.in mypy-extensions==0.4.3 - # via black + # via + # black + # mypy nodeenv==1.6.0 # via pre-commit packaging==21.2 @@ -71,9 +75,14 @@ toml==0.10.2 tomli==1.2.2 # via # black + # mypy # pep517 tox==3.24.5 # via -r requirements-dev.in +typing-extensions==4.0.0 + # via + # black + # mypy virtualenv==20.10.0 # via # pre-commit From 7ddef1206c1a09ceaaaa31315b27cb407bf8c0b3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Feb 2022 09:06:13 +0000 Subject: [PATCH 219/336] Bump pytest from 7.0.0 to 7.0.1 Bumps [pytest](https://github.com/pytest-dev/pytest) from 7.0.0 to 7.0.1. - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/7.0.0...7.0.1) --- updated-dependencies: - dependency-name: pytest dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot]
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index ef36e8f41..cb4a29078 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@
     packages=find_packages(exclude=["tests"]),
     install_requires=requirements,
     extras_require={
-        "test": ["coverage==6.3.1", "pytest==7.0.0", "pytest-cov==3.0.0", "tox==3.24.5"],
+        "test": ["coverage==6.3.1", "pytest==7.0.1", "pytest-cov==3.0.0", "tox==3.24.5"],
        "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"],
     },
     package_data={

From 5d4a9781d27a3e2cf196a44fbebe3b583c9cfde3 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 14 Feb 2022 09:06:54 +0000
Subject: [PATCH 220/336] Bump pip-tools from 6.5.0 to 6.5.1

Bumps [pip-tools](https://github.com/jazzband/pip-tools) from 6.5.0 to 6.5.1.
- [Release notes](https://github.com/jazzband/pip-tools/releases)
- [Changelog](https://github.com/jazzband/pip-tools/blob/master/CHANGELOG.md)
- [Commits](https://github.com/jazzband/pip-tools/compare/6.5.0...6.5.1)

---
updated-dependencies:
- dependency-name: pip-tools
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
---
 requirements-dev.txt | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/requirements-dev.txt b/requirements-dev.txt
index 968887641..2556a46a1 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -6,7 +6,7 @@
 #
 aiofiles==0.8.0
     # via -r requirements-dev.in
-backports.entry-points-selectable==1.1.1
+backports-entry-points-selectable==1.1.1
     # via virtualenv
 black==22.1.0
     # via -r requirements-dev.in
@@ -44,7 +44,7 @@ pathspec==0.9.0
     # via black
 pep517==0.12.0
     # via pip-tools
-pip-tools==6.5.0
+pip-tools==6.5.1
     # via -r requirements-dev.in
 platformdirs==2.4.0
     # via
@@ -80,9 +80,7 @@ tomli==1.2.2
 tox==3.24.5
     # via -r requirements-dev.in
 typing-extensions==4.0.0
-    # via
-    #   black
-    #   mypy
+    # via mypy
 virtualenv==20.10.0
     # via
     # pre-commit

From 6830c2e60d50f97acea48c33b6f2c010b9676c7b Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Wed, 26 Jan 2022 13:20:30 +0000
Subject: [PATCH 221/336] Add filename extraction for objects created from file

The filename is needed for the db entry (fileName and displayTitle) when a
new object is created from a file and assigned to a folder.
---
 metadata_backend/api/handlers/common.py     | 10 +++++-----
 metadata_backend/api/handlers/object.py     |  4 ++--
 metadata_backend/api/handlers/submission.py |  4 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/metadata_backend/api/handlers/common.py b/metadata_backend/api/handlers/common.py
index 43ec14f70..c575c3d58 100644
--- a/metadata_backend/api/handlers/common.py
+++ b/metadata_backend/api/handlers/common.py
@@ -1,11 +1,10 @@
 """Functions shared between handlers."""
 import csv
 import string
-from typing import List, Tuple, Dict, Any
+from typing import Any, Dict, List, Tuple
 
-from aiohttp import BodyPartReader, web, hdrs, MultipartReader
+from aiohttp import BodyPartReader, MultipartReader, hdrs, web
 from aiohttp.web import Request
-
 from xmlschema import XMLResource
 from xmlschema.etree import ElementTree
 
@@ -16,7 +15,7 @@ async def multipart_content(
 async def multipart_content(
     req: Request, extract_one: bool = False, expect_xml: bool = False
-) -> Tuple[List[Tuple[Any, str]], str]:
+) -> Tuple[List[Tuple[Any, str]], str, str]:
     """Get content(s) and schema type(s) of a multipart request (from either csv or xml format).
 
     Note: for multiple files support check: https://docs.aiohttp.org/en/stable/multipart.html#hacking-multipart
@@ -48,6 +47,7 @@ async def multipart_content(
             raise web.HTTPUnsupportedMediaType(reason=reason)
         if not part:
             break
+        filename = part.filename if part.filename else ""
         if extract_one and (xml_files or csv_files):
             reason = "Only one file can be sent to this endpoint at a time."
             LOG.error(reason)
@@ -72,7 +72,7 @@ async def multipart_content(
             raise web.HTTPBadRequest(reason=reason)
 
     # Return extracted content
-    return _get_content_with_type(xml_files, csv_files)
+    return _get_content_with_type(xml_files, csv_files) + (filename,)
 
 
 async def _extract_upload(part: BodyPartReader) -> Tuple[str, str]:
diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py
index 506de11c5..871e776d7 100644
--- a/metadata_backend/api/handlers/object.py
+++ b/metadata_backend/api/handlers/object.py
@@ -109,7 +109,7 @@ async def post_object(self, req: Request) -> Response:
         operator: Union[Operator, XMLOperator]
         if req.content_type == "multipart/form-data":
             _only_xml = False if schema_type in _allowed_csv else True
-            files, cont_type = await multipart_content(req, extract_one=True, expect_xml=_only_xml)
+            files, cont_type, _ = await multipart_content(req, extract_one=True, expect_xml=_only_xml)
             if cont_type == "xml":
                 # from this tuple we only care about the content
                 # files should be of form (content, schema)
@@ -222,7 +222,7 @@ async def put_object(self, req: Request) -> Response:
         content: Union[Dict, str]
         operator: Union[Operator, XMLOperator]
         if req.content_type == "multipart/form-data":
-            files, _ = await multipart_content(req, extract_one=True, expect_xml=True)
+            files, _, _ = await multipart_content(req, extract_one=True, expect_xml=True)
             content, _ = files[0]
             operator = XMLOperator(db_client)
         else:
diff --git a/metadata_backend/api/handlers/submission.py b/metadata_backend/api/handlers/submission.py
index 8d36b9a73..c9c2176e8 100644
--- a/metadata_backend/api/handlers/submission.py
+++ b/metadata_backend/api/handlers/submission.py
@@ -31,7 +31,7 @@ async def submit(self, req: Request) -> Response:
         :raises: HTTPBadRequest if request is missing some parameters or cannot be processed
         :returns: XML-based receipt from submission
         """
-        files, _ = await multipart_content(req, expect_xml=True)
+        files, _, _ = await multipart_content(req, expect_xml=True)
         schema_types = Counter(file[1] for file in files)
         if "submission" not in schema_types:
             reason = "There must be a submission.xml file in submission."
@@ -92,7 +92,7 @@ async def validate(self, req: Request) -> Response:
         :param req: Multipart POST request with submission.xml and files
         :returns: JSON response indicating if validation was successful or not
         """
-        files, _ = await multipart_content(req, extract_one=True, expect_xml=True)
+        files, _, _ = await multipart_content(req, extract_one=True, expect_xml=True)
         xml_content, schema_type = files[0]
         validator = await self._perform_validation(schema_type, xml_content)
         return web.Response(body=validator.resp_body, content_type="application/json")

From d9ab77b62ca6ece33b14808f6f8e0688028d3dd1 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Wed, 26 Jan 2022 15:11:38 +0000
Subject: [PATCH 222/336] Add folder patching on object creation

Move responsibility for adding a newly created object to a folder from the
frontend to the object endpoint. The folder ID is now a required query
parameter on POST object/. Tests updated.
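For illustration, the folder patch that the endpoint now builds for one
created object has roughly the following shape. This is a sketch mirroring
the prepare_folder_patch helper added in this commit; the accession ID and
file name values are invented for the example:

    # Rough sketch of the JSON Patch entry appended to the folder for one
    # newly created XML object; all values below are illustrative only.
    folder_patch = [
        {
            "op": "add",
            "path": "/metadataObjects/-",  # "/drafts/-" for draft schemas
            "value": {
                "accessionId": "EDAG0000000001",  # invented accession ID
                "schema": "sample",
                "tags": {
                    "submissionType": "XML",  # "Form" for JSON submissions
                    "displayTitle": "SRS001433.xml",
                    "fileName": "SRS001433.xml",  # set only for XML/CSV uploads
                },
            },
        },
    ]
    # The handler then applies it via FolderOperator, roughly:
    #     await FolderOperator(db_client).update_folder(folder_id, folder_patch)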
---
 metadata_backend/api/handlers/object.py |  68 +++++++++-
 tests/integration/run_tests.py          | 170 ++++++++++--------------
 tests/test_handlers.py                  |  20 +--
 3 files changed, 145 insertions(+), 113 deletions(-)

diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py
index 871e776d7..889b69641 100644
--- a/metadata_backend/api/handlers/object.py
+++ b/metadata_backend/api/handlers/object.py
@@ -1,6 +1,6 @@
 """Handle HTTP methods for server."""
 from math import ceil
-from typing import Dict, Union, List, Any, Tuple
+from typing import Any, Dict, List, Tuple, Union
 
 import ujson
 from aiohttp import web
@@ -101,6 +101,13 @@ async def post_object(self, req: Request) -> Response:
         """
         _allowed_csv = ["sample"]
         schema_type = req.match_info["schema"]
+
+        folder_id = req.query.get("folder", "")
+        if not folder_id:
+            reason = "Folder ID is a required query parameter."
+            raise web.HTTPBadRequest(reason=reason)
+        patch_params = {"folder": folder_id}
+
         self._check_schema_exists(schema_type)
         collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type
 
@@ -109,17 +116,18 @@ async def post_object(self, req: Request) -> Response:
         operator: Union[Operator, XMLOperator]
         if req.content_type == "multipart/form-data":
             _only_xml = False if schema_type in _allowed_csv else True
-            files, cont_type, _ = await multipart_content(req, extract_one=True, expect_xml=_only_xml)
+            files, cont_type, filename = await multipart_content(req, extract_one=True, expect_xml=_only_xml)
             if cont_type == "xml":
                 # from this tuple we only care about the content
                 # files should be of form (content, schema)
                 content, _ = files[0]
             else:
-                # for CSV files we need to tread this as a list of tuples (content, schema)
+                # for CSV files we need to treat this as a list of tuples (content, schema)
                 content = files
             # If multipart request contains XML, XML operator is used.
             # Else the multipart request is expected to contain CSV file(s) which are converted into JSON.
             operator = XMLOperator(db_client) if cont_type == "xml" else Operator(db_client)
+            patch_params.update({"cont_type": cont_type, "title": filename})
         else:
             content = await self._get_data(req)
             if not req.path.startswith("/drafts"):
@@ -147,6 +155,20 @@ async def post_object(self, req: Request) -> Response:
         location_headers = CIMultiDict(Location=f"{url}/{accession_id}")
         LOG.info(f"POST object with accesssion ID {accession_id} in schema {collection} was successful.")
 
+        # Gathering data for object to be added to folder
+        if not isinstance(data, List):
+            ids = [data]
+        if not patch_params.get("title", None) and isinstance(content, Dict):
+            try:
+                patch_params["title"] = (
+                    content["descriptor"]["studyTitle"] if collection == "study" else content["title"]
+                )
+            except (TypeError, KeyError):
+                patch_params["title"] = ""
+        patch = await self.prepare_folder_patch(collection, ids, patch_params)
+        folder_op = FolderOperator(db_client)
+        folder_id = await folder_op.update_folder(folder_id, patch)
+
         body = ujson.dumps(data, escape_forward_slashes=False)
 
         return web.Response(
@@ -283,3 +305,43 @@ async def patch_object(self, req: Request) -> Response:
         body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False)
         LOG.info(f"PATCH object with accession ID {accession_id} in schema {collection} was successful.")
         return web.Response(body=body, status=200, content_type="application/json")
+
+    async def prepare_folder_patch(self, schema: str, ids: List, params: Dict[str, str]) -> List:
+        """Prepare patch operations list.
+
+        :param schema: schema of objects to be added to the folder
+        :param ids: object IDs
+        :param params: additional data required for the db entry
+        :returns: list of patch operations
+        """
+        if not params.get("cont_type", None):
+            submission_type = "Form"
+        else:
+            submission_type = params["cont_type"].upper()
+
+        if schema.startswith("draft"):
+            path = "/drafts/-"
+        else:
+            path = "/metadataObjects/-"
+
+        patch = []
+        patch_ops: Dict[str, Any] = {}
+        for id in ids:
+            patch_ops = {
+                "op": "add",
+                "path": path,
+                "value": {
+                    "accessionId": id["accessionId"],
+                    "schema": schema,
+                    "tags": {
+                        "submissionType": submission_type,
+                        "displayTitle": params["title"],
+                    },
+                },
+            }
+
+            if submission_type != "Form":
+                patch_ops["value"]["tags"]["fileName"] = params["title"]
+            patch.append(patch_ops)
+
+        return patch
diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py
index edc404b95..ba0b82bc7 100644
--- a/tests/integration/run_tests.py
+++ b/tests/integration/run_tests.py
@@ -140,7 +140,7 @@ async def create_request_json_data(schema, filename):
     return request_data
 
 
-async def post_object(sess, schema, filename):
+async def post_object(sess, schema, folder_id, filename):
     """Post one metadata object within session, returns accessionId.
 
     :param sess: HTTP session in which request call is made
     :param schema: name of the schema (folder) used for testing
     :param filename: name of the file used for testing.
     """
     request_data = await create_request_data(schema, filename)
-    async with sess.post(f"{objects_url}/{schema}", data=request_data) as resp:
+    async with sess.post(f"{objects_url}/{schema}", params={"folder": folder_id}, data=request_data) as resp:
         LOG.debug(f"Adding new object to {schema}, via XML/CSV file {filename}")
         assert resp.status == 201, f"HTTP Status code error, got {resp.status}"
         ans = await resp.json()
         return ans if isinstance(ans, list) else ans["accessionId"], schema
 
 
-async def post_object_expect_status(sess, schema, filename, status):
+async def post_object_expect_status(sess, schema, folder_id, filename, status):
     """Post one metadata object within session, returns accessionId.
 
     :param sess: HTTP session in which request call is made
     :param schema: name of the schema (folder) used for testing
     :param filename: name of the file used for testing.
     """
     request_data = await create_request_data(schema, filename)
-    async with sess.post(f"{objects_url}/{schema}", data=request_data) as resp:
+    async with sess.post(f"{objects_url}/{schema}", params={"folder": folder_id}, data=request_data) as resp:
         LOG.debug(f"Adding new object to {schema}, via XML/CSV file {filename} and expecting status: {status}")
         assert resp.status == status, f"HTTP Status code error, got {resp.status}"
         if status < 400:
             ans = await resp.json()
             return ans if isinstance(ans, list) else ans["accessionId"], schema
 
 
-async def post_object_json(sess, schema, filename):
+async def post_object_json(sess, schema, folder_id, filename):
     """Post & put one metadata object within session, returns accessionId.
 
     :param sess: HTTP session in which request call is made
     :param schema: name of the schema (folder) used for testing
     :param filename: name of the file used for testing.
""" request_data = await create_request_json_data(schema, filename) - async with sess.post(f"{objects_url}/{schema}", data=request_data) as resp: + async with sess.post(f"{objects_url}/{schema}", params={"folder": folder_id}, data=request_data) as resp: LOG.debug(f"Adding new object to {schema}, via JSON file {filename}") assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() @@ -198,7 +198,7 @@ async def delete_object(sess, schema, accession_id): assert resp.status == 204, f"HTTP Status code error, got {resp.status}" -async def post_draft(sess, schema, filename): +async def post_draft(sess, schema, folder_id, filename): """Post one draft metadata object within session, returns accessionId. :param sess: HTTP session in which request call is made @@ -206,14 +206,14 @@ async def post_draft(sess, schema, filename): :param filename: name of the file used for testing. """ request_data = await create_request_data(schema, filename) - async with sess.post(f"{drafts_url}/{schema}", data=request_data) as resp: + async with sess.post(f"{drafts_url}/{schema}", params={"folder": folder_id}, data=request_data) as resp: LOG.debug(f"Adding new draft object to {schema}, via XML file {filename}") assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() return ans["accessionId"] -async def post_draft_json(sess, schema, filename): +async def post_draft_json(sess, schema, folder_id, filename): """Post & put one metadata object within session, returns accessionId. :param sess: HTTP session in which request call is made @@ -221,7 +221,7 @@ async def post_draft_json(sess, schema, filename): :param filename: name of the file used for testing. """ request_data = await create_request_json_data(schema, filename) - async with sess.post(f"{drafts_url}/{schema}", data=request_data) as resp: + async with sess.post(f"{drafts_url}/{schema}", params={"folder": folder_id}, data=request_data) as resp: LOG.debug(f"Adding new draft object to {schema}, via JSON file {filename}") assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() @@ -516,11 +516,7 @@ async def test_crud_works(sess, schema, filename, folder_id): :param filename: name of the file used for testing :param folder_id: id of the folder used to group submission """ - accession_id = await post_object(sess, schema, filename) - patch_object = [ - {"op": "add", "path": "/metadataObjects/-", "value": {"accessionId": accession_id[0], "schema": schema}} - ] - await patch_folder(sess, folder_id, patch_object) + accession_id = await post_object(sess, schema, folder_id, filename) async with sess.get(f"{objects_url}/{schema}/{accession_id[0]}") as resp: LOG.debug(f"Checking that {accession_id[0]} JSON is in {schema}") assert resp.status == 200, f"HTTP Status code error, got {resp.status}" @@ -556,15 +552,11 @@ async def test_csv(sess, folder_id): """ _schema = "sample" _filename = "EGAformat.csv" - accession_id = await post_object(sess, _schema, _filename) + accession_id = await post_object(sess, _schema, folder_id, _filename) # there are 3 rows and we expected to get 3rd assert len(accession_id[0]) == 3, f"expected nb of CSV entries does not match, we got: {len(accession_id)}" _first_csv_row_id = accession_id[0][0]["accessionId"] - patch_object = [ - {"op": "add", "path": "/metadataObjects/-", "value": {"accessionId": _first_csv_row_id, "schema": _schema}} - ] - await patch_folder(sess, folder_id, patch_object) async with 
sess.get(f"{objects_url}/{_schema}/{_first_csv_row_id}") as resp: LOG.debug(f"Checking that {_first_csv_row_id} JSON is in {_schema}") assert resp.status == 200, f"HTTP Status code error, got {resp.status}" @@ -582,11 +574,11 @@ async def test_csv(sess, folder_id): _filename = "empty.csv" # status should be 400 - await post_object_expect_status(sess, _schema, _filename, 400) + await post_object_expect_status(sess, _schema, folder_id, _filename, 400) _filename = "EGA_sample_w_issue.csv" # status should be 201 but we expect 3 rows, as the CSV has 4 rows one of which is empty - accession_id = await post_object_expect_status(sess, _schema, _filename, 201) + accession_id = await post_object_expect_status(sess, _schema, folder_id, _filename, 201) assert len(accession_id[0]) == 3, f"expected nb of CSV entries does not match, we got: {len(accession_id)}" @@ -600,11 +592,7 @@ async def test_put_objects(sess, folder_id): :param sess: HTTP session in which request call is made :param folder_id: id of the folder used to group submission """ - accession_id = await post_object(sess, "study", "SRP000539.xml") - patch_object = [ - {"op": "add", "path": "/metadataObjects/-", "value": {"accessionId": accession_id[0], "schema": "study"}} - ] - await patch_folder(sess, folder_id, patch_object) + accession_id = await post_object(sess, "study", folder_id, "SRP000539.xml") await put_object_json(sess, "study", accession_id[0], "SRP000539.json") await put_object_xml(sess, "study", accession_id[0], "SRP000539_put.xml") @@ -622,11 +610,7 @@ async def test_crud_drafts_works(sess, schema, orginal_file, update_file, folder :param update_file: name of the file used for updating object. :param folder_id: id of the folder used to group submission objects """ - draft_id = await post_draft_json(sess, schema, orginal_file) - patch_draft_data = [ - {"op": "add", "path": "/drafts/-", "value": {"accessionId": draft_id, "schema": f"draft-{schema}"}} - ] - await patch_folder(sess, folder_id, patch_draft_data) + draft_id = await post_draft_json(sess, schema, folder_id, orginal_file) accession_id = await put_draft(sess, schema, draft_id, update_file) async with sess.get(f"{drafts_url}/{schema}/{accession_id}") as resp: LOG.debug(f"Checking that {accession_id} JSON is in {schema}") @@ -657,11 +641,7 @@ async def test_patch_drafts_works(sess, schema, orginal_file, update_file, folde :param update_file: name of the file used for updating object. 
:param folder_id: id of the folder used to group submission objects """ - draft_id = await post_draft_json(sess, schema, orginal_file) - patch_draft_data = [ - {"op": "add", "path": "/drafts/-", "value": {"accessionId": draft_id, "schema": f"draft-{schema}"}} - ] - await patch_folder(sess, folder_id, patch_draft_data) + draft_id = await post_draft_json(sess, schema, folder_id, orginal_file) accession_id = await patch_draft(sess, schema, draft_id, update_file) async with sess.get(f"{drafts_url}/{schema}/{accession_id}") as resp: LOG.debug(f"Checking that {accession_id} JSON is in {schema}") @@ -682,13 +662,9 @@ async def test_querying_works(sess, folder_id): :param sess: HTTP session in which request call is made :param folder_id: id of the folder used to group submission objects """ - files = await asyncio.gather(*[post_object(sess, schema, filename) for schema, filename in test_xml_files]) - - for accession_id, schema in files: - patch_folder_obj = [ - {"op": "add", "path": "/metadataObjects/-", "value": {"accessionId": accession_id, "schema": schema}} - ] - await patch_folder(sess, folder_id, patch_folder_obj) + files = await asyncio.gather( + *[post_object(sess, schema, folder_id, filename) for schema, filename in test_xml_files] + ) queries = { "study": [ @@ -740,13 +716,7 @@ async def test_getting_all_objects_from_schema_works(sess, folder_id): :param folder_id: id of the folder used to group submission objects """ # Add objects - files = await asyncio.gather(*[post_object(sess, "study", "SRP000539.xml") for _ in range(13)]) - - for accession_id, schema in files: - patch_folder_obj = [ - {"op": "add", "path": "/metadataObjects/-", "value": {"accessionId": accession_id, "schema": schema}} - ] - await patch_folder(sess, folder_id, patch_folder_obj) + files = await asyncio.gather(*[post_object(sess, "study", folder_id, "SRP000539.xml") for _ in range(13)]) # Test default values async with sess.get(f"{objects_url}/study") as resp: @@ -791,11 +761,7 @@ async def test_crud_folders_works(sess): assert resp.status == 200, f"HTTP Status code error, got {resp.status}" # Create draft from test XML file and patch the draft into the newly created folder - draft_id = await post_draft(sess, "sample", "SRS001433.xml") - patch_add_draft = [ - {"op": "add", "path": "/drafts/-", "value": [{"accessionId": draft_id, "schema": "draft-sample"}]} - ] - folder_id = await patch_folder(sess, folder_id, patch_add_draft) + draft_id = await post_draft(sess, "sample", folder_id, "SRS001433.xml") async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was patched") res = await resp.json() @@ -803,32 +769,43 @@ async def test_crud_folders_works(sess): assert res["name"] == folder_data["name"], "expected folder name does not match" assert res["description"] == folder_data["description"], "folder description content mismatch" assert res["published"] is False, "folder is published, expected False" - assert res["drafts"] == [{"accessionId": draft_id, "schema": "draft-sample"}], "folder drafts content mismatch" + assert res["drafts"] == [ + { + "accessionId": draft_id, + "schema": "draft-sample", + "tags": {"submissionType": "XML", "displayTitle": "SRS001433.xml", "fileName": "SRS001433.xml"}, + } + ], "folder drafts content mismatch" assert res["metadataObjects"] == [], "there are objects in folder, expected empty" # Get the draft from the collection within this session and post it to objects collection draft_data = await get_draft(sess, "sample", draft_id) - async with 
sess.post(f"{objects_url}/sample", data=draft_data) as resp: + async with sess.post(f"{objects_url}/sample", params={"folder": folder_id}, data=draft_data) as resp: LOG.debug("Adding draft to actual objects") assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() assert ans["accessionId"] != draft_id, "draft id does not match expected" accession_id = ans["accessionId"] - # Patch folder so that original draft becomes an object in the folder - patch_folder_move_draft = [ - {"op": "add", "path": "/metadataObjects/-", "value": [{"accessionId": accession_id, "schema": "sample"}]}, - ] - folder_id = await patch_folder(sess, folder_id, patch_folder_move_draft) async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was patched") res = await resp.json() assert res["folderId"] == folder_id, "expected folder id does not match" assert res["published"] is False, "folder is published, expected False" assert "datePublished" not in res.keys() - assert res["drafts"] == [{"accessionId": draft_id, "schema": "draft-sample"}], "folder drafts content mismatch" + assert res["drafts"] == [ + { + "accessionId": draft_id, + "schema": "draft-sample", + "tags": {"submissionType": "XML", "displayTitle": "SRS001433.xml", "fileName": "SRS001433.xml"}, + } + ], "folder drafts content mismatch" assert res["metadataObjects"] == [ - {"accessionId": accession_id, "schema": "sample"} + { + "accessionId": accession_id, + "schema": "sample", + "tags": {"submissionType": "Form", "displayTitle": "HapMap sample from Homo sapiens"}, + } ], "folder metadataObjects content mismatch" # Publish the folder @@ -845,7 +822,11 @@ async def test_crud_folders_works(sess): assert "extraInfo" in res.keys() assert res["drafts"] == [], "there are drafts in folder, expected empty" assert res["metadataObjects"] == [ - {"accessionId": accession_id, "schema": "sample"} + { + "accessionId": accession_id, + "schema": "sample", + "tags": {"submissionType": "Form", "displayTitle": "HapMap sample from Homo sapiens"}, + } ], "folder metadataObjects content mismatch" # Delete folder @@ -869,11 +850,7 @@ async def test_crud_folders_works_no_publish(sess): assert resp.status == 200, f"HTTP Status code error, got {resp.status}" # Create draft from test XML file and patch the draft into the newly created folder - draft_id = await post_draft(sess, "sample", "SRS001433.xml") - patch_add_draft = [ - {"op": "add", "path": "/drafts/-", "value": [{"accessionId": draft_id, "schema": "draft-sample"}]} - ] - folder_id = await patch_folder(sess, folder_id, patch_add_draft) + draft_id = await post_draft(sess, "sample", folder_id, "SRS001433.xml") async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was patched") res = await resp.json() @@ -881,31 +858,42 @@ async def test_crud_folders_works_no_publish(sess): assert res["name"] == folder_data["name"], "expected folder name does not match" assert res["description"] == folder_data["description"], "folder description content mismatch" assert res["published"] is False, "folder is published, expected False" - assert res["drafts"] == [{"accessionId": draft_id, "schema": "draft-sample"}], "folder drafts content mismatch" + assert res["drafts"] == [ + { + "accessionId": draft_id, + "schema": "draft-sample", + "tags": {"submissionType": "XML", "displayTitle": "SRS001433.xml", "fileName": "SRS001433.xml"}, + } + ], "folder drafts content mismatch" assert res["metadataObjects"] == [], "there 
are objects in folder, expected empty" # Get the draft from the collection within this session and post it to objects collection draft = await get_draft(sess, "sample", draft_id) - async with sess.post(f"{objects_url}/sample", data=draft) as resp: + async with sess.post(f"{objects_url}/sample", params={"folder": folder_id}, data=draft) as resp: LOG.debug("Adding draft to actual objects") assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() assert ans["accessionId"] != draft_id, "draft id does not match expected" accession_id = ans["accessionId"] - # Patch folder so that original draft becomes an object in the folder - patch_folder_move_draft = [ - {"op": "add", "path": "/metadataObjects/-", "value": [{"accessionId": accession_id, "schema": "sample"}]}, - ] - folder_id = await patch_folder(sess, folder_id, patch_folder_move_draft) async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was patched") res = await resp.json() assert res["folderId"] == folder_id, "expected folder id does not match" assert res["published"] is False, "folder is published, expected False" - assert res["drafts"] == [{"accessionId": draft_id, "schema": "draft-sample"}], "folder drafts content mismatch" + assert res["drafts"] == [ + { + "accessionId": draft_id, + "schema": "draft-sample", + "tags": {"submissionType": "XML", "displayTitle": "SRS001433.xml", "fileName": "SRS001433.xml"}, + } + ], "folder drafts content mismatch" assert res["metadataObjects"] == [ - {"accessionId": accession_id, "schema": "sample"} + { + "accessionId": accession_id, + "schema": "sample", + "tags": {"submissionType": "Form", "displayTitle": "HapMap sample from Homo sapiens"}, + } ], "folder metadataObjects content mismatch" # Delete folder @@ -1309,32 +1297,14 @@ async def test_get_folders_objects(sess, folder_id: str): :param sess: HTTP session in which request call is made :param folder_id: id of the folder used to group submission objects """ - accession_id = await post_object_json(sess, "study", "SRP000539.json") - patch_add_object = [ - {"op": "add", "path": "/metadataObjects/-", "value": {"accessionId": accession_id, "schema": "study"}} - ] - await patch_folder(sess, folder_id, patch_add_object) - async with sess.get(f"{folders_url}") as resp: - LOG.debug(f"Reading folder {folder_id}") - assert resp.status == 200, f"HTTP Status code error, got {resp.status}" - response = await resp.json() - assert len(response["folders"]) == 1 - assert response["folders"][0]["metadataObjects"][0]["accessionId"] == accession_id - assert "tags" not in response["folders"][0]["metadataObjects"][0] - patch_add_more_object = [ - { - "op": "add", - "path": "/metadataObjects/0/tags", - "value": {"submissionType": "Form"}, - } - ] - await patch_folder(sess, folder_id, patch_add_more_object) + accession_id = await post_object_json(sess, "study", folder_id, "SRP000539.json") async with sess.get(f"{folders_url}") as resp: LOG.debug(f"Reading folder {folder_id}") assert resp.status == 200, f"HTTP Status code error, got {resp.status}" response = await resp.json() assert len(response["folders"]) == 1 assert response["folders"][0]["metadataObjects"][0]["accessionId"] == accession_id + assert "tags" in response["folders"][0]["metadataObjects"][0] assert response["folders"][0]["metadataObjects"][0]["tags"]["submissionType"] == "Form" patch_change_tags_object = [ diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 2156a586b..0f6244a66 100644 --- 
a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -387,7 +387,7 @@ async def test_submit_object_works(self): """Test that submission is handled, XMLOperator is called.""" files = [("study", "SRP000539.xml")] data = self.create_submission_data(files) - response = await self.client.post("/objects/study", data=data) + response = await self.client.post("/objects/study", params={"folder": "some id"}, data=data) self.assertEqual(response.status, 201) self.assertIn(self.test_ega_string, await response.text()) self.MockedXMLOperator().create_metadata_object.assert_called_once() @@ -399,7 +399,7 @@ async def test_submit_object_works_with_json(self): "alias": "GSE10966", "descriptor": {"studyTitle": "Highly", "studyType": "Other"}, } - response = await self.client.post("/objects/study", json=json_req) + response = await self.client.post("/objects/study", params={"folder": "some id"}, json=json_req) self.assertEqual(response.status, 201) self.assertIn(self.test_ega_string, await response.text()) self.MockedOperator().create_metadata_object.assert_called_once() @@ -407,7 +407,7 @@ async def test_submit_object_works_with_json(self): async def test_submit_object_missing_field_json(self): """Test that JSON has missing property.""" json_req = {"centerName": "GEO", "alias": "GSE10966"} - response = await self.client.post("/objects/study", json=json_req) + response = await self.client.post("/objects/study", params={"folder": "some id"}, json=json_req) reason = "Provided input does not seem correct because: ''descriptor' is a required property'" self.assertEqual(response.status, 400) self.assertIn(reason, await response.text()) @@ -419,7 +419,7 @@ async def test_submit_object_bad_field_json(self): "alias": "GSE10966", "descriptor": {"studyTitle": "Highly", "studyType": "ceva"}, } - response = await self.client.post("/objects/study", json=json_req) + response = await self.client.post("/objects/study", params={"folder": "some id"}, json=json_req) reason = "Provided input does not seem correct for field: 'descriptor'" self.assertEqual(response.status, 400) self.assertIn(reason, await response.text()) @@ -431,7 +431,7 @@ async def test_post_object_bad_json(self): "alias": "GSE10966", "descriptor": {"studyTitle": "Highly", "studyType": "Other"}, } - response = await self.client.post("/objects/study", data=json_req) + response = await self.client.post("/objects/study", params={"folder": "some id"}, data=json_req) reason = "JSON is not correctly formatted. 
See: Expecting value: line 1 column 1" self.assertEqual(response.status, 400) self.assertIn(reason, await response.text()) @@ -442,7 +442,7 @@ async def test_post_object_works_with_csv(self): data = self.create_submission_data(files) file_content = self.get_file_data("sample", "EGAformat.csv") self.MockedCSVParser().parse.return_value = [{}, {}, {}] - response = await self.client.post("/objects/sample", data=data) + response = await self.client.post("/objects/sample", params={"folder": "some id"}, data=data) json_resp = await response.json() self.assertEqual(response.status, 201) self.assertEqual(self.test_ega_string, json_resp[0]["accessionId"]) @@ -460,7 +460,7 @@ async def test_post_objet_error_with_empty(self): """Test multipart request post fails when no objects are parsed.""" files = [("sample", "empty.csv")] data = self.create_submission_data(files) - response = await self.client.post("/objects/sample", data=data) + response = await self.client.post("/objects/sample", params={"folder": "some id"}, data=data) json_resp = await response.json() self.assertEqual(response.status, 400) self.assertEqual(json_resp["detail"], "Request data seems empty.") @@ -495,7 +495,7 @@ async def test_submit_draft_works_with_json(self): "alias": "GSE10966", "descriptor": {"studyTitle": "Highly", "studyType": "Other"}, } - response = await self.client.post("/drafts/study", json=json_req) + response = await self.client.post("/drafts/study", params={"folder": "some id"}, json=json_req) self.assertEqual(response.status, 201) self.assertIn(self.test_ega_string, await response.text()) self.MockedOperator().create_metadata_object.assert_called_once() @@ -544,7 +544,7 @@ async def test_submit_object_fails_with_too_many_files(self): """Test that sending two files to endpoint results failure.""" files = [("study", "SRP000539.xml"), ("study", "SRP000539_copy.xml")] data = self.create_submission_data(files) - response = await self.client.post("/objects/study", data=data) + response = await self.client.post("/objects/study", params={"folder": "some id"}, data=data) reason = "Only one file can be sent to this endpoint at a time." 
self.assertEqual(response.status, 400) self.assertIn(reason, await response.text()) @@ -616,7 +616,7 @@ async def test_operations_fail_for_wrong_schema_type(self): json_get_resp = await get_resp.json() self.assertIn("Specified schema", json_get_resp["detail"]) - post_rep = await self.client.post("/objects/bad_scehma_name") + post_rep = await self.client.post("/objects/bad_scehma_name", params={"folder": "some id"}) self.assertEqual(post_rep.status, 404) post_json_rep = await post_rep.json() self.assertIn("Specified schema", post_json_rep["detail"]) From 2202e3bb5e0bee96e26bcbd5432030c4a26c7188 Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Thu, 17 Feb 2022 10:34:35 +0200 Subject: [PATCH 223/336] add deprecation notes for later pruning, remove some deprecated features that don't break anything --- metadata_backend/api/handlers/object.py | 2 +- metadata_backend/api/operators.py | 2 - metadata_backend/helpers/schemas/users.json | 48 ++++++--------------- tests/integration/run_tests.py | 17 +++----- tests/test_db_service.py | 4 +- tests/test_handlers.py | 4 +- tests/test_operators.py | 4 +- 7 files changed, 26 insertions(+), 55 deletions(-) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index 18bb525d1..fd5958f0f 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -33,7 +33,7 @@ async def _handle_query(self, req: Request) -> Response: per_page = self._get_page_param(req, "per_page", 10) db_client = req.app["db_client"] - filter_list = await self._handle_user_objects_collection(req, collection) # DEPRECATED, what to do? + filter_list = [] # DEPRECATED, users don't own folders anymore data, page_num, page_size, total_objects = await Operator(db_client).query_metadata_database( collection, req.query, page, per_page, filter_list ) diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 573b3e28b..d8e51e3b8 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -999,8 +999,6 @@ async def create_user(self, data: Dict[str, Union[list, str]]) -> str: return existing_user_id else: user_data["projects"] = data["projects"] - user_data["templates"] = [] - user_data["folders"] = [] user_data["userId"] = user_id = self._generate_user_id() user_data["name"] = data["real_name"] user_data["externalId"] = data["user_id"] diff --git a/metadata_backend/helpers/schemas/users.json b/metadata_backend/helpers/schemas/users.json index aed6a5a4e..191b20293 100644 --- a/metadata_backend/helpers/schemas/users.json +++ b/metadata_backend/helpers/schemas/users.json @@ -1,9 +1,10 @@ { "type": "object", - "title": "Folder schema for user objects", + "title": "User object schema", "required": [ "userId", - "name" + "name", + "projects" ], "properties": { "userId": { @@ -14,51 +15,28 @@ "type": "string", "title": "User Name" }, - "templates": { + "projects": { "type": "array", - "title": "User templates schema", + "title": "User's project affiliations schema", "items": { "type": "object", - "title": "Template objects", + "title": "Project objects", "required": [ - "accessionId", - "schema" + "projectId", + "projectNumber" ], "properties": { - "accessionId": { + "projectId": { "type": "string", - "title": "Accession Id" + "title": "Project internal accession ID" }, - "schema": { + "projectNumber": { "type": "string", - "title": "Draft object's schema" - }, - "tags": { - "type": "object", - "title": "Different tags to describe the template object.", - 
"additionalProperties": true, - "properties": { - "submissionType": { - "type": "string", - "title": "Type of submission", - "enum": [ - "Form" - ] - } - } + "title": "Project human friendly ID" } } }, "uniqueItems": true - }, - "folders": { - "type": "array", - "title": "The folders schema", - "items": { - "type": "string", - "title": "Folder Id" - }, - "uniqueItems": true } } -} +} \ No newline at end of file diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index cf3d040c7..49d4abed8 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -487,11 +487,12 @@ async def create_folder(data, user): data["metadataObjects"] = [] try: await database["folder"].insert_one(data) - find_by_id = {"userId": user} - append_op = {"$push": {"folders": {"$each": [folder_id], "$position": 0}}} - await database["user"].find_one_and_update( - find_by_id, append_op, projection={"_id": False}, return_document=True - ) + # DEPRECATED + # find_by_id = {"userId": user} + # append_op = {"$push": {"folders": {"$each": [folder_id], "$position": 0}}} + # await database["user"].find_one_and_update( + # find_by_id, append_op, projection={"_id": False}, return_document=True + # ) return folder_id except Exception as e: @@ -1017,12 +1018,6 @@ async def test_crud_folders_works_no_publish(sess, project_id): LOG.debug(f"Checking that folder {folder_id} was deleted") assert resp.status == 404, f"HTTP Status code error, got {resp.status}" - async with sess.get(f"{users_url}/current") as resp: - LOG.debug(f"Checking that folder {folder_id} was deleted from current user") - res = await resp.json() - expected_true = not any(d == accession_id for d in res["folders"]) - assert expected_true, "folder still exists at user" - async def test_adding_doi_info_to_folder_works(sess, project_id): """Test that proper DOI info can be added to folder and bad DOI info cannot be. 
diff --git a/tests/test_db_service.py b/tests/test_db_service.py index b88748710..3ea8c77a3 100644 --- a/tests/test_db_service.py +++ b/tests/test_db_service.py @@ -35,8 +35,8 @@ def setUp(self): self.user_stub = { "userId": self.user_id_stub, "name": "name", - "drafts": ["EGA123456", "EGA1234567"], - "folders": ["EGA1234569"], + "drafts": ["EGA123456", "EGA1234567"], # DEPRECATED + "folders": ["EGA1234569"], # DEPRECATED } self.data_stub = { "accessionId": self.id_stub, diff --git a/tests/test_handlers.py b/tests/test_handlers.py index f8b65ea46..f51408286 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -73,8 +73,8 @@ async def setUpAsync(self): self.test_user = { "userId": self.user_id, "name": "tester", - "templates": [], - "folders": ["FOL12345678"], + "templates": [], # DEPRECATED + "folders": ["FOL12345678"], # DEPRECATED } self.operator_config = { diff --git a/tests/test_operators.py b/tests/test_operators.py index 1f2bb0192..bf7cf57cc 100644 --- a/tests/test_operators.py +++ b/tests/test_operators.py @@ -101,8 +101,8 @@ def setUp(self): self.test_user = { "userId": self.user_generated_id, "name": "tester", - "drafts": [], - "folders": [], + "drafts": [], # DEPRECATED + "folders": [], # DEPRECATED } class_dbservice = "metadata_backend.api.operators.DBService" self.patch_dbservice = patch(class_dbservice, spec=True) From 19b0f12c45b4995a3b306e66f2e13e118026f10c Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Thu, 17 Feb 2022 11:35:47 +0200 Subject: [PATCH 224/336] fix mypy and spellcheck --- .github/config/.wordlist.txt | 1 + metadata_backend/api/handlers/object.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/config/.wordlist.txt b/.github/config/.wordlist.txt index 98f2884d4..cf53823bc 100644 --- a/.github/config/.wordlist.txt +++ b/.github/config/.wordlist.txt @@ -465,6 +465,7 @@ probeset processedreads processingtype projectId +projectNumber promethion proteinclusters protfam diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index fd5958f0f..51da12cd1 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -33,7 +33,7 @@ async def _handle_query(self, req: Request) -> Response: per_page = self._get_page_param(req, "per_page", 10) db_client = req.app["db_client"] - filter_list = [] # DEPRECATED, users don't own folders anymore + filter_list: List = [] # DEPRECATED, users don't own folders anymore data, page_num, page_size, total_objects = await Operator(db_client).query_metadata_database( collection, req.query, page, per_page, filter_list ) From bc3d28501b5adffcb57088e1631495a91e4b44f2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Feb 2022 09:05:34 +0000 Subject: [PATCH 225/336] Bump coverage from 6.3.1 to 6.3.2 Bumps [coverage](https://github.com/nedbat/coveragepy) from 6.3.1 to 6.3.2. - [Release notes](https://github.com/nedbat/coveragepy/releases) - [Changelog](https://github.com/nedbat/coveragepy/blob/master/CHANGES.rst) - [Commits](https://github.com/nedbat/coveragepy/compare/6.3.1...6.3.2) --- updated-dependencies: - dependency-name: coverage dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index cb4a29078..4507a5ff5 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ packages=find_packages(exclude=["tests"]), install_requires=requirements, extras_require={ - "test": ["coverage==6.3.1", "pytest==7.0.1", "pytest-cov==3.0.0", "tox==3.24.5"], + "test": ["coverage==6.3.2", "pytest==7.0.1", "pytest-cov==3.0.0", "tox==3.24.5"], "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"], }, package_data={ From 890fbffb1a1cd72e8810552a5036621cc62fb52d Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Wed, 23 Feb 2022 11:22:44 +0200 Subject: [PATCH 226/336] add templates-endpoint --- metadata_backend/api/handlers/folder.py | 6 +-- metadata_backend/api/handlers/template.py | 33 +++++++++++++ metadata_backend/api/operators.py | 59 +++++++++++++++++++++++ metadata_backend/server.py | 1 + tests/test_server.py | 2 +- 5 files changed, 96 insertions(+), 5 deletions(-) diff --git a/metadata_backend/api/handlers/folder.py b/metadata_backend/api/handlers/folder.py index 6e8277ed2..1cbe5ee83 100644 --- a/metadata_backend/api/handlers/folder.py +++ b/metadata_backend/api/handlers/folder.py @@ -80,7 +80,7 @@ def _check_patch_folder(self, patch_ops: Any) -> None: raise web.HTTPBadRequest(reason=reason) async def get_folders(self, req: Request) -> Response: - """Get a set of folders owned by the user with pagination values. + """Get a set of folders owned by the project with pagination values. :param req: GET Request :returns: JSON list of folders available for the user @@ -158,7 +158,7 @@ async def get_folders(self, req: Request) -> Response: url = f"{req.scheme}://{req.host}{req.path}" link_headers = await self._header_links(url, page, per_page, total_folders) LOG.debug(f"Pagination header links: {link_headers}") - LOG.info(f"Querying for user's folders resulted in {total_folders} folders") + LOG.info(f"Querying for project={project_id} folders resulted in {total_folders} folders") return web.Response( body=result, status=200, @@ -169,8 +169,6 @@ async def get_folders(self, req: Request) -> Response: async def post_folder(self, req: Request) -> Response: """Save object folder to database. - Also assigns the folder to the current user. - :param req: POST request :returns: JSON response containing folder ID for submitted folder """ diff --git a/metadata_backend/api/handlers/template.py b/metadata_backend/api/handlers/template.py index 5eb528077..674d673f4 100644 --- a/metadata_backend/api/handlers/template.py +++ b/metadata_backend/api/handlers/template.py @@ -15,6 +15,39 @@ class TemplatesAPIHandler(RESTAPIHandler): """API Handler for Templates.""" + async def get_templates(self, req: Request) -> Response: + """Get a set of templates owned by the project. 
+ + :param req: GET Request + :returns: JSON list of templates available for the user + """ + project_id = self._get_param(req, "projectId") + db_client = req.app["db_client"] + + user_operator = UserOperator(db_client) + current_user = get_session(req)["user_info"] + user = await user_operator.read_user(current_user) + user_has_project = await user_operator.check_user_has_project(project_id, user["userId"]) + if not user_has_project: + reason = f"user {user['userId']} is not affiliated with project {project_id}" + LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + + operator = Operator(db_client) + templates = await operator.query_templates_by_project(project_id) + + result = ujson.dumps( + templates, + escape_forward_slashes=False, + ) + + LOG.info(f"Querying for project={project_id} templates resulted in {len(templates)} templates") + return web.Response( + body=result, + status=200, + content_type="application/json", + ) + async def get_template(self, req: Request) -> Response: """Get one metadata template by its accession id. diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index d8e51e3b8..2f48d0105 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -305,6 +305,65 @@ def __init__(self, db_client: AsyncIOMotorClient) -> None: """ super().__init__(mongo_database, "application/json", db_client) + async def query_templates_by_project(self, project_id: str) -> List[Dict[str, str]]: + """Query all template schemas for given project ID. + + :param query: Dict containing query information + :returns: Paginated query result + """ + + templates: List[Dict[str, str]] = [] + + # List of possible template collections + collections = [ + "template-analysis", + "template-dac", + "template-dataset", + "template-experiment", + "template-policy", + "template-run", + "template-sample", + "template-study", + ] + + # Over all collections, query for accessionId and + # title (in study it's descriptor.studyTitle), cast them as displayTitle + # add schema name from current collection, bundle together + for collection in collections: + + # Cast title as displayTitle + title = "$title" + if collection == "template-study": + # Study has title in slightly different format + title = "$descriptor.studyTitle" + + # Query with projectId, get title and id, set schema with default value + _query = [ + { + "$match": { + "projectId": project_id, + }, + }, + { + "$project": { + "_id": 0, + "displayTitle": title, + "accessionId": "$accessionId", + "schema": collection, + }, + }, + ] + data_raw = await self.db_service.do_aggregate(collection, _query) + + # Parse and bundle up + if not data_raw: + data = [] + else: + data = [doc for doc in data_raw] + templates += data + + return templates + async def get_object_project(self, collection: str, accession_id: str) -> str: """Get the project ID the object is associated to. 
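To make the aggregation above concrete: for each of the eight template collections the pipeline first matches on projectId, then projects the stored document down to three fields. A sketch of the transformation for one assumed template-study document (values invented):

stored = {
    "accessionId": "EGA000001",
    "projectId": "prj-0001",
    "descriptor": {"studyTitle": "Test study"},
}
# After the $match on projectId and the $project stage, the pipeline yields:
projected = {
    "displayTitle": "Test study",  # from $descriptor.studyTitle for template-study
    "accessionId": "EGA000001",
    "schema": "template-study",  # set per collection, not read from the document
}
# query_templates_by_project concatenates such items from every template collection.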
diff --git a/metadata_backend/server.py b/metadata_backend/server.py index 56696eba4..e7513f4bd 100644 --- a/metadata_backend/server.py +++ b/metadata_backend/server.py @@ -91,6 +91,7 @@ async def init() -> web.Application: web.patch("/drafts/{schema}/{accessionId}", _object.patch_object), web.delete("/drafts/{schema}/{accessionId}", _object.delete_object), # template objects operations + web.get("/templates", _template.get_templates), web.post("/templates/{schema}", _template.post_template), web.get("/templates/{schema}/{accessionId}", _template.get_template), web.patch("/templates/{schema}/{accessionId}", _template.patch_template), diff --git a/tests/test_server.py b/tests/test_server.py index 5062927ca..958f74ef4 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -41,7 +41,7 @@ async def test_init(self): async def test_api_routes_are_set(self): """Test correct amount of api (no frontend) routes is set.""" server = await self.get_application() - self.assertIs(len(server.router.resources()), 18) + self.assertIs(len(server.router.resources()), 19) async def test_frontend_routes_are_set(self): """Test correct routes are set when frontend folder is exists.""" From a523f65b650ac2cf3efb4ae2734b115ba53ec2cc Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Wed, 23 Feb 2022 11:26:31 +0200 Subject: [PATCH 227/336] remove deprecations --- CHANGELOG.md | 3 +-- metadata_backend/api/handlers/restapi.py | 22 ---------------------- tests/integration/run_tests.py | 6 ------ tests/test_db_service.py | 2 -- tests/test_handlers.py | 3 --- tests/test_operators.py | 2 -- 6 files changed, 1 insertion(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e7c9cc433..a25924c49 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,8 +45,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - user is redirected to `/noproject` if key is empty or missing - new mandatory query parameter `projectId` in `GET /folders` - new mandatory JSON key `projectId` in `POST /folders` and `POST /templates` - - UNDECIDED: new endpoint `GET /templates` to replace `GET /users/current` `{"templates":[...]}` - - UNDECIDED: new endpoint `GET /project/{projectId}` to replace `GET /users/current` `{"templates":[...]}` + - new endpoint `GET /templates` to replace `GET /users/current` `{"templates":[...]}` - WARNING: breaking change that requires fresh database, because "project" is new information that did not exist before, and it can't be migrated to existing user-owned hierarchy ### Changed diff --git a/metadata_backend/api/handlers/restapi.py b/metadata_backend/api/handlers/restapi.py index 33777b8a9..bc3514f69 100644 --- a/metadata_backend/api/handlers/restapi.py +++ b/metadata_backend/api/handlers/restapi.py @@ -124,28 +124,6 @@ async def _get_collection_objects( yield result - # DEPRECATED, what to do? - async def _handle_user_objects_collection(self, req: Request, collection: str) -> List: - """Retrieve list of objects accession ids belonging to user in collection. 
- - :param req: HTTP request - :param collection: collection or schema of document - :returns: List - """ - db_client = req.app["db_client"] - current_user = get_session(req)["user_info"] - user_op = UserOperator(db_client) - folder_op = FolderOperator(db_client) - - user = await user_op.read_user(current_user) - res = self._get_collection_objects(folder_op, collection, user["folders"]) - - dt = [] - async for r in res: - dt.extend(r) - - return dt - async def _get_data(self, req: Request) -> Dict: """Get the data content from a request. diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index 49d4abed8..930ab8afb 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -487,12 +487,6 @@ async def create_folder(data, user): data["metadataObjects"] = [] try: await database["folder"].insert_one(data) - # DEPRECATED - # find_by_id = {"userId": user} - # append_op = {"$push": {"folders": {"$each": [folder_id], "$position": 0}}} - # await database["user"].find_one_and_update( - # find_by_id, append_op, projection={"_id": False}, return_document=True - # ) return folder_id except Exception as e: diff --git a/tests/test_db_service.py b/tests/test_db_service.py index 3ea8c77a3..afa304f9e 100644 --- a/tests/test_db_service.py +++ b/tests/test_db_service.py @@ -35,8 +35,6 @@ def setUp(self): self.user_stub = { "userId": self.user_id_stub, "name": "name", - "drafts": ["EGA123456", "EGA1234567"], # DEPRECATED - "folders": ["EGA1234569"], # DEPRECATED } self.data_stub = { "accessionId": self.id_stub, diff --git a/tests/test_handlers.py b/tests/test_handlers.py index f51408286..b283c1079 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -73,8 +73,6 @@ async def setUpAsync(self): self.test_user = { "userId": self.user_id, "name": "tester", - "templates": [], # DEPRECATED - "folders": ["FOL12345678"], # DEPRECATED } self.operator_config = { @@ -576,7 +574,6 @@ async def test_get_object_as_xml(self): async def test_query_is_called_and_returns_json_in_correct_format(self): """Test query method calls operator and returns mocked JSON object.""" - RESTAPIHandler._handle_user_objects_collection = make_mocked_coro(["EDAG3991701442770179", "EGA123456"]) url = f"/objects/study?studyType=foo&name=bar&page={self.page_num}" f"&per_page={self.page_size}" response = await self.client.get(url) self.assertEqual(response.status, 200) diff --git a/tests/test_operators.py b/tests/test_operators.py index bf7cf57cc..f0a65f17e 100644 --- a/tests/test_operators.py +++ b/tests/test_operators.py @@ -101,8 +101,6 @@ def setUp(self): self.test_user = { "userId": self.user_generated_id, "name": "tester", - "drafts": [], # DEPRECATED - "folders": [], # DEPRECATED } class_dbservice = "metadata_backend.api.operators.DBService" self.patch_dbservice = patch(class_dbservice, spec=True) From bbf9cb0411215153881f0f0663715b75d1aad145 Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Wed, 23 Feb 2022 11:48:29 +0200 Subject: [PATCH 228/336] add templates endpoints to api spec --- docs/specification.yml | 45 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/docs/specification.yml b/docs/specification.yml index 4557106ab..459d92400 100644 --- a/docs/specification.yml +++ b/docs/specification.yml @@ -756,6 +756,37 @@ paths: application/json: schema: $ref: "#/components/schemas/403Forbidden" + /templates: + get: + tags: + - Query + summary: Get templates from selected project + parameters: + - name: projectId + in: query + description: 
project internal ID + schema: + type: string + required: true + responses: + 200: + description: OK + content: + application/json: + schema: + $ref: "#/components/schemas/Templates" + 401: + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/401Unauthorized" + 403: + description: Forbidden + content: + application/json: + schema: + $ref: "#/components/schemas/403Forbidden" /templates/{schema}: post: tags: @@ -1339,6 +1370,20 @@ components: type: string description: URL pointing to the schema source example: https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/SRA.sample.xsd + Templates: + type: array + items: + type: object + properties: + accessionId: + type: string + description: internal ID of template + displayTitle: + type: string + description: name of template to be displayed in UI + schema: + type: string + description: database collection name template belongs to Object: type: object required: From 5c08690c3192148378429e43429cd283cc7d9613 Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Wed, 23 Feb 2022 12:19:25 +0200 Subject: [PATCH 229/336] add templates endpoint request to integration tests --- tests/integration/run_tests.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index 930ab8afb..0fc462e60 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -337,7 +337,7 @@ async def post_template_json(sess, schema, filename, project_id): :param sess: HTTP session in which request call is made :param schema: name of the schema (folder) used for testing :param filename: name of the file used for testing. - :param project_id: id of the project the folder belongs to + :param project_id: id of the project the template belongs to """ request_data = await create_request_json_data(schema, filename) request_data = json.loads(request_data) @@ -357,6 +357,20 @@ async def post_template_json(sess, schema, filename, project_id): return ans["accessionId"] +async def get_templates(sess, project_id): + """Get templates from project. + + :param sess: HTTP session in which request call is made + :param project_id: id of the project the template belongs to + """ + async with sess.get(f"{templates_url}?projectId={project_id}") as resp: + LOG.debug(f"Requesting templates from project={project_id}") + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" + ans = await resp.json() + LOG.debug(f"Received {len(ans)} templates") + return ans + + async def get_template(sess, schema, template_id): """Get and return a drafted metadata object. 
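Putting the specification and this helper together, a GET /templates?projectId=... response is expected to be a flat JSON array shaped like the Templates schema above; an illustrative payload (values invented) would be:

[
    {
        "accessionId": "EGA000001",
        "displayTitle": "Test study",
        "schema": "template-study",
    }
]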
@@ -1320,6 +1334,12 @@ async def test_crud_users_works(sess, project_id):
     template_ids = await post_template_json(sess, "study", "SRP000539_list.json", project_id)
     assert len(template_ids) == 2, "templates could not be added as batch"

+    templates = await get_templates(sess, project_id)
+    assert len(templates) == 2, "did not find templates from project"
+    assert templates[0]["schema"] == "template-study", "wrong template schema"
+    title = "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing"
+    assert templates[0]["displayTitle"] == title, "wrong template title"
+
     # Delete user
     await delete_user(sess, user_id)
     # 401 means API is inaccessible thus session ended

From 80479a7d5671e341d3e98898f6b0965b684d307a Mon Sep 17 00:00:00 2001
From: "teemu.kataja"
Date: Wed, 23 Feb 2022 13:38:11 +0200
Subject: [PATCH 230/336] fix function docstring

---
 metadata_backend/api/operators.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py
index 2f48d0105..22a5a9142 100644
--- a/metadata_backend/api/operators.py
+++ b/metadata_backend/api/operators.py
@@ -308,8 +308,8 @@ def __init__(self, db_client: AsyncIOMotorClient) -> None:
     async def query_templates_by_project(self, project_id: str) -> List[Dict[str, str]]:
         """Query all template schemas for given project ID.

-        :param query: Dict containing query information
-        :returns: Paginated query result
+        :param project_id: project internal ID that owns templates
+        :returns: list of simplified template objects
         """

From 7128ffec6a62b1b97e4af064a28126b207acdf5c Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Wed, 26 Jan 2022 16:04:28 +0000
Subject: [PATCH 231/336] Update swagger doc

---
 docs/specification.yml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/docs/specification.yml b/docs/specification.yml
index 48e5e69b6..111ac77fa 100644
--- a/docs/specification.yml
+++ b/docs/specification.yml
@@ -299,6 +299,13 @@ paths:
         schema:
           type: string
         required: true
+      - name: folder
+        in: query
+        schema:
+          type: string
+        description: The folder ID the object belongs to.
+        required: true
+        example: "folder=12345"
     requestBody:
       content:
         multipart/form-data:
@@ -535,6 +542,13 @@ paths:
         schema:
           type: string
         required: true
+      - name: folder
+        in: query
+        schema:
+          type: string
+        description: The folder ID the object belongs to.
+        required: true
+        example: "folder=12345"
     requestBody:
       content:
         multipart/form-data:

From 465bec77ad2cab55db40f911040c03614a67129d Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Wed, 9 Feb 2022 08:09:36 +0000
Subject: [PATCH 232/336] Extract title of object for folder patch

---
 metadata_backend/api/handlers/object.py   | 38 +++++++++--------------
 metadata_backend/api/operators.py         | 24 ++++++++------
 metadata_backend/helpers/schema_loader.py |  1 +
 tests/test_handlers.py                    |  8 ++---
 tests/test_operators.py                   | 23 +++++---------
 5 files changed, 41 insertions(+), 53 deletions(-)

diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py
index 889b69641..9905fed04 100644
--- a/metadata_backend/api/handlers/object.py
+++ b/metadata_backend/api/handlers/object.py
@@ -104,9 +104,9 @@ async def post_object(self, req: Request) -> Response:
         folder_id = req.query.get("folder", "")
         if not folder_id:
-            reason = "Folder ID is required query parameter."
+            reason = "Folder is required query parameter.
Please provide folder id where object is added to." raise web.HTTPBadRequest(reason=reason) - patch_params = {"folder": folder_id} + patch_params = {} self._check_schema_exists(schema_type) collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type @@ -127,7 +127,7 @@ async def post_object(self, req: Request) -> Response: # If multipart request contains XML, XML operator is used. # Else the multipart request is expected to contain CSV file(s) which are converted into JSON. operator = XMLOperator(db_client) if cont_type == "xml" else Operator(db_client) - patch_params.update({"cont_type": cont_type, "title": filename}) + patch_params = {"cont_type": cont_type, "filename": filename} else: content = await self._get_data(req) if not req.path.startswith("/drafts"): @@ -141,33 +141,25 @@ async def post_object(self, req: Request) -> Response: LOG.debug(f"Inserting multiple objects for {schema_type}.") ids: List[Dict[str, str]] = [] for item in content: - accession_id = await operator.create_metadata_object(collection, item[0]) - ids.append({"accessionId": accession_id}) + accession_id, title = await operator.create_metadata_object(collection, item[0]) + ids.append({"accessionId": accession_id, "title": title}) LOG.info(f"POST object with accesssion ID {accession_id} in schema {collection} was successful.") # we format like this to make it consistent with the response from /submit endpoint - data = [dict(item, **{"schema": schema_type}) for item in ids] + data = [dict({"accessionId": item["accessionId"]}, **{"schema": schema_type}) for item in ids] # we take the first result if we get multiple location_headers = CIMultiDict(Location=f"{url}/{data[0]['accessionId']}") else: - accession_id = await operator.create_metadata_object(collection, content) + accession_id, title = await operator.create_metadata_object(collection, content) data = {"accessionId": accession_id} - location_headers = CIMultiDict(Location=f"{url}/{accession_id}") LOG.info(f"POST object with accesssion ID {accession_id} in schema {collection} was successful.") # Gathering data for object to be added to folder if not isinstance(data, List): - ids = [data] - if not patch_params.get("title", None) and isinstance(content, Dict): - try: - patch_params["title"] = ( - content["descriptor"]["studyTitle"] if collection == "study" else content["title"] - ) - except (TypeError, KeyError): - patch_params["title"] = "" - patch = await self.prepare_folder_patch(collection, ids, patch_params) + ids = [dict(data, **{"title": title})] folder_op = FolderOperator(db_client) - folder_id = await folder_op.update_folder(folder_id, patch) + patch = await self.prepare_folder_patch_new_object(collection, ids, patch_params) + await folder_op.update_folder(folder_id, patch) body = ujson.dumps(data, escape_forward_slashes=False) @@ -306,8 +298,8 @@ async def patch_object(self, req: Request) -> Response: LOG.info(f"PATCH object with accession ID {accession_id} in schema {collection} was successful.") return web.Response(body=body, status=200, content_type="application/json") - async def prepare_folder_patch(self, schema: str, ids: List, params: Dict[str, str]) -> List: - """Prepare patch operations list. + async def prepare_folder_patch_new_object(self, schema: str, ids: List, params: Dict[str, str]) -> List: + """Prepare patch operations list for adding an object or objects to a folder. 
:param schema: schema of objects to be added to the folder :param ids: object IDs @@ -335,13 +327,11 @@ async def prepare_folder_patch(self, schema: str, ids: List, params: Dict[str, s "schema": schema, "tags": { "submissionType": submission_type, - "displayTitle": params["title"], + "displayTitle": id["title"], }, }, } - if submission_type != "Form": - patch_ops["value"]["tags"]["fileName"] = params["title"] + patch_ops["value"]["tags"]["fileName"] = params["filename"] patch.append(patch_ops) - return patch diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 669e8551c..9dc7e8ee4 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -39,7 +39,7 @@ def __init__(self, db_name: str, content_type: str, db_client: AsyncIOMotorClien self.db_service = DBService(db_name, db_client) self.content_type = content_type - async def create_metadata_object(self, schema_type: str, data: Union[Dict, str]) -> str: + async def create_metadata_object(self, schema_type: str, data: Union[Dict, str]) -> Tuple[str, str]: """Create new metadata object to database. Data formatting and addition step for JSON or XML must be implemented @@ -49,9 +49,9 @@ async def create_metadata_object(self, schema_type: str, data: Union[Dict, str]) :param data: Data to be saved to database. :returns: Accession id for the object inserted to database """ - accession_id = await self._format_data_to_create_and_add_to_db(schema_type, data) + accession_id, title = await self._format_data_to_create_and_add_to_db(schema_type, data) LOG.info(f"Inserting object with schema {schema_type} to database succeeded with accession id: {accession_id}") - return accession_id + return accession_id, title async def replace_metadata_object(self, schema_type: str, accession_id: str, data: Union[Dict, str]) -> str: """Replace metadata object from database. @@ -127,7 +127,7 @@ async def delete_metadata_object(self, schema_type: str, accession_id: str) -> s LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - async def _insert_formatted_object_to_db(self, schema_type: str, data: Dict) -> str: + async def _insert_formatted_object_to_db(self, schema_type: str, data: Dict) -> Tuple[str, str]: """Insert formatted metadata object to database. :param schema_type: Schema type of the object to insert. @@ -142,7 +142,11 @@ async def _insert_formatted_object_to_db(self, schema_type: str, data: Dict) -> LOG.error(reason) raise web.HTTPBadRequest(reason=reason) if insert_success: - return data["accessionId"] + try: + title = data["descriptor"]["studyTitle"] if schema_type == "study" else data["title"] + except (TypeError, KeyError): + title = "" + return data["accessionId"], title else: reason = "Inserting object to database failed for some reason." LOG.error(reason) @@ -249,7 +253,7 @@ async def check_exists(self, schema_type: str, accession_id: str) -> None: raise web.HTTPNotFound(reason=reason) @abstractmethod - async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Any) -> str: + async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Any) -> Tuple[str, str]: """Format and add data to database. Must be implemented by subclass. 
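The operator changes in this patch alter the create contract: the insert helpers and their callers now hand back an (accession id, title) pair. A hedged caller sketch (hypothetical code, not an excerpt from the repository):

accession_id, title = await operator.create_metadata_object("study", content)
# For study objects the title is read from content["descriptor"]["studyTitle"],
# for other schemas from content["title"]; a missing key yields title == "".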
@@ -380,7 +384,7 @@ async def query_metadata_database( ) return data, page_num, page_size, total_objects[0]["total"] - async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Dict) -> str: + async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Dict) -> Tuple[str, str]: """Format JSON metadata object and add it to db. Adds necessary additional information to object before adding to db. @@ -513,7 +517,7 @@ def __init__(self, db_client: AsyncIOMotorClient) -> None: """ super().__init__(mongo_database, "text/xml", db_client) - async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: str) -> str: + async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: str) -> Tuple[str, str]: """Format XML metadata object and add it to db. XML is validated, then parsed to JSON, which is added to database. @@ -527,10 +531,10 @@ async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: str # remove `draft-` from schema type schema = schema_type[6:] if schema_type.startswith("draft") else schema_type data_as_json = XMLToJSONParser().parse(schema, data) - accession_id = await Operator(db_client)._format_data_to_create_and_add_to_db(schema_type, data_as_json) + accession_id, title = await Operator(db_client)._format_data_to_create_and_add_to_db(schema_type, data_as_json) LOG.debug(f"XMLOperator formatted data for xml-{schema_type} to add to DB") return await self._insert_formatted_object_to_db( - f"xml-{schema_type}", {"accessionId": accession_id, "content": data} + f"xml-{schema_type}", {"accessionId": accession_id, "title": title, "content": data} ) async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accession_id: str, data: str) -> str: diff --git a/metadata_backend/helpers/schema_loader.py b/metadata_backend/helpers/schema_loader.py index a30a6ee25..42f9492a7 100644 --- a/metadata_backend/helpers/schema_loader.py +++ b/metadata_backend/helpers/schema_loader.py @@ -42,6 +42,7 @@ def _identify_file(self, schema_type: str) -> Path: for file in [x for x in self.path.iterdir()]: if schema_type in file.name and file.name.endswith(self.loader_type): schema_file = file + break if not schema_file: raise SchemaNotFoundException diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 0f6244a66..994a7f0aa 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -1,7 +1,7 @@ """Test API endpoints from handlers module.""" from pathlib import Path -from unittest.mock import patch, call +from unittest.mock import call, patch from aiohttp import FormData from aiohttp.test_utils import AioHTTPTestCase, make_mocked_coro @@ -153,7 +153,7 @@ async def fake_xmloperator_read_metadata_object(self, schema_type, accession_id) async def fake_xmloperator_create_metadata_object(self, schema_type, content): """Fake create operation to return mocked accessionId.""" - return self.test_ega_string + return self.test_ega_string, "title" async def fake_xmloperator_replace_metadata_object(self, schema_type, accession_id, content): """Fake replace operation to return mocked accessionId.""" @@ -161,7 +161,7 @@ async def fake_xmloperator_replace_metadata_object(self, schema_type, accession_ async def fake_operator_create_metadata_object(self, schema_type, content): """Fake create operation to return mocked accessionId.""" - return self.test_ega_string + return self.test_ega_string, "title" async def fake_operator_update_metadata_object(self, schema_type, accession_id, content): """Fake update operation to 
return mocked accessionId.""" @@ -443,6 +443,7 @@ async def test_post_object_works_with_csv(self): file_content = self.get_file_data("sample", "EGAformat.csv") self.MockedCSVParser().parse.return_value = [{}, {}, {}] response = await self.client.post("/objects/sample", params={"folder": "some id"}, data=data) + print("=== RESP ===", await response.text()) json_resp = await response.json() self.assertEqual(response.status, 201) self.assertEqual(self.test_ega_string, json_resp[0]["accessionId"]) @@ -549,7 +550,6 @@ async def test_submit_object_fails_with_too_many_files(self): self.assertEqual(response.status, 400) self.assertIn(reason, await response.text()) - # handle_check_ownedby_user.return_value = True async def test_get_object(self): """Test that accessionId returns correct JSON object.""" url = f"/objects/study/{self.query_accessionId}" diff --git a/tests/test_operators.py b/tests/test_operators.py index 20ca13851..773f615ac 100644 --- a/tests/test_operators.py +++ b/tests/test_operators.py @@ -2,22 +2,15 @@ import datetime import re import unittest +from unittest import IsolatedAsyncioTestCase +from unittest.mock import MagicMock, call, patch from uuid import uuid4 -from unittest.mock import MagicMock, patch, call from aiohttp.web import HTTPBadRequest, HTTPNotFound, HTTPUnprocessableEntity -from unittest import IsolatedAsyncioTestCase - +from metadata_backend.api.operators import FolderOperator, Operator, UserOperator, XMLOperator from multidict import MultiDict, MultiDictProxy from pymongo.errors import ConnectionFailure -from metadata_backend.api.operators import ( - FolderOperator, - Operator, - XMLOperator, - UserOperator, -) - class AsyncIterator: """Async iterator based on range.""" @@ -191,7 +184,7 @@ async def test_json_create_passes_and_returns_accessionId(self): "descriptor": {"studyTitle": "Highly", "studyType": "Other"}, } operator.db_service.create.return_value = True - accession = await operator.create_metadata_object("study", data) + accession, _ = await operator.create_metadata_object("study", data) operator.db_service.create.assert_called_once() self.assertEqual(accession, self.accession_id) @@ -264,10 +257,10 @@ async def test_xml_create_passes_and_returns_accessionId(self): operator.db_service.create.return_value = True with patch( ("metadata_backend.api.operators.Operator._format_data_to_create_and_add_to_db"), - return_value=self.accession_id, + return_value=(self.accession_id, "title"), ): with patch("metadata_backend.api.operators.XMLToJSONParser"): - accession = await operator.create_metadata_object("study", "") + accession, _ = await operator.create_metadata_object("study", "") operator.db_service.create.assert_called_once() self.assertEqual(accession, self.accession_id) @@ -375,7 +368,7 @@ async def test_correct_data_is_set_to_xml_when_creating(self): xml_data = "" with patch( ("metadata_backend.api.operators.Operator._format_data_to_create_and_add_to_db"), - return_value=self.accession_id, + return_value=(self.accession_id, "title"), ): with patch( ("metadata_backend.api.operators.XMLOperator._insert_formatted_object_to_db"), @@ -384,7 +377,7 @@ async def test_correct_data_is_set_to_xml_when_creating(self): with patch("metadata_backend.api.operators.XMLToJSONParser"): acc = await (operator._format_data_to_create_and_add_to_db("study", xml_data)) m_insert.assert_called_once_with( - "xml-study", {"accessionId": self.accession_id, "content": xml_data} + "xml-study", {"accessionId": self.accession_id, "title": "title", "content": xml_data} ) 
self.assertEqual(acc, self.accession_id)

From 0c1077f9aa7a08066f352209d1dca3d0f1efe9e6 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Wed, 9 Feb 2022 08:29:11 +0000
Subject: [PATCH 233/336] Add CSV to submission type check

---
 metadata_backend/api/handlers/folder.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/metadata_backend/api/handlers/folder.py b/metadata_backend/api/handlers/folder.py
index cc5910fc9..ffabad1c6 100644
--- a/metadata_backend/api/handlers/folder.py
+++ b/metadata_backend/api/handlers/folder.py
@@ -14,9 +14,9 @@
 from ...helpers.doi import DOIHandler
 from ...helpers.logger import LOG
 from ...helpers.validator import JSONValidator
-from .restapi import RESTAPIHandler
 from ..middlewares import get_session
 from ..operators import FolderOperator, Operator, UserOperator
+from .restapi import RESTAPIHandler


 class FolderAPIHandler(RESTAPIHandler):
@@ -26,7 +26,7 @@ def _check_patch_folder(self, patch_ops: Any) -> None:
         """Check patch operations in request are valid.

         We check that ``metadataObjects`` and ``drafts`` have ``_required_values``.
-        For tags we check that the ``submissionType`` takes either ``XML`` or
+        For tags we check that the ``submissionType`` takes either ``CSV``, ``XML`` or
         ``Form`` as values.
         :param patch_ops: JSON patch request
         :raises: HTTPBadRequest if request does not fulfil one of the requirements
@@ -41,8 +41,12 @@ def _check_patch_folder(self, patch_ops: Any) -> None:
         for op in patch_ops:
             if _tags.match(op["path"]):
                 LOG.info(f"{op['op']} on tags in folder")
-                if "submissionType" in op["value"].keys() and op["value"]["submissionType"] not in ["XML", "Form"]:
-                    reason = "submissionType is restricted to either 'XML' or 'Form' values."
+                if "submissionType" in op["value"].keys() and op["value"]["submissionType"] not in [
+                    "XML",
+                    "CSV",
+                    "Form",
+                ]:
+                    reason = "submissionType is restricted to either 'CSV', 'XML' or 'Form' values."
LOG.error(reason) raise web.HTTPBadRequest(reason=reason) pass From ca2a25e5255041b4beccfaed19ed16b006a6470c Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 9 Feb 2022 15:10:00 +0000 Subject: [PATCH 234/336] Add patching of folder after object update and replace --- metadata_backend/api/handlers/object.py | 59 +++++++++++++++++++-- metadata_backend/api/handlers/submission.py | 3 +- metadata_backend/api/handlers/template.py | 4 +- metadata_backend/api/operators.py | 32 +++++++---- metadata_backend/helpers/parser.py | 6 ++- tests/integration/run_tests.py | 24 +++++++-- tests/test_handlers.py | 4 +- tests/test_operators.py | 4 +- 8 files changed, 110 insertions(+), 26 deletions(-) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index 9905fed04..5d521e028 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -235,8 +235,9 @@ async def put_object(self, req: Request) -> Response: db_client = req.app["db_client"] content: Union[Dict, str] operator: Union[Operator, XMLOperator] + filename = "" if req.content_type == "multipart/form-data": - files, _, _ = await multipart_content(req, extract_one=True, expect_xml=True) + files, _, filename = await multipart_content(req, extract_one=True, expect_xml=True) content, _ = files[0] operator = XMLOperator(db_client) else: @@ -250,8 +251,17 @@ async def put_object(self, req: Request) -> Response: await operator.check_exists(collection, accession_id) await self._handle_check_ownedby_user(req, collection, accession_id) + folder_op = FolderOperator(db_client) + exists, folder_id, published = await folder_op.check_object_in_folder(collection, accession_id) + if exists: + if published: + reason = "Published objects cannot be updated." + LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) - accession_id = await operator.replace_metadata_object(collection, accession_id, content) + accession_id, title = await operator.replace_metadata_object(collection, accession_id, content) + patch = await self.prepare_folder_patch_update_object(collection, accession_id, title, filename) + await folder_op.update_folder(folder_id, patch) body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) LOG.info(f"PUT object with accession ID {accession_id} in schema {collection} was successful.") @@ -285,7 +295,7 @@ async def patch_object(self, req: Request) -> Response: await self._handle_check_ownedby_user(req, collection, accession_id) folder_op = FolderOperator(db_client) - exists, _, published = await folder_op.check_object_in_folder(collection, accession_id) + exists, folder_id, published = await folder_op.check_object_in_folder(collection, accession_id) if exists: if published: reason = "Published objects cannot be updated." 
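Condensing the new PUT flow above into a short sketch (variable names as in the surrounding handler; simplified, not a literal excerpt):

exists, folder_id, published = await folder_op.check_object_in_folder(collection, accession_id)
if exists and published:
    raise web.HTTPUnauthorized(reason="Published objects cannot be updated.")
# Replace the object, then propagate its (possibly new) title to the folder entry.
accession_id, title = await operator.replace_metadata_object(collection, accession_id, content)
patch = await self.prepare_folder_patch_update_object(collection, accession_id, title, filename)
await folder_op.update_folder(folder_id, patch)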
@@ -294,6 +304,14 @@ async def patch_object(self, req: Request) -> Response:

         accession_id = await operator.update_metadata_object(collection, accession_id, content)

+        # If the title changed, the new title is also patched into the folder
+        try:
+            title = content["descriptor"]["studyTitle"] if collection == "study" else content["title"]
+            patch = await self.prepare_folder_patch_update_object(collection, accession_id, title)
+            await folder_op.update_folder(folder_id, patch)
+        except (TypeError, KeyError):
+            pass
+
         body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False)
         LOG.info(f"PATCH object with accession ID {accession_id} in schema {collection} was successful.")
         return web.Response(body=body, status=200, content_type="application/json")
@@ -335,3 +353,38 @@ async def prepare_folder_patch_new_object(self, schema: str, ids: List, params:
             patch_ops["value"]["tags"]["fileName"] = params["filename"]
             patch.append(patch_ops)
         return patch
+
+    async def prepare_folder_patch_update_object(
+        self, schema: str, accession_id: str, title: str, filename: str = ""
+    ) -> List:
+        """Prepare patch operation for updating object's title in a folder.
+
+        :param schema: schema of object to be updated
+        :param accession_id: object ID
+        :param title: title to be updated
+        :param filename: name of the submitted file (set for XML submissions)
+        :returns: list with one patch operation
+        """
+        if schema.startswith("draft"):
+            path = "/drafts"
+        else:
+            path = "/metadataObjects"
+
+        patch_op = {
+            "op": "replace",
+            "match": {path.replace("/", ""): {"$elemMatch": {"schema": schema, "accessionId": accession_id}}},
+        }
+        if not filename:
+            patch_op.update(
+                {
+                    "path": f"{path}/$/tags/displayTitle",
+                    "value": title,
+                }
+            )
+        else:
+            patch_op.update(
+                {
+                    "path": f"{path}/$/tags",
+                    "value": {"submissionType": "XML", "fileName": filename, "displayTitle": title},
+                }
+            )
+        return [patch_op]

diff --git a/metadata_backend/api/handlers/submission.py b/metadata_backend/api/handlers/submission.py
index c9c2176e8..e8386ca50 100644
--- a/metadata_backend/api/handlers/submission.py
+++ b/metadata_backend/api/handlers/submission.py
@@ -128,8 +128,9 @@ async def _execute_action(self, schema: str, content: str, db_client: AsyncIOMot
         :returns: Dict containing specific action that was completed
         """
         if action == "add":
+            accession_id, _ = await XMLOperator(db_client).create_metadata_object(schema, content)
             result = {
-                "accessionId": await XMLOperator(db_client).create_metadata_object(schema, content),
+                "accessionId": accession_id,
                 "schema": schema,
             }
             LOG.debug(f"added some content in {schema} ...")

diff --git a/metadata_backend/api/handlers/template.py b/metadata_backend/api/handlers/template.py
index c2bccc2cc..105b5b74e 100644
--- a/metadata_backend/api/handlers/template.py
+++ b/metadata_backend/api/handlers/template.py
@@ -69,7 +69,7 @@ async def post_template(self, req: Request) -> Response:
                     reason = f"template key is missing from request body for element: {num}."
                     LOG.error(reason)
                     raise web.HTTPBadRequest(reason=reason)
-                accession_id = await operator.create_metadata_object(collection, tmpl["template"])
+                accession_id, _ = await operator.create_metadata_object(collection, tmpl["template"])
                 data = [{"accessionId": accession_id, "schema": collection}]
                 if "tags" in tmpl:
                     data[0]["tags"] = tmpl["tags"]
@@ -82,7 +82,7 @@ async def post_template(self, req: Request) -> Response:
                 reason = "template key is missing from request body."
LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - accession_id = await operator.create_metadata_object(collection, content["template"]) + accession_id, _ = await operator.create_metadata_object(collection, content["template"]) data = [{"accessionId": accession_id, "schema": collection}] if "tags" in content: data[0]["tags"] = content["tags"] diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 9dc7e8ee4..3a434cdca 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -53,7 +53,9 @@ async def create_metadata_object(self, schema_type: str, data: Union[Dict, str]) LOG.info(f"Inserting object with schema {schema_type} to database succeeded with accession id: {accession_id}") return accession_id, title - async def replace_metadata_object(self, schema_type: str, accession_id: str, data: Union[Dict, str]) -> str: + async def replace_metadata_object( + self, schema_type: str, accession_id: str, data: Union[Dict, str] + ) -> Tuple[str, str]: """Replace metadata object from database. Data formatting and addition step for JSON or XML must be implemented @@ -64,9 +66,9 @@ async def replace_metadata_object(self, schema_type: str, accession_id: str, dat :param data: Data to be saved to database. :returns: Accession id for the object replaced to database """ - await self._format_data_to_replace_and_add_to_db(schema_type, accession_id, data) + accession_id, title = await self._format_data_to_replace_and_add_to_db(schema_type, accession_id, data) LOG.info(f"Replacing object with schema {schema_type} to database succeeded with accession id: {accession_id}") - return accession_id + return accession_id, title async def update_metadata_object(self, schema_type: str, accession_id: str, data: Union[Dict, str]) -> str: """Update metadata object from database. @@ -143,7 +145,7 @@ async def _insert_formatted_object_to_db(self, schema_type: str, data: Dict) -> raise web.HTTPBadRequest(reason=reason) if insert_success: try: - title = data["descriptor"]["studyTitle"] if schema_type == "study" else data["title"] + title = data["descriptor"]["studyTitle"] if schema_type in ["study", "draft-study"] else data["title"] except (TypeError, KeyError): title = "" return data["accessionId"], title @@ -152,7 +154,7 @@ async def _insert_formatted_object_to_db(self, schema_type: str, data: Dict) -> LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - async def _replace_object_from_db(self, schema_type: str, accession_id: str, data: Dict) -> str: + async def _replace_object_from_db(self, schema_type: str, accession_id: str, data: Dict) -> Tuple[str, str]: """Replace formatted metadata object in database. :param schema_type: Schema type of the object to replace. @@ -173,7 +175,11 @@ async def _replace_object_from_db(self, schema_type: str, accession_id: str, dat LOG.error(reason) raise web.HTTPBadRequest(reason=reason) if replace_success: - return accession_id + try: + title = data["descriptor"]["studyTitle"] if schema_type in ["study", "draft-study"] else data["title"] + except (TypeError, KeyError): + title = "" + return accession_id, title else: reason = "Replacing object to database failed for some reason." 
LOG.error(reason) @@ -260,7 +266,9 @@ async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Any """ @abstractmethod - async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accession_id: str, data: Any) -> str: + async def _format_data_to_replace_and_add_to_db( + self, schema_type: str, accession_id: str, data: Any + ) -> Tuple[str, str]: """Format and replace data in database. Must be implemented by subclass. @@ -406,7 +414,9 @@ async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Dic LOG.debug(f"Operator formatted data for {schema_type} to add to DB.") return await self._insert_formatted_object_to_db(schema_type, data) - async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accession_id: str, data: Dict) -> str: + async def _format_data_to_replace_and_add_to_db( + self, schema_type: str, accession_id: str, data: Dict + ) -> Tuple[str, str]: """Format JSON metadata object and replace it in db. Replace information in object before adding to db. @@ -537,7 +547,9 @@ async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: str f"xml-{schema_type}", {"accessionId": accession_id, "title": title, "content": data} ) - async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accession_id: str, data: str) -> str: + async def _format_data_to_replace_and_add_to_db( + self, schema_type: str, accession_id: str, data: str + ) -> Tuple[str, str]: """Format XML metadata object and add it to db. XML is validated, then parsed to JSON, which is added to database. @@ -552,7 +564,7 @@ async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accessio # remove `draft-` from schema type schema = schema_type[6:] if schema_type.startswith("draft") else schema_type data_as_json = XMLToJSONParser().parse(schema, data) - accession_id = await Operator(db_client)._format_data_to_replace_and_add_to_db( + accession_id, title = await Operator(db_client)._format_data_to_replace_and_add_to_db( schema_type, accession_id, data_as_json ) LOG.debug(f"XMLOperator formatted data for xml-{schema_type} to add to DB") diff --git a/metadata_backend/helpers/parser.py b/metadata_backend/helpers/parser.py index 2cd415bdd..c0fa09841 100644 --- a/metadata_backend/helpers/parser.py +++ b/metadata_backend/helpers/parser.py @@ -1,17 +1,17 @@ """Tool to parse XML and CSV files to JSON.""" -import re import csv +import re from io import StringIO from typing import Any, Dict, List, Optional, Type, Union from aiohttp import web +from pymongo import UpdateOne from xmlschema import XMLSchema, XMLSchemaConverter, XMLSchemaException, XsdElement, XsdType from .logger import LOG from .schema_loader import SchemaNotFoundException, XMLSchemaLoader from .validator import JSONValidator, XMLValidator -from pymongo import UpdateOne class MetadataXMLConverter(XMLSchemaConverter): @@ -457,6 +457,8 @@ def jsonpatch_mongo(identifier: Dict, json_patch: List[Dict[str, Any]]) -> List: queries.append(UpdateOne(identifier, {"$set": {op["path"][1:].replace("/", "."): op["value"]}})) elif op["op"] == "replace": path = op["path"][1:-2] if op["path"].endswith("/-") else op["path"][1:].replace("/", ".") + if op.get("match", None): + identifier.update(op["match"]) queries.append(UpdateOne(identifier, {"$set": {path: op["value"]}})) return queries diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index ba0b82bc7..b1607f85b 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -773,7 
+773,11 @@ async def test_crud_folders_works(sess): { "accessionId": draft_id, "schema": "draft-sample", - "tags": {"submissionType": "XML", "displayTitle": "SRS001433.xml", "fileName": "SRS001433.xml"}, + "tags": { + "submissionType": "XML", + "displayTitle": "HapMap sample from Homo sapiens", + "fileName": "SRS001433.xml", + }, } ], "folder drafts content mismatch" assert res["metadataObjects"] == [], "there are objects in folder, expected empty" @@ -797,7 +801,11 @@ async def test_crud_folders_works(sess): { "accessionId": draft_id, "schema": "draft-sample", - "tags": {"submissionType": "XML", "displayTitle": "SRS001433.xml", "fileName": "SRS001433.xml"}, + "tags": { + "submissionType": "XML", + "displayTitle": "HapMap sample from Homo sapiens", + "fileName": "SRS001433.xml", + }, } ], "folder drafts content mismatch" assert res["metadataObjects"] == [ @@ -862,7 +870,11 @@ async def test_crud_folders_works_no_publish(sess): { "accessionId": draft_id, "schema": "draft-sample", - "tags": {"submissionType": "XML", "displayTitle": "SRS001433.xml", "fileName": "SRS001433.xml"}, + "tags": { + "submissionType": "XML", + "displayTitle": "HapMap sample from Homo sapiens", + "fileName": "SRS001433.xml", + }, } ], "folder drafts content mismatch" assert res["metadataObjects"] == [], "there are objects in folder, expected empty" @@ -885,7 +897,11 @@ async def test_crud_folders_works_no_publish(sess): { "accessionId": draft_id, "schema": "draft-sample", - "tags": {"submissionType": "XML", "displayTitle": "SRS001433.xml", "fileName": "SRS001433.xml"}, + "tags": { + "submissionType": "XML", + "displayTitle": "HapMap sample from Homo sapiens", + "fileName": "SRS001433.xml", + }, } ], "folder drafts content mismatch" assert res["metadataObjects"] == [ diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 994a7f0aa..90a3028b7 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -157,7 +157,7 @@ async def fake_xmloperator_create_metadata_object(self, schema_type, content): async def fake_xmloperator_replace_metadata_object(self, schema_type, accession_id, content): """Fake replace operation to return mocked accessionId.""" - return self.test_ega_string + return self.test_ega_string, "title" async def fake_operator_create_metadata_object(self, schema_type, content): """Fake create operation to return mocked accessionId.""" @@ -169,7 +169,7 @@ async def fake_operator_update_metadata_object(self, schema_type, accession_id, async def fake_operator_replace_metadata_object(self, schema_type, accession_id, content): """Fake replace operation to return mocked accessionId.""" - return self.test_ega_string + return self.test_ega_string, "title" async def fake_operator_delete_metadata_object(self, schema_type, accession_id): """Fake delete operation to await successful operation indicator.""" diff --git a/tests/test_operators.py b/tests/test_operators.py index 773f615ac..aa7296e79 100644 --- a/tests/test_operators.py +++ b/tests/test_operators.py @@ -198,7 +198,7 @@ async def test_json_replace_passes_and_returns_accessionId(self): operator = Operator(self.client) operator.db_service.exists.return_value = True operator.db_service.replace.return_value = True - accession = await operator.replace_metadata_object("study", self.accession_id, data) + accession, _ = await operator.replace_metadata_object("study", self.accession_id, data) operator.db_service.replace.assert_called_once() self.assertEqual(accession, self.accession_id) @@ -388,7 +388,7 @@ async def 
test_correct_data_is_set_to_xml_when_replacing(self):
         xml_data = ""
         with patch(
             "metadata_backend.api.operators.Operator._format_data_to_replace_and_add_to_db",
-            return_value=self.accession_id,
+            return_value=(self.accession_id, "title"),
         ):
             with patch(
                 "metadata_backend.api.operators.XMLOperator._replace_object_from_db",

From d6ea2a9915b9ea4a34bac59dc4a27fa54b630506 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Thu, 10 Feb 2022 14:52:22 +0200
Subject: [PATCH 235/336] Update integration tests with folder check for metadata and draft objects

---
 tests/integration/run_tests.py | 73 ++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py
index b1607f85b..24d30fd1c 100644
--- a/tests/integration/run_tests.py
+++ b/tests/integration/run_tests.py
@@ -503,6 +503,54 @@ async def delete_user(sess, user_id):
         assert resp.status == 404, f"HTTP Status code error, got {resp.status}"
 
 
+def extract_folders_object(res, accession_id, draft):
+    """Extract object from folder metadataObjects with provided accessionId.
+
+    :param res: JSON parsed response from folder query request
+    :param accession_id: accession ID of reviewed object
+    :param draft: whether the object is looked up among drafts or metadataObjects
+    :returns: dict of object entry in folder
+    """
+    object = "drafts" if draft else "metadataObjects"
+    actual_res = next(obj for obj in res[object] if obj["accessionId"] == accession_id)
+    return actual_res
+
+
+async def check_folders_object_patch(sess, folder_id, schema, accession_id, title, filename, draft=False):
+    """Check that draft or metadata object is added correctly to folder.
+
+    Get draft or metadata object from the folder and assert with data
+    returned from object endpoint itself.
+
+    :param sess: HTTP session in which request call is made
+    :param folder_id: id of the folder
+    :param schema: name of the schema (folder) used for testing
+    :param accession_id: accession ID of reviewed object
+    :param title: title of reviewed object
+    :param filename: name of the file used for inserting data
+    :param draft: indication of object draft status, default False
+    """
+    sub_type = "Form" if filename.split(".")[-1] == "json" else filename.split(".")[-1].upper()
+    async with sess.get(f"{folders_url}/{folder_id}") as resp:
+        res = await resp.json()
+        try:
+            actual = extract_folders_object(res, accession_id, draft)
+            expected = {
+                "accessionId": accession_id,
+                "schema": schema if not draft else f"draft-{schema}",
+                "tags": {
+                    "submissionType": sub_type,
+                    "displayTitle": title,
+                    "fileName": filename,
+                },
+            }
+            if sub_type == "Form":
+                del expected["tags"]["fileName"]
+            assert actual == expected, "actual and expected data did not match"
+        except StopIteration:
+            pass
+    return schema
+
+
 # === Integration tests ===
 async def test_crud_works(sess, schema, filename, folder_id):
     """Test REST api POST, GET and DELETE reqs.
@@ -520,6 +568,9 @@ async def test_crud_works(sess, schema, filename, folder_id):
     async with sess.get(f"{objects_url}/{schema}/{accession_id[0]}") as resp:
         LOG.debug(f"Checking that {accession_id[0]} JSON is in {schema}")
         assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
+        res = await resp.json()
+        title = res["descriptor"].get("studyTitle", "") if schema == "study" else res.get("title", "")
+        await check_folders_object_patch(sess, folder_id, schema, accession_id[0], title, filename)
     async with sess.get(f"{objects_url}/{schema}/{accession_id[0]}?format=xml") as resp:
         LOG.debug(f"Checking that {accession_id[0]} XML is in {schema}")
         assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
@@ -560,6 +611,9 @@ async def test_csv(sess, folder_id):
     async with sess.get(f"{objects_url}/{_schema}/{_first_csv_row_id}") as resp:
         LOG.debug(f"Checking that {_first_csv_row_id} JSON is in {_schema}")
         assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
+        res = await resp.json()
+        title = res.get("title", "")
+        await check_folders_object_patch(sess, folder_id, _schema, accession_id, title, _filename)
 
     await delete_object(sess, _schema, _first_csv_row_id)
     async with sess.get(f"{objects_url}/{_schema}/{_first_csv_row_id}") as resp:
@@ -595,6 +649,14 @@ async def test_put_objects(sess, folder_id):
     accession_id = await post_object(sess, "study", folder_id, "SRP000539.xml")
     await put_object_json(sess, "study", accession_id[0], "SRP000539.json")
     await put_object_xml(sess, "study", accession_id[0], "SRP000539_put.xml")
+    await check_folders_object_patch(
+        sess,
+        folder_id,
+        "study",
+        accession_id[0],
+        "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing",
+        "SRP000539_put.xml",
+    )
 
 
 async def test_crud_drafts_works(sess, schema, orginal_file, update_file, folder_id):
@@ -611,10 +673,19 @@ async def test_crud_drafts_works(sess, schema, orginal_file, update_file, folder
     :param folder_id: id of the folder used to group submission objects
     """
     draft_id = await post_draft_json(sess, schema, folder_id, orginal_file)
+    async with sess.get(f"{drafts_url}/{schema}/{draft_id}") as resp:
+        assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
+        res = await resp.json()
+        title = res["descriptor"]["studyTitle"] if schema == "study" else res.get("title", "")
+    await check_folders_object_patch(sess, folder_id, schema, draft_id, title, orginal_file, draft=True)
+
     accession_id = await put_draft(sess, schema, draft_id, update_file)
     async with sess.get(f"{drafts_url}/{schema}/{accession_id}") as resp:
         LOG.debug(f"Checking that {accession_id} JSON is in {schema}")
         assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
+        res = await resp.json()
+        title = res["descriptor"]["studyTitle"] if schema == "study" else res.get("title", "")
+        await check_folders_object_patch(sess, folder_id, schema, accession_id, title, update_file, draft=True)
 
     await delete_draft(sess, schema, accession_id)
     async with sess.get(f"{drafts_url}/{schema}/{accession_id}") as resp:
@@ -646,9 +717,11 @@ async def test_patch_drafts_works(sess, schema, orginal_file, update_file, folde
     async with sess.get(f"{drafts_url}/{schema}/{accession_id}") as resp:
         LOG.debug(f"Checking that {accession_id} JSON is in {schema}")
         res = await resp.json()
+        title = res["descriptor"]["studyTitle"] if schema == "study" else res.get("title", None)
         assert res["centerName"] == "GEOM", "object centerName content mismatch"
         assert res["alias"] == "GSE10968", 
"object alias content mismatch" assert resp.status == 200, f"HTTP Status code error, got {resp.status}" + await check_folders_object_patch(sess, folder_id, schema, accession_id, title, update_file, draft=True) await delete_draft(sess, schema, accession_id) async with sess.get(f"{drafts_url}/{schema}/{accession_id}") as resp: From 578b5b363060901e215df309e534f4a81f4de4c7 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Fri, 11 Feb 2022 17:20:15 +0000 Subject: [PATCH 236/336] Add 'CSV' as accepted submission type Update spelling wordlist. --- .github/config/.wordlist.txt | 3 ++- metadata_backend/helpers/schemas/folders.json | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/config/.wordlist.txt b/.github/config/.wordlist.txt index 69cc2a43f..690da339c 100644 --- a/.github/config/.wordlist.txt +++ b/.github/config/.wordlist.txt @@ -115,6 +115,7 @@ cscfi cscusername csi csrf +csv ctrl cts curation @@ -651,4 +652,4 @@ xsd yaml yml za -zhuang \ No newline at end of file +zhuang diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index a95844d62..62fae4501 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -998,6 +998,7 @@ "title": "Type of submission", "enum": [ "XML", + "CSV", "Form" ] } @@ -1036,6 +1037,7 @@ "title": "Type of submission", "enum": [ "XML", + "CSV", "Form" ] } @@ -1047,4 +1049,4 @@ } }, "additionalProperties": false -} \ No newline at end of file +} From 2b078c9581205097edb0efd727daa165c71a6e95 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Mon, 14 Feb 2022 07:30:04 +0000 Subject: [PATCH 237/336] Update changelog --- CHANGELOG.md | 126 +++++++++++++++++++++++++-------------------------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 14057a1c1..060510f8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,9 +9,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- templates API #256 +- Add patching of folders after object save and update operations + - Object is added or updated to folder(submission) where it belongs with it's accession ID, schema, submission type, title and filename in the case of CSV and XML upload + - Adds configuration for mypy linting to VScode devcontainer setup +- Templates API #256 - use `ujson` as default json library -- creating draft Datacite DOI for folders #257 +- Creating draft Datacite DOI for folders #257 - created a mock web app, which would act similarly to DataCite REST API - altered `publish_folder` endpoint so that `extraInfo` containing the DOI data is added upon publishing - added `datePublished` key to folders which takes in the date/time, when folder is published @@ -26,7 +29,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - update github actions - Add folder querying by date #308 - Add description to JSON schemas #323 - - add JSON schema spelling checker to pyspelling github action - optimise wordlist by adding regex ignore patterns - added pyspelling to pre-commit hooks (fixed syntax for scripts according to https://github.com/koalaman/shellcheck ) @@ -41,22 +43,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - New mandatory ENV `OIDC_URL` - New optional ENVs `OIDC_SCOPE`, `AUTH_METHOD` - Added oidcrp dependency -- use node 16+ #345 +- Use node 16+ #345 - VScode Dev environment #287 - Adds requirements-dev.in/txt 
files. Now pip dependencies can be managed with pip-tools - README updated with tox command, development build instructions, and prettify Dockerfile. -- update ENA XML and JSON schemas #299 +- Update ENA XML and JSON schemas #299 - Github actions changed the use of https://git.io/misspell to rojopolis/spellcheck-github-actions #316 - Separated most of the handlers to own files inside the handlers folder #319 ### Fixed -- coveralls report #267 -- typos for functions and tests #279 -- fix spelling mistakes for JSON schemas #323 -- oidcrp does not allow empty values, prefill them in mockauth so front-end can start #333 +- Coveralls report #267 +- Typos for functions and tests #279 +- Fix spelling mistakes for JSON schemas #323 +- Oidcrp does not allow empty values, prefill them in mockauth so front-end can start #333 - Fix development environment #336 - - Add env vars OIDC_URL and OIDC_URL_TEST to mock auth container - Adds logging configs for mock auth - Updates mock auth api's token endpoint with expiration configs @@ -76,7 +77,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -- package updates +- Package updates ### Added @@ -88,72 +89,71 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- add integration tests for misses in dataset, experiment, policy +- Add integration tests for misses in dataset, experiment, policy ### Changed -- package updates +- Package updates - EGA XML schemas version:1.8.0 -- refactor analysis and experiment schemas to adhere to XML schema +- Refactor analysis and experiment schemas to adhere to XML schema ### Fixed -- fix misses for DAC, experiment and policy processing of XML -- fix misses in JSON Schema +- Fix misses for DAC, experiment and policy processing of XML +- Fix misses in JSON Schema ## [0.9.0] - 2021-03-22 ### Added -- use dependabot -- support simultaneous sessions +- Use dependabot +- Support simultaneous sessions ### Changed - Refactor JSON schema Links -- refactor handlers to be more streamlined -- validate patch requests for JSON content -- switch to python 3.8 +- Refactor handlers to be more streamlined +- Validate patch requests for JSON content +- Switch to python 3.8 ## [0.8.1] - 2021-02-15 ### Fixed -- bugfix for error pages #202 +- Bugfix for error pages #202 ## [0.8.0] - 2021-02-12 ### Added - TLS support -- use `sub` as alternative to `eppn` to identify users +- Use `sub` as alternative to `eppn` to identify users - `PATCH` for objects and `PUT` for XML objects enabled -- delete folders and objects associated to user on user delete +- Delete folders and objects associated to user on user delete ### Changed -- redirect to error pages -- extended integration tests +- Redirect to error pages +- Extended integration tests ### Fixed -- fix replace on json patch -- general bug and fixes +- Fix replace on json patch +- General bug and fixes ## [0.7.1] - 2021-01-19 ### Fixed -- hotfix release #176 - +- Hotfix release #176 - added check_object_exists to check object exists and fail early with 404 before checking it belongs to user - refactor and added more check_folder_exists to check folder exists before doing anything - integration test to check objects are deleted properly ### Changes -- check objects and folders exist before any operation -- integration check to see if deleted object or folder are still registered in db +- Check objects and folders exist before any operation +- Integration check to see if deleted object or folder are still registered 
in db ## [0.7.0] - 2021-01-06 @@ -162,7 +162,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - CodeQL github action #162 - `/health` endpoint #173 -- map `users` to `folders` with `_handle_check_ownedby_user` #158 +- Map `users` to `folders` with `_handle_check_ownedby_user` #158 - querying for objects is restricted to only the objects that belong to user - return folders owned by user or published - added a few db operators some used (aggregate, remove) @@ -170,17 +170,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - standardise raises description and general improvements and fixes of logs and descriptions ### Changed -- verify `/publish` endpoint #163 -- restrict endpoints to logged in users #151 -- updated documentation #165 -- switch to using uuids for accession ids #168 -- integration tests and increase unit test coverage #166 +- Verify `/publish` endpoint #163 +- Restrict endpoints to logged in users #151 +- Updated documentation #165 +- Switch to using uuids for accession ids #168 +- Integration tests and increase unit test coverage #166 ### Fixed -- fixes for idp and location headers redirects #150 -- fix race condition in db operations #158 -- fix handling of draft deletion by removing redundant deletion #164, #169 and #172 +- Fixes for idp and location headers redirects #150 +- Fix race condition in db operations #158 +- Fix handling of draft deletion by removing redundant deletion #164, #169 and #172 ## [0.6.1] - 2020-11-23 @@ -190,38 +190,38 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -- refactor draft `/folder` #144 -- refactor gh actions #140 -- patch publish #141 +- Refactor draft `/folder` #144 +- Refactor gh actions #140 +- Patch publish #141 ### Fixed -- bugfixes for login redirect #139 +- Bugfixes for login redirect #139 ## [0.6.0] - 2020-10-08 ### Added -- authentication with OIDC #133 -- only 3.7 support going further #134 -- more submission actions `ADD` and `MODIFY` #137 +- Authentication with OIDC #133 +- Only 3.7 support going further #134 +- More submission actions `ADD` and `MODIFY` #137 ## [0.5.3] - 2020-08-21 ### Changed -- updated OpenAPI specifications #127 -- python modules, project description and instructions to documentation sources #128 -- added integration tests #129 -- updated documentation #130 +- Updated OpenAPI specifications #127 +- Python modules, project description and instructions to documentation sources #128 +- Added integration tests #129 +- Updated documentation #130 ## [0.5.2] - 2020-08-14 ### Fixes -- fix mimetype for SVG image and package data +- Fix mimetype for SVG image and package data ## [0.5.1] - 2020-08-14 @@ -233,10 +233,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixes - Dockerfile build fixes #115 -- fix JSON Schema details #117 -- missing env from github actions #119 -- typo fixes #120 -- await responses #122 +- Fix JSON Schema details #117 +- Missing env from github actions #119 +- Typo fixes #120 +- Await responses #122 ## [0.5.0] - 2020-08-06 @@ -250,28 +250,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - JSON validation - XML better parsing - Auth middleware -- pagination +- Pagination ### Changed - Improved current naming conventions #82 - Login flow with new routes for Home & Login #76, #79, #80 -- change from pymongo to motor +- Change from pymongo to motor ## [0.2.0] - 2020-07-01 ### Added - Added 
integration tests -- switched to github actions -- added base docs folder -- added more refined XML parsing +- Switched to github actions +- Added base docs folder +- Added more refined XML parsing - Integration tests added - Refactor unit tests ### Changed -- refactor API endpoints and responses +- Refactor API endpoints and responses - error using https://tools.ietf.org/html/rfc7807 - `objects` and `schemas` endpoints added From 8ac660430aceb58b73ecacd0482e8535cba7ec10 Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Thu, 24 Feb 2022 10:42:14 +0200 Subject: [PATCH 238/336] check folder ownership when posting an object --- metadata_backend/api/handlers/object.py | 1 + tests/test_handlers.py | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index 51da12cd1..da4a8f1a1 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -106,6 +106,7 @@ async def post_object(self, req: Request) -> Response: if not folder_id: reason = "Folder is required query parameter. Please provide folder id where object is added to." raise web.HTTPBadRequest(reason=reason) + await self._handle_check_ownership(req, "folders", folder_id) patch_params = {} self._check_schema_exists(schema_type) diff --git a/tests/test_handlers.py b/tests/test_handlers.py index b283c1079..dc8eded37 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -824,8 +824,6 @@ async def test_get_folders_with_bad_params(self): async def test_get_folder_works(self): """Test folder is returned when correct folder id is given.""" - RESTAPIHandler._handle_check_ownership = make_mocked_coro(True) - response = await self.client.get("/folders/FOL12345678") self.assertEqual(response.status, 200) self.MockedFolderOperator().read_folder.assert_called_once() From 5e283db989de2c671b649c15ec31ae3e2b01b36a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Feb 2022 09:05:45 +0000 Subject: [PATCH 239/336] Bump rojopolis/spellcheck-github-actions from 0.21.1 to 0.22.1 Bumps [rojopolis/spellcheck-github-actions](https://github.com/rojopolis/spellcheck-github-actions) from 0.21.1 to 0.22.1. - [Release notes](https://github.com/rojopolis/spellcheck-github-actions/releases) - [Changelog](https://github.com/rojopolis/spellcheck-github-actions/blob/master/CHANGELOG.md) - [Commits](https://github.com/rojopolis/spellcheck-github-actions/compare/0.21.1...0.22.1) --- updated-dependencies: - dependency-name: rojopolis/spellcheck-github-actions dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 97f858c21..8263df8b9 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -13,7 +13,7 @@ jobs: steps: - uses: actions/checkout@master - - uses: rojopolis/spellcheck-github-actions@0.21.1 + - uses: rojopolis/spellcheck-github-actions@0.22.1 name: Spellcheck with: config_path: .github/config/.spellcheck.yml From 5f748692640ed605e358dca5d5bec67cd1978935 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Feb 2022 09:06:41 +0000 Subject: [PATCH 240/336] Bump oidcrp from 2.1.3 to 2.1.4 Bumps [oidcrp](https://github.com/IdentityPython/oicrp) from 2.1.3 to 2.1.4. 
- [Release notes](https://github.com/IdentityPython/oicrp/releases) - [Commits](https://github.com/IdentityPython/oicrp/compare/v2.1.3...v2.1.4) --- updated-dependencies: - dependency-name: oidcrp dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index d2aad5e51..a2661f1aa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -51,9 +51,9 @@ multidict==5.2.0 # via # aiohttp # yarl -oidcmsg==1.5.4 +oidcmsg==1.6.0 # via oidcrp -oidcrp==2.1.3 +oidcrp==2.1.4 # via -r requirements.in pycparser==2.21 # via cffi From 48a927f622bdede7ce51ffb734c0a418ea84c11a Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Mon, 28 Feb 2022 13:37:58 +0200 Subject: [PATCH 241/336] refactor templates to be listed under projects using old object assigning code --- metadata_backend/api/handlers/template.py | 19 +++- metadata_backend/api/operators.py | 119 ++++++++++++---------- metadata_backend/database/db_service.py | 9 +- tests/integration/run_tests.py | 13 ++- tests/test_operators.py | 45 ++++++++ 5 files changed, 138 insertions(+), 67 deletions(-) diff --git a/metadata_backend/api/handlers/template.py b/metadata_backend/api/handlers/template.py index 674d673f4..118193d10 100644 --- a/metadata_backend/api/handlers/template.py +++ b/metadata_backend/api/handlers/template.py @@ -123,6 +123,10 @@ async def post_template(self, req: Request) -> Response: # Move projectId to template structure, so that it is saved in mongo tmpl["template"]["projectId"] = tmpl["projectId"] accession_id, _ = await operator.create_metadata_object(collection, tmpl["template"]) + data = [{"accessionId": accession_id, "schema": collection}] + if "tags" in tmpl: + data[0]["tags"] = tmpl["tags"] + await project_op.assign_templates(tmpl["projectId"], data) tmpl_list.append({"accessionId": accession_id}) body = ujson.dumps(tmpl_list, escape_forward_slashes=False) @@ -152,6 +156,10 @@ async def post_template(self, req: Request) -> Response: # Move projectId to template structure, so that it is saved in mongo content["template"]["projectId"] = content["projectId"] accession_id, _ = await operator.create_metadata_object(collection, content["template"]) + data = [{"accessionId": accession_id, "schema": collection}] + if "tags" in content: + data[0]["tags"] = content["tags"] + await project_op.assign_templates(content["projectId"], data) body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) @@ -204,13 +212,20 @@ async def delete_template(self, req: Request) -> Response: schema_type = req.match_info["schema"] self._check_schema_exists(schema_type) collection = f"template-{schema_type}" - + project_id = self._get_param(req, "projectId") accession_id = req.match_info["accessionId"] db_client = req.app["db_client"] await Operator(db_client).check_exists(collection, accession_id) + project_operator = ProjectOperator(db_client) - await self._handle_check_ownership(req, collection, accession_id) + project_ok = await self._handle_check_ownership(req, collection, accession_id) + if project_ok: + await project_operator.remove_templates(project_id, [accession_id]) + else: + reason = "This template does not belong to this project." 
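+            # NOTE: the ownership check did not pass for this project, so the
+            # template is left untouched and the request is rejected below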
+ LOG.error(reason) + raise web.HTTPUnprocessableEntity(reason=reason) accession_id = await Operator(db_client).delete_metadata_object(collection, accession_id) diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 22a5a9142..a9ff265a6 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -305,64 +305,26 @@ def __init__(self, db_client: AsyncIOMotorClient) -> None: """ super().__init__(mongo_database, "application/json", db_client) - async def query_templates_by_project(self, project_id: str) -> List[Dict[str, str]]: - """Query all template schemas for given project ID. + async def query_templates_by_project(self, project_id: str) -> List[Dict[str, Union[Dict[str, str], str]]]: + """Get templates list from given project ID. :param project_id: project internal ID that owns templates - :returns: list of simplified template objects + :returns: list of templates in project """ + try: + templates_cursor = self.db_service.query( + "project", {"projectId": project_id}, custom_projection={"_id": 0, "templates": 1} + ) + templates = [template async for template in templates_cursor] + except (ConnectionFailure, OperationFailure) as error: + reason = f"Error happened while getting templates from project {project_id}: {error}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) - templates: List[Dict[str, str]] = [] - - # List of possible template collections - collections = [ - "template-analysis", - "template-dac", - "template-dataset", - "template-experiment", - "template-policy", - "template-run", - "template-sample", - "template-study", - ] - - # Over all collections, query for accessionId and - # title (in study it's descriptor.studyTitle), cast them as displayTitle - # add schema name from current collection, bundle together - for collection in collections: - - # Cast title as displayTitle - title = "$title" - if collection == "template-study": - # Study has title in slightly different format - title = "$descriptor.studyTitle" - - # Query with projectId, get title and id, set schema with default value - _query = [ - { - "$match": { - "projectId": project_id, - }, - }, - { - "$project": { - "_id": 0, - "displayTitle": title, - "accessionId": "$accessionId", - "schema": collection, - }, - }, - ] - data_raw = await self.db_service.do_aggregate(collection, _query) - - # Parse and bundle up - if not data_raw: - data = [] - else: - data = [doc for doc in data_raw] - templates += data - - return templates + if len(templates) == 1: + return templates[0]["templates"] + else: + return [] async def get_object_project(self, collection: str, accession_id: str) -> str: """Get the project ID the object is associated to. 
@@ -1220,7 +1182,7 @@ async def create_project(self, project_number: str) -> str:
         :raises: HTTPBadRequest if error occurs during the process of insert
         :returns: Project id for the project inserted to database
         """
-        project_data: Dict[str, str] = dict()
+        project_data: Dict[str, Union[str, List[str]]] = dict()
 
         try:
             existing_project_id = await self.db_service.exists_project_by_external_id(project_number)
@@ -1229,6 +1191,7 @@ async def create_project(self, project_number: str) -> str:
                 return existing_project_id
             else:
                 project_id = self._generate_project_id()
+                project_data["templates"] = []
                 project_data["projectId"] = project_id
                 project_data["externalId"] = project_number
                 insert_success = await self.db_service.create("project", project_data)
@@ -1257,6 +1220,52 @@ async def _check_project_exists(self, project_id: str) -> None:
             LOG.error(reason)
             raise web.HTTPNotFound(reason=reason)
 
+    async def assign_templates(self, project_id: str, object_ids: List) -> None:
+        """Assign templates to project.
+
+        :param project_id: ID of project to update
+        :param object_ids: list of template IDs to assign
+        :raises: HTTPBadRequest if assigning templates to project was not successful
+        :returns: None
+        """
+        try:
+            await self._check_project_exists(project_id)
+            assign_success = await self.db_service.append(
+                "project", project_id, {"templates": {"$each": object_ids, "$position": 0}}
+            )
+        except (ConnectionFailure, OperationFailure) as error:
+            reason = f"Error happened while assigning templates to project: {error}"
+            LOG.error(reason)
+            raise web.HTTPBadRequest(reason=reason)
+
+        if not assign_success:
+            reason = "Assigning templates to project failed."
+            LOG.error(reason)
+            raise web.HTTPBadRequest(reason=reason)
+
+        LOG.info(f"Assigning templates={object_ids} to project={project_id} succeeded.")
+
+    async def remove_templates(self, project_id: str, object_ids: List) -> None:
+        """Remove templates from project.
+
+        :param project_id: ID of project to update
+        :param object_ids: list of template IDs to remove
+        :raises: HTTPBadRequest if db connection fails
+        :returns: None
+        """
+        remove_content: Dict
+        try:
+            await self._check_project_exists(project_id)
+            for obj in object_ids:
+                remove_content = {"templates": {"accessionId": obj}}
+                await self.db_service.remove("project", project_id, remove_content)
+        except (ConnectionFailure, OperationFailure) as error:
+            reason = f"Error happened while removing templates from project: {error}"
+            LOG.error(reason)
+            raise web.HTTPBadRequest(reason=reason)
+
+        LOG.info(f"Removing templates={object_ids} from project={project_id} succeeded.")
+
     def _generate_project_id(self) -> str:
         """Generate random project id.

diff --git a/metadata_backend/database/db_service.py b/metadata_backend/database/db_service.py
index 7bdd7b198..a961a5a0a 100644
--- a/metadata_backend/database/db_service.py
+++ b/metadata_backend/database/db_service.py
@@ -189,7 +189,7 @@ async def remove(self, collection: str, accession_id: str, data_to_be_removed: A
         updated to removed.
         :returns: True if operation was successful
         """
-        id_key = f"{collection}Id" if (collection in ["folder", "user"]) else "accessionId"
+        id_key = f"{collection}Id" if (collection in ["folder", "user", "project"]) else "accessionId"
         find_by_id = {id_key: accession_id}
         remove_op = {"$pull": data_to_be_removed}
         result = await self.database[collection].find_one_and_update(
@@ -208,7 +208,7 @@ async def append(self, collection: str, accession_id: str, data_to_be_addded: An
         updated to removed.
         
:returns: True if operation was successful """ - id_key = f"{collection}Id" if (collection in ["folder", "user"]) else "accessionId" + id_key = f"{collection}Id" if (collection in ["folder", "user", "project"]) else "accessionId" find_by_id = {id_key: accession_id} # push vs addtoSet # push allows us to specify the postion but it does not check the items are unique @@ -259,7 +259,7 @@ async def delete(self, collection: str, accession_id: str) -> bool: LOG.debug(f"DB doc in {collection} deleted for {accession_id}.") return result.acknowledged - def query(self, collection: str, query: Dict) -> AsyncIOMotorCursor: + def query(self, collection: str, query: Dict, custom_projection: Dict = {}) -> AsyncIOMotorCursor: """Query database with given query. Find() does no I/O and does not require an await expression, hence @@ -267,10 +267,13 @@ def query(self, collection: str, query: Dict) -> AsyncIOMotorCursor: :param collection: Collection where document should be searched from :param query: query to be used + :param custom_projection: overwrites default projection :returns: Async cursor instance which should be awaited when iterating """ LOG.debug(f"DB doc query performed in {collection}.") projection = {"_id": False, "eppn": False} if collection == "user" else {"_id": False} + if custom_projection: + projection = custom_projection return self.database[collection].find(query, projection) @auto_reconnect diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index 0fc462e60..d711ef0c5 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -406,14 +406,15 @@ async def patch_template(sess, schema, template_id, update_filename, project_id) return ans_put["accessionId"] -async def delete_template(sess, schema, template_id): +async def delete_template(sess, schema, template_id, project_id): """Delete metadata object within session. 
:param sess: HTTP session in which request call is made
     :param schema: name of the schema (folder) used for testing
     :param template_id: id of the draft
+    :param project_id: id of the project the template belongs to
     """
-    async with sess.delete(f"{templates_url}/{schema}/{template_id}") as resp:
+    async with sess.delete(f"{templates_url}/{schema}/{template_id}?projectId={project_id}") as resp:
         LOG.debug(f"Deleting template object {template_id} from {schema}")
         assert resp.status == 204, f"HTTP Status code error, got {resp.status}"
 
@@ -1326,19 +1327,17 @@ async def test_crud_users_works(sess, project_id):
     assert res["projectId"] == project_id
     assert res["identifiers"]["primaryId"] == "SRP000539"
 
-    await delete_template(sess, "study", template_id)
+    await delete_template(sess, "study", template_id, project_id)
     async with sess.get(f"{templates_url}/study/{template_id}") as resp:
         LOG.debug(f"Checking that template {template_id} was deleted")
         assert resp.status == 404
 
     template_ids = await post_template_json(sess, "study", "SRP000539_list.json", project_id)
     assert len(template_ids) == 2, "templates could not be added as batch"
-
     templates = await get_templates(sess, project_id)
-    assert len(templates) == 2, "did not find templates from project"
+
+    assert len(templates) == 2, f"should be 2 templates, got {len(templates)}"
     assert templates[0]["schema"] == "template-study", "wrong template schema"
-    title = "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing"
-    assert templates[0]["displayTitle"] == title, "wrong template title"
 
     # Delete user
     await delete_user(sess, user_id)
diff --git a/tests/test_operators.py b/tests/test_operators.py
index f0a65f17e..a35f1ab9c 100644
--- a/tests/test_operators.py
+++ b/tests/test_operators.py
@@ -1161,6 +1161,51 @@ async def test_check_project_exists_passes(self):
         await operator._check_project_exists(self.project_id)
         operator.db_service.exists.assert_called_once()
 
+    async def test_project_objects_remove_passes(self):
+        """Test remove objects method for projects works."""
+        operator = ProjectOperator(self.client)
+        operator.db_service.exists.return_value = True
+        operator.db_service.remove.return_value = None
+        await operator.remove_templates(self.project_generated_id, ["id"])
+        operator.db_service.exists.assert_called_once()
+        operator.db_service.remove.assert_called_once()
+        self.assertEqual(len(operator.db_service.remove.mock_calls), 1)
+
+    async def test_project_objects_remove_fails(self):
+        """Test remove objects method for projects fails."""
+        operator = ProjectOperator(self.client)
+        operator.db_service.exists.return_value = True
+        operator.db_service.remove.side_effect = ConnectionFailure
+        with self.assertRaises(HTTPBadRequest):
+            await operator.remove_templates(self.project_generated_id, ["id"])
+
+    async def test_project_objects_append_passes(self):
+        """Test append objects method for projects works."""
+        operator = ProjectOperator(self.client)
+        operator.db_service.exists.return_value = True
+        operator.db_service.append.return_value = True
+        await operator.assign_templates(self.project_generated_id, [])
+        operator.db_service.exists.assert_called_once()
+        operator.db_service.append.assert_called_once()
+        self.assertEqual(len(operator.db_service.append.mock_calls), 1)
+
+    async def test_project_objects_append_on_result_fails(self):
+        """Test append objects method for projects fails on db response validation."""
+        operator = ProjectOperator(self.client)
+        operator.db_service.exists.return_value = True
+        operator.db_service.append.return_value = False
+        with self.assertRaises(HTTPBadRequest):
+            await operator.assign_templates(self.project_generated_id, [])
+        operator.db_service.exists.assert_called_once()
+        operator.db_service.append.assert_called_once()
+
+    async def test_project_objects_assign_fails(self):
+        """Test append objects method for projects fails."""
+        operator = ProjectOperator(self.client)
+        operator.db_service.exists.side_effect = ConnectionFailure
+        with self.assertRaises(HTTPBadRequest):
+            await operator.assign_templates(self.project_generated_id, [])
+
 
 if __name__ == "__main__":
     unittest.main()

From 829f5a176d06f5a5a51edfdc729c7c76875d9777 Mon Sep 17 00:00:00 2001
From: "teemu.kataja"
Date: Tue, 1 Mar 2022 09:33:54 +0200
Subject: [PATCH 242/336] remove projectid requirement from template deletion

---
 metadata_backend/api/handlers/restapi.py  | 15 ++++++++-------
 metadata_backend/api/handlers/template.py |  3 +--
 metadata_backend/api/operators.py         |  8 +++++---
 tests/integration/run_tests.py            |  7 +++----
 4 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/metadata_backend/api/handlers/restapi.py b/metadata_backend/api/handlers/restapi.py
index bc3514f69..96dcf4462 100644
--- a/metadata_backend/api/handlers/restapi.py
+++ b/metadata_backend/api/handlers/restapi.py
@@ -1,7 +1,7 @@
 """Handle HTTP methods for server."""
 import json
 from math import ceil
-from typing import AsyncGenerator, Dict, List
+from typing import AsyncGenerator, Dict, List, Tuple
 
 import ujson
 from aiohttp import web
@@ -64,7 +64,7 @@ def _get_param(self, req: Request, name: str) -> str:
             raise web.HTTPBadRequest(reason=reason)
         return param
 
-    async def _handle_check_ownership(self, req: Request, collection: str, accession_id: str) -> bool:
+    async def _handle_check_ownership(self, req: Request, collection: str, accession_id: str) -> Tuple[bool, str]:
         """Check if object belongs to project.
 
         
For this we need to check the object is in exactly 1 folder and we need to check @@ -74,13 +74,14 @@ async def _handle_check_ownership(self, req: Request, collection: str, accession :param collection: collection or schema of document :param doc_id: document accession id :raises: HTTPUnauthorized if accession id does not belong to user - :returns: bool + :returns: bool and possible project id """ db_client = req.app["db_client"] current_user = get_session(req)["user_info"] user_op = UserOperator(db_client) _check = False + project_id = "" if collection != "folders": folder_op = FolderOperator(db_client) @@ -89,23 +90,23 @@ async def _handle_check_ownership(self, req: Request, collection: str, accession # _check = True if check: # if the draft object is found in folder we just need to check if the folder belongs to user - _check = await user_op.check_user_has_doc(req, "folders", current_user, folder_id) + _check, project_id = await user_op.check_user_has_doc(req, "folders", current_user, folder_id) elif collection.startswith("template"): # if collection is template but not found in a folder # we also check if object is in templates of the user # they will be here if they will not be deleted after publish - _check = await user_op.check_user_has_doc(req, collection, current_user, accession_id) + _check, project_id = await user_op.check_user_has_doc(req, collection, current_user, accession_id) else: _check = False else: - _check = await user_op.check_user_has_doc(req, collection, current_user, accession_id) + _check, project_id = await user_op.check_user_has_doc(req, collection, current_user, accession_id) if not _check: reason = f"{collection} {accession_id}." LOG.error(reason) raise web.HTTPUnauthorized(reason=reason) - return _check + return _check, project_id async def _get_collection_objects( self, folder_op: AsyncIOMotorClient, collection: str, seq: List diff --git a/metadata_backend/api/handlers/template.py b/metadata_backend/api/handlers/template.py index 118193d10..372d61203 100644 --- a/metadata_backend/api/handlers/template.py +++ b/metadata_backend/api/handlers/template.py @@ -212,14 +212,13 @@ async def delete_template(self, req: Request) -> Response: schema_type = req.match_info["schema"] self._check_schema_exists(schema_type) collection = f"template-{schema_type}" - project_id = self._get_param(req, "projectId") accession_id = req.match_info["accessionId"] db_client = req.app["db_client"] await Operator(db_client).check_exists(collection, accession_id) project_operator = ProjectOperator(db_client) - project_ok = await self._handle_check_ownership(req, collection, accession_id) + project_ok, project_id = await self._handle_check_ownership(req, collection, accession_id) if project_ok: await project_operator.remove_templates(project_id, [accession_id]) else: diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index a9ff265a6..ade723c9e 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -949,14 +949,16 @@ def __init__(self, db_client: AsyncIOMotorClient) -> None: """ self.db_service = DBService(mongo_database, db_client) - async def check_user_has_doc(self, req: web.Request, collection: str, user_id: str, accession_id: str) -> bool: + async def check_user_has_doc( + self, req: web.Request, collection: str, user_id: str, accession_id: str + ) -> Tuple[bool, str]: """Check a folder/template belongs to same project the user is in. 
:param collection: collection it belongs to, it would be used as path :param user_id: user_id from session :param accession_id: document by accession_id :raises: HTTPUnprocessableEntity if more users seem to have same folder - :returns: True if accession_id belongs to user + :returns: True and project_id if accession_id belongs to user, False otherwise """ LOG.debug(f"check that user {user_id} belongs to same project as {collection} {accession_id}") @@ -978,7 +980,7 @@ async def check_user_has_doc(self, req: web.Request, collection: str, user_id: s current_user = get_session(req)["user_info"] user = await user_operator.read_user(current_user) user_has_project = await user_operator.check_user_has_project(project_id, user["userId"]) - return user_has_project + return user_has_project, project_id async def check_user_has_project(self, project_id: str, user_id: str) -> bool: """Check that user has project affiliation. diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index d711ef0c5..82b6cec3c 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -406,15 +406,14 @@ async def patch_template(sess, schema, template_id, update_filename, project_id) return ans_put["accessionId"] -async def delete_template(sess, schema, template_id, project_id): +async def delete_template(sess, schema, template_id): """Delete metadata object within session. :param sess: HTTP session in which request call is made :param schema: name of the schema (folder) used for testing :param template_id: id of the draft - :param template_id: id of the project the template belongs to """ - async with sess.delete(f"{templates_url}/{schema}/{template_id}?projectId={project_id}") as resp: + async with sess.delete(f"{templates_url}/{schema}/{template_id}") as resp: LOG.debug(f"Deleting template object {template_id} from {schema}") assert resp.status == 204, f"HTTP Status code error, got {resp.status}" @@ -1327,7 +1326,7 @@ async def test_crud_users_works(sess, project_id): assert res["projectId"] == project_id assert res["identifiers"]["primaryId"] == "SRP000539" - await delete_template(sess, "study", template_id, project_id) + await delete_template(sess, "study", template_id) async with sess.get(f"{templates_url}/study/{template_id}") as resp: LOG.debug(f"Checking that template {template_id} was deleted") assert resp.status == 404 From 48cdb68400dd1ac63458bd94333487812f7c2e3a Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Tue, 1 Mar 2022 09:33:54 +0200 Subject: [PATCH 243/336] remove deprecated user patch --- CHANGELOG.md | 10 ++-- metadata_backend/api/handlers/user.py | 80 --------------------------- metadata_backend/server.py | 1 - tests/test_handlers.py | 19 ------- 4 files changed, 6 insertions(+), 104 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a25924c49..5db8e6c71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,12 +35,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - enum are sorted alphabetically, with the exception of other and unspecified values which are left at the end of the list - allow for accession key in `referenceAlignment` & `process sequence` as array, previously all accession keys were converted to `accessionId` which is not correct - add default `gender` as `unknown` -- Project ownership #346 --DRAFT - - deprecated `folders` and `templates` keys from `GET /users/current` - - as a side effect, deprecated `items` query parameter from the same endpoint +- Project ownership #346 - added new 
collection `project` - added new key `projects` to `user` - - added new key `projectId` to `folder` and `template-*` + - added new key `projectId` to `folder` and `template-*` collections - new mandatory `/userinfo` value from AAI at login time `sdSubmitProjects` - user is redirected to `/noproject` if key is empty or missing - new mandatory query parameter `projectId` in `GET /folders` @@ -79,6 +77,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed - Removed `Authlib` dependency #315 +- Project ownership #346 + - deprecated `folders` and `templates` keys from `GET /users/current` + - as a side effect, deprecated `items` query parameter from the same endpoint + - deprecated `PATCH /user` ### Deprecated diff --git a/metadata_backend/api/handlers/user.py b/metadata_backend/api/handlers/user.py index b706c7c94..a33bff7df 100644 --- a/metadata_backend/api/handlers/user.py +++ b/metadata_backend/api/handlers/user.py @@ -1,6 +1,4 @@ """Handle HTTP methods for server.""" -import re -from typing import Any import ujson from aiohttp import web @@ -16,59 +14,6 @@ class UserAPIHandler(RESTAPIHandler): """API Handler for users.""" - def _check_patch_user(self, patch_ops: Any) -> None: - """Check patch operations in request are valid. - - We check that ``folders`` have string values (one or a list) - and ``drafts`` have ``_required_values``. - For tags we check that the ``submissionType`` takes either ``XML`` or - ``Form`` as values. - :param patch_ops: JSON patch request - :raises: HTTPBadRequest if request does not fullfil one of requirements - :raises: HTTPUnauthorized if request tries to do anything else than add or replace - :returns: None - """ - _arrays = ["/templates/-", "/folders/-"] - _required_values = ["schema", "accessionId"] - _tags = re.compile("^/(templates)/[0-9]*/(tags)$") - for op in patch_ops: - if _tags.match(op["path"]): - LOG.info(f"{op['op']} on tags in folder") - if "submissionType" in op["value"].keys() and op["value"]["submissionType"] not in ["XML", "Form"]: - reason = "submissionType is restricted to either 'XML' or 'Form' values." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - pass - else: - if all(i not in op["path"] for i in _arrays): - reason = f"Request contains '{op['path']}' key that cannot be updated to user object" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - if op["op"] in ["remove", "copy", "test", "move", "replace"]: - reason = f"{op['op']} on {op['path']} is not allowed." - LOG.error(reason) - raise web.HTTPUnauthorized(reason=reason) - if op["path"] == "/folders/-": # DEPRECATED, what to do? - if not (isinstance(op["value"], str) or isinstance(op["value"], list)): - reason = "We only accept string folder IDs." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - if op["path"] == "/templates/-": # DEPRECATED, what to do? - _ops = op["value"] if isinstance(op["value"], list) else [op["value"]] - for item in _ops: - if not all(key in item.keys() for key in _required_values): - reason = "accessionId and schema are required fields." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - if ( - "tags" in item - and "submissionType" in item["tags"] - and item["tags"]["submissionType"] not in ["XML", "Form"] - ): - reason = "submissionType is restricted to either 'XML' or 'Form' values." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - async def get_user(self, req: Request) -> Response: """Get one user by its user ID. 
@@ -92,31 +37,6 @@ async def get_user(self, req: Request) -> Response: body=ujson.dumps(user, escape_forward_slashes=False), status=200, content_type="application/json" ) - async def patch_user(self, req: Request) -> Response: - """Update user object with a specific user ID. - - :param req: PATCH request - :raises: HTTPUnauthorized if not current user - :returns: JSON response containing user ID for updated user object - """ - user_id = req.match_info["userId"] - if user_id != "current": - LOG.info(f"User ID {user_id} patch was requested") - raise web.HTTPUnauthorized(reason="Only current user operations are allowed") - db_client = req.app["db_client"] - - patch_ops = await self._get_data(req) - self._check_patch_user(patch_ops) - - operator = UserOperator(db_client) - - current_user = get_session(req)["user_info"] - user = await operator.update_user(current_user, patch_ops if isinstance(patch_ops, list) else [patch_ops]) - - body = ujson.dumps({"userId": user}) - LOG.info(f"PATCH user with ID {user} was successful.") - return web.Response(body=body, status=200, content_type="application/json") - async def delete_user(self, req: Request) -> Response: """Delete user from database. diff --git a/metadata_backend/server.py b/metadata_backend/server.py index e7513f4bd..48b1d59d3 100644 --- a/metadata_backend/server.py +++ b/metadata_backend/server.py @@ -106,7 +106,6 @@ async def init() -> web.Application: web.patch("/publish/{folderId}", _folder.publish_folder), # users operations web.get("/users/{userId}", _user.get_user), - web.patch("/users/{userId}", _user.patch_user), web.delete("/users/{userId}", _user.delete_user), # submit web.post("/submit", _submission.submit), diff --git a/tests/test_handlers.py b/tests/test_handlers.py index dc8eded37..801a5e515 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -678,25 +678,6 @@ async def test_user_deletion_is_called(self): await self.client.delete("/users/current") self.MockedUserOperator().delete_user.assert_called_once() - async def test_update_user_fails_with_wrong_key(self): - """Test that user object does not update when forbidden keys are provided.""" - data = [{"op": "add", "path": "/userId"}] - response = await self.client.patch("/users/current", json=data) - self.assertEqual(response.status, 400) - json_resp = await response.json() - reason = "Request contains '/userId' key that cannot be updated to user object" - self.assertEqual(reason, json_resp["detail"]) - - async def test_update_user_passes(self): - """Test that user object would update with correct keys.""" - self.MockedUserOperator().update_user.return_value = self.user_id - data = [{"op": "add", "path": "/templates/-", "value": [{"accessionId": "3", "schema": "sample"}]}] - response = await self.client.patch("/users/current", json=data) - self.MockedUserOperator().update_user.assert_called_once() - self.assertEqual(response.status, 200) - json_resp = await response.json() - self.assertEqual(json_resp["userId"], self.user_id) - class FolderHandlerTestCase(HandlersTestCase): """Folder API endpoint class test cases.""" From 21f69af14e344fb015d2f5282efaba60a2b28e1b Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Tue, 1 Mar 2022 12:15:43 +0200 Subject: [PATCH 244/336] fix separate template list updating in project-collection --- CHANGELOG.md | 1 + metadata_backend/api/handlers/template.py | 20 +++++++++-- metadata_backend/api/operators.py | 23 +++++++++++++ tests/integration/run_tests.py | 13 ++++---- .../test_files/study/SRP000539_template.json | 3 ++ 
tests/test_files/study/patch.json             |  6 +++-
 tests/test_operators.py                       | 33 +++++++++++++++++++
 7 files changed, 90 insertions(+), 9 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5db8e6c71..0df200159 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -44,6 +44,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   - new mandatory query parameter `projectId` in `GET /folders`
   - new mandatory JSON key `projectId` in `POST /folders` and `POST /templates`
   - new endpoint `GET /templates` to replace `GET /users/current` `{"templates":[...]}`
+  - new JSON keys `index` and `tags` to `PATCH /templates/schema/templateId`, same values as were previously used in `PATCH /user` which is now removed
   - WARNING: breaking change that requires fresh database, because "project" is new information that did not exist before, and it can't be migrated to existing user-owned hierarchy

diff --git a/metadata_backend/api/handlers/template.py b/metadata_backend/api/handlers/template.py
index 372d61203..a5cfff50c 100644
--- a/metadata_backend/api/handlers/template.py
+++ b/metadata_backend/api/handlers/template.py
@@ -193,8 +193,24 @@ async def patch_template(self, req: Request) -> Response:
 
         await operator.check_exists(collection, accession_id)
 
-        await self._handle_check_ownership(req, collection, accession_id)
-
+        _, project_id = await self._handle_check_ownership(req, collection, accession_id)
+
+        # Update the templates-list in project-collection
+        if "index" in content and "tags" in content:
+            LOG.debug("update template-list tags")
+            index = content.pop("index")
+            tags = content.pop("tags")
+            update_operation = [
+                {
+                    "op": "replace",
+                    "path": f"/templates/{index}/tags",
+                    "value": tags,
+                }
+            ]
+            project_operator = ProjectOperator(db_client)
+            await project_operator.update_project(project_id, update_operation)
+
+        # Update the actual template data in template-collection
         accession_id = await operator.update_metadata_object(collection, accession_id, content)
 
         body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False)
diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py
index ade723c9e..57d1e427c 100644
--- a/metadata_backend/api/operators.py
+++ b/metadata_backend/api/operators.py
@@ -1268,6 +1268,29 @@ async def remove_templates(self, project_id: str, object_ids: List) -> None:
 
         LOG.info(f"Removing templates={object_ids} from project={project_id} succeeded.")
 
+    async def update_project(self, project_id: str, patch: List) -> str:
+        """Update project object in database.
+
+        :param project_id: ID of project to update
+        :param patch: Patch operations determined in the request
+        :returns: ID of the project updated to database
+        """
+        try:
+            await self._check_project_exists(project_id)
+            update_success = await self.db_service.patch("project", project_id, patch)
+        except (ConnectionFailure, OperationFailure) as error:
+            reason = f"Error happened while updating project: {error}"
+            LOG.error(reason)
+            raise web.HTTPBadRequest(reason=reason)
+
+        if not update_success:
+            reason = "Updating project in database failed."
+            LOG.error(reason)
+            raise web.HTTPBadRequest(reason=reason)
+        else:
+            LOG.info(f"Updating project={project_id} to database succeeded.")
+            return project_id
+
     def _generate_project_id(self) -> str:
         """Generate random project id.

diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index 82b6cec3c..6b209b86f 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -385,19 +385,15 @@ async def get_template(sess, schema, template_id): return json.dumps(ans) -async def patch_template(sess, schema, template_id, update_filename, project_id): +async def patch_template(sess, schema, template_id, update_filename): """Patch one metadata object within session, return accessionId. :param sess: HTTP session in which request call is made :param schema: name of the schema (folder) used for testing :param template_id: id of the draft :param update_filename: name of the file used to use for updating data. - :param project_id: id of the project the folder belongs to """ request_data = await create_request_json_data(schema, update_filename) - request_data = json.loads(request_data) - request_data["projectId"] = project_id - request_data = json.dumps(request_data) async with sess.patch(f"{templates_url}/{schema}/{template_id}", data=request_data) as resp: LOG.debug(f"Update draft object in {schema}") assert resp.status == 200, f"HTTP Status code error, got {resp.status}" @@ -1318,7 +1314,7 @@ async def test_crud_users_works(sess, project_id): assert resp.status == 404 template_id = await post_template_json(sess, "study", "SRP000539_template.json", project_id) - await patch_template(sess, "study", template_id, "patch.json", project_id) + await patch_template(sess, "study", template_id, "patch.json") async with sess.get(f"{templates_url}/study/{template_id}") as resp: LOG.debug(f"Checking that template: {template_id} was added") res = await resp.json() @@ -1326,6 +1322,11 @@ async def test_crud_users_works(sess, project_id): assert res["projectId"] == project_id assert res["identifiers"]["primaryId"] == "SRP000539" + async with sess.get(f"{templates_url}?projectId={project_id}") as resp: + LOG.debug("Checking that template display title was updated in separate templates list") + res = await resp.json() + assert res[0]["tags"]["displayTitle"] == "new name" + await delete_template(sess, "study", template_id) async with sess.get(f"{templates_url}/study/{template_id}") as resp: LOG.debug(f"Checking that template {template_id} was deleted") diff --git a/tests/test_files/study/SRP000539_template.json b/tests/test_files/study/SRP000539_template.json index 79be5fe58..2c189ab34 100644 --- a/tests/test_files/study/SRP000539_template.json +++ b/tests/test_files/study/SRP000539_template.json @@ -34,5 +34,8 @@ "value": "PRJNA107265" } ] + }, + "tags": { + "displayTitle": "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing" } } diff --git a/tests/test_files/study/patch.json b/tests/test_files/study/patch.json index 5b7ed1c93..196832d91 100644 --- a/tests/test_files/study/patch.json +++ b/tests/test_files/study/patch.json @@ -1,4 +1,8 @@ { "centerName": "GEOM", - "alias": "GSE10968" + "alias": "GSE10968", + "index": 0, + "tags": { + "displayTitle": "new name" + } } \ No newline at end of file diff --git a/tests/test_operators.py b/tests/test_operators.py index a35f1ab9c..4a0741720 100644 --- a/tests/test_operators.py +++ b/tests/test_operators.py @@ -1206,6 +1206,39 @@ async def test_project_objects_assing_fails(self): with self.assertRaises(HTTPBadRequest): await operator.assign_templates(self.project_generated_id, []) + async def test_update_project_fail_no_project(self): + """Test that project which does not exist can not be updated.""" + operator = 
ProjectOperator(self.client) + with self.assertRaises(HTTPNotFound): + with patch( + "metadata_backend.api.operators.ProjectOperator._check_project_exists", side_effect=HTTPNotFound + ): + await operator.update_project(self.project_generated_id, []) + + async def test_update_project_fail_connfail(self): + """Test project update failure with database connection failure.""" + operator = ProjectOperator(self.client) + operator.db_service.patch.side_effect = ConnectionFailure + with self.assertRaises(HTTPBadRequest): + with patch("metadata_backend.api.operators.ProjectOperator._check_project_exists", return_value=True): + await operator.update_project(self.project_generated_id, []) + + async def test_update_project_fail_general(self): + """Test project update failure with general error.""" + operator = ProjectOperator(self.client) + operator.db_service.patch.return_value = False + with self.assertRaises(HTTPBadRequest): + with patch("metadata_backend.api.operators.ProjectOperator._check_project_exists", return_value=True): + await operator.update_project(self.project_generated_id, []) + + async def test_update_project_pass(self): + """Test project update passes.""" + operator = ProjectOperator(self.client) + operator.db_service.patch.return_value = True + with patch("metadata_backend.api.operators.ProjectOperator._check_project_exists", return_value=True): + pid = await operator.update_project(self.project_generated_id, []) + self.assertEqual(pid, self.project_generated_id) + if __name__ == "__main__": unittest.main() From b7438916c67bdbf8db70d67895266dcfde2d3c2b Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Tue, 1 Mar 2022 12:56:06 +0200 Subject: [PATCH 245/336] add new word --- .github/config/.wordlist.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/config/.wordlist.txt b/.github/config/.wordlist.txt index cf53823bc..7cc4169e7 100644 --- a/.github/config/.wordlist.txt +++ b/.github/config/.wordlist.txt @@ -593,6 +593,7 @@ taxonomicreferenceset taxonomysystem taxonomysystemversion telephonenumber +templateId tigrinya tls toctree From b676f5023de5e4bfafd391dd4078a1f8d9b0c653 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Mar 2022 09:06:11 +0000 Subject: [PATCH 246/336] Bump actions/checkout from 2 to 3 Bumps [actions/checkout](https://github.com/actions/checkout) from 2 to 3. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v2...v3) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/codeql.yml | 2 +- .github/workflows/docs.yml | 4 ++-- .github/workflows/int.yml | 2 +- .github/workflows/production.yml | 2 +- .github/workflows/publish.yml | 2 +- .github/workflows/style.yml | 2 +- .github/workflows/unit.yml | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 16d77d6e0..65d90dc53 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -25,7 +25,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. 
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 8263df8b9..8933bfb48 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -12,7 +12,7 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@master + - uses: actions/checkout@v3 - uses: rojopolis/spellcheck-github-actions@0.22.1 name: Spellcheck with: @@ -28,7 +28,7 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: diff --git a/.github/workflows/int.yml b/.github/workflows/int.yml index bd2d36633..9272f28f3 100644 --- a/.github/workflows/int.yml +++ b/.github/workflows/int.yml @@ -15,7 +15,7 @@ jobs: name: Integration Tests steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: diff --git a/.github/workflows/production.yml b/.github/workflows/production.yml index cc70326fb..b8d097ff7 100644 --- a/.github/workflows/production.yml +++ b/.github/workflows/production.yml @@ -23,7 +23,7 @@ jobs: - 5000:5000 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 name: Get sources - name: Set up Docker Buildx diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 44dad60c4..5936add9e 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out the repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Prepare id: prep diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index 9a0287bcf..c8ea0eef4 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -13,7 +13,7 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: diff --git a/.github/workflows/unit.yml b/.github/workflows/unit.yml index 74022bb20..35b13193b 100644 --- a/.github/workflows/unit.yml +++ b/.github/workflows/unit.yml @@ -13,7 +13,7 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: From 02a791337dfca0069e85f63ce8eb2c215e61fd3a Mon Sep 17 00:00:00 2001 From: "teemu.kataja" Date: Fri, 11 Mar 2022 12:16:58 +0200 Subject: [PATCH 247/336] remove project check, already done in aai, unreachable code --- .github/config/.wordlist.txt | 1 - CHANGELOG.md | 2 +- metadata_backend/api/auth.py | 6 ------ 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/config/.wordlist.txt b/.github/config/.wordlist.txt index 7cc4169e7..5967b0faa 100644 --- a/.github/config/.wordlist.txt +++ b/.github/config/.wordlist.txt @@ -389,7 +389,6 @@ nlmcatalog noindex nominallength nominalsdev -noproject northboundlatitude novaseq npm diff --git a/CHANGELOG.md b/CHANGELOG.md index 0df200159..02cfc3f80 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,7 +40,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - added new key `projects` to `user` - added new key `projectId` to `folder` and `template-*` collections - new mandatory `/userinfo` value from AAI at login time `sdSubmitProjects` - - user is redirected to `/noproject` if key is empty or missing + - user is redirected to an info page by AAI if key is missing - new 
mandatory query parameter `projectId` in `GET /folders` - new mandatory JSON key `projectId` in `POST /folders` and `POST /templates` - new endpoint `GET /templates` to replace `GET /users/current` `{"templates":[...]}` diff --git a/metadata_backend/api/auth.py b/metadata_backend/api/auth.py index 4a7c3de33..a8d198f1a 100644 --- a/metadata_backend/api/auth.py +++ b/metadata_backend/api/auth.py @@ -117,12 +117,6 @@ async def callback(self, req: Request) -> Response: LOG.error(f"OIDC Callback failed with: {e}") raise web.HTTPBadRequest(reason="Invalid OIDC callback.") - # If user has no project affiliations, they will be redirected to an instructions page - if not session["userinfo"].get("sdSubmitProjects", ""): - LOG.error("user has no project affiliations") - response = web.HTTPSeeOther(f"{self.redirect}/noproject") - return response - response = web.HTTPSeeOther(f"{self.redirect}/home") cookie, _ = generate_cookie(req) From f9e5b8089ed84750f7db401cdd83ddfa1300c92a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Mar 2022 09:08:06 +0000 Subject: [PATCH 248/336] Bump mypy from 0.931 to 0.940 Bumps [mypy](https://github.com/python/mypy) from 0.931 to 0.940. - [Release notes](https://github.com/python/mypy/releases) - [Commits](https://github.com/python/mypy/compare/v0.931...v0.940) --- updated-dependencies: - dependency-name: mypy dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 2556a46a1..57de5831c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -30,7 +30,7 @@ identify==2.3.6 # via pre-commit mccabe==0.6.1 # via flake8 -mypy==0.931 +mypy==0.940 # via -r requirements-dev.in mypy-extensions==0.4.3 # via From c7e04f9f258eade3723f60b94d9cace4ccf8265c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Mar 2022 12:27:15 +0000 Subject: [PATCH 249/336] Bump actions/setup-python from 2 to 3 Bumps [actions/setup-python](https://github.com/actions/setup-python) from 2 to 3. - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v2...v3) --- updated-dependencies: - dependency-name: actions/setup-python dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/docs.yml | 2 +- .github/workflows/int.yml | 2 +- .github/workflows/style.yml | 2 +- .github/workflows/unit.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 8933bfb48..d421d356e 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -30,7 +30,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/.github/workflows/int.yml b/.github/workflows/int.yml index 9272f28f3..5e77482ed 100644 --- a/.github/workflows/int.yml +++ b/.github/workflows/int.yml @@ -17,7 +17,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - name: Install requirements diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index c8ea0eef4..c92937042 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -15,7 +15,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/.github/workflows/unit.yml b/.github/workflows/unit.yml index 35b13193b..b784c5869 100644 --- a/.github/workflows/unit.yml +++ b/.github/workflows/unit.yml @@ -15,7 +15,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - name: Install libcurl-devel From 8efdd97cf6952c3438614fce9cc81fcc1b801d76 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Mar 2022 13:14:20 +0000 Subject: [PATCH 250/336] Bump actions/upload-artifact from 2.3.1 to 3 Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 2.3.1 to 3. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/v2.3.1...v3) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/int.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/int.yml b/.github/workflows/int.yml index 5e77482ed..9134108a5 100644 --- a/.github/workflows/int.yml +++ b/.github/workflows/int.yml @@ -53,7 +53,7 @@ jobs: - name: Persist log files if: ${{ failure() }} - uses: actions/upload-artifact@v2.3.1 + uses: actions/upload-artifact@v3 with: name: test_debugging_help path: tests @@ -93,7 +93,7 @@ jobs: - name: Persist log files if: ${{ failure() }} - uses: actions/upload-artifact@v2.3.1 + uses: actions/upload-artifact@v3 with: name: test_debugging_help path: tests From d5d88f1579939f6e264e314081a825fe5e9ca451 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Mar 2022 13:23:30 +0000 Subject: [PATCH 251/336] Bump xmlschema from 1.9.2 to 1.10.0 Bumps [xmlschema](https://github.com/sissaschool/xmlschema) from 1.9.2 to 1.10.0. 
- [Release notes](https://github.com/sissaschool/xmlschema/releases) - [Changelog](https://github.com/sissaschool/xmlschema/blob/master/CHANGELOG.rst) - [Commits](https://github.com/sissaschool/xmlschema/compare/v1.9.2...v1.10.0) --- updated-dependencies: - dependency-name: xmlschema dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index a2661f1aa..ba7a6275a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,7 +29,7 @@ cryptography==36.0.1 # pyopenssl cryptojwt==1.6.1 # via oidcmsg -elementpath==2.4.0 +elementpath==2.5.0 # via xmlschema filelock==3.4.0 # via oidcmsg @@ -93,7 +93,7 @@ urllib3==1.26.7 # responses uvloop==0.16.0 # via -r requirements.in -xmlschema==1.9.2 +xmlschema==1.10.0 # via -r requirements.in yarl==1.7.2 # via aiohttp From a5a7c54970534c10835161b298c94db00b81b7bf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Mar 2022 13:55:37 +0000 Subject: [PATCH 252/336] Bump pytest from 7.0.1 to 7.1.0 Bumps [pytest](https://github.com/pytest-dev/pytest) from 7.0.1 to 7.1.0. - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/7.0.1...7.1.0) --- updated-dependencies: - dependency-name: pytest dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4507a5ff5..87695f737 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ packages=find_packages(exclude=["tests"]), install_requires=requirements, extras_require={ - "test": ["coverage==6.3.2", "pytest==7.0.1", "pytest-cov==3.0.0", "tox==3.24.5"], + "test": ["coverage==6.3.2", "pytest==7.1.0", "pytest-cov==3.0.0", "tox==3.24.5"], "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"], }, package_data={ From 366d9a8d26c242a6bb638cc03497a4c7507e3657 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Mar 2022 09:11:32 +0000 Subject: [PATCH 253/336] Bump pytest from 7.1.0 to 7.1.1 Bumps [pytest](https://github.com/pytest-dev/pytest) from 7.1.0 to 7.1.1. - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/7.1.0...7.1.1) --- updated-dependencies: - dependency-name: pytest dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 87695f737..db47d9278 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ packages=find_packages(exclude=["tests"]), install_requires=requirements, extras_require={ - "test": ["coverage==6.3.2", "pytest==7.1.0", "pytest-cov==3.0.0", "tox==3.24.5"], + "test": ["coverage==6.3.2", "pytest==7.1.1", "pytest-cov==3.0.0", "tox==3.24.5"], "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"], }, package_data={ From e8422cd00252395d7ba14565b7387bb160180eea Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Mar 2022 09:13:06 +0000 Subject: [PATCH 254/336] Bump cryptography from 36.0.1 to 36.0.2 Bumps [cryptography](https://github.com/pyca/cryptography) from 36.0.1 to 36.0.2. - [Release notes](https://github.com/pyca/cryptography/releases) - [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pyca/cryptography/compare/36.0.1...36.0.2) --- updated-dependencies: - dependency-name: cryptography dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index ba7a6275a..ea5ceb69b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,7 +22,7 @@ charset-normalizer==2.0.7 # via # aiohttp # requests -cryptography==36.0.1 +cryptography==36.0.2 # via # -r requirements.in # cryptojwt From faee0a99f226ce5ec6161aa2c2c31ce81ec25f2f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Mar 2022 12:16:13 +0000 Subject: [PATCH 255/336] Bump mypy from 0.940 to 0.941 Bumps [mypy](https://github.com/python/mypy) from 0.940 to 0.941. - [Release notes](https://github.com/python/mypy/releases) - [Commits](https://github.com/python/mypy/compare/v0.940...v0.941) --- updated-dependencies: - dependency-name: mypy dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 57de5831c..adf537348 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -30,7 +30,7 @@ identify==2.3.6 # via pre-commit mccabe==0.6.1 # via flake8 -mypy==0.940 +mypy==0.941 # via -r requirements-dev.in mypy-extensions==0.4.3 # via From 0e1ad2c1062a35e496168c0b7bac6703423a9b2e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Mar 2022 09:04:25 +0000 Subject: [PATCH 256/336] Bump mypy from 0.941 to 0.942 Bumps [mypy](https://github.com/python/mypy) from 0.941 to 0.942. - [Release notes](https://github.com/python/mypy/releases) - [Commits](https://github.com/python/mypy/compare/v0.941...v0.942) --- updated-dependencies: - dependency-name: mypy dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index adf537348..ecee0691e 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -30,7 +30,7 @@ identify==2.3.6 # via pre-commit mccabe==0.6.1 # via flake8 -mypy==0.941 +mypy==0.942 # via -r requirements-dev.in mypy-extensions==0.4.3 # via From 9e82411c33b8e7168e7da016edb1c0b025256471 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Thu, 31 Mar 2022 09:20:54 +0000 Subject: [PATCH 257/336] Ignore typing ANY error if tox.ini --- tox.ini | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 3adcb21b1..ebda48ea8 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,8 @@ skipsdist = True [flake8] max-line-length = 120 -ignore = D202, D203,D212,D213,D404,W503,ANN101 +# ANN40 will be fixed with separate PR +ignore = D202, D203, D212, D213, D404, W503, ANN101, ANN401 exclude = .git/, ./env/, ./venv/, ./.tox/, build/, metadata_backend.egg-info/ # Not using type hints in tests, ignore all errors per-file-ignores = From d5885f88854b6cd3ffecd2dbf92fdacb2b6bc960 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Thu, 31 Mar 2022 09:25:09 +0000 Subject: [PATCH 258/336] Update pip requirements files with newest versions --- requirements-dev.txt | 30 +++++++++++++++--------------- requirements.txt | 37 +++++++++++++++++++------------------ 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index ecee0691e..6735075b0 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -6,27 +6,25 @@ # aiofiles==0.8.0 # via -r requirements-dev.in -backports-entry-points-selectable==1.1.1 - # via virtualenv -black==22.1.0 +black==22.3.0 # via -r requirements-dev.in certifi==2021.10.8 # via -r requirements-dev.in cfgv==3.3.1 # via pre-commit -click==8.0.3 +click==8.1.1 # via # black # pip-tools -distlib==0.3.3 +distlib==0.3.4 # via virtualenv -filelock==3.4.0 +filelock==3.6.0 # via # tox # virtualenv flake8==4.0.1 # via -r requirements-dev.in -identify==2.3.6 +identify==2.4.12 # via pre-commit mccabe==0.6.1 # via flake8 @@ -38,7 +36,7 @@ mypy-extensions==0.4.3 # mypy nodeenv==1.6.0 # via pre-commit -packaging==21.2 +packaging==21.3 # via tox pathspec==0.9.0 # via black @@ -46,7 +44,7 @@ pep517==0.12.0 # via pip-tools pip-tools==6.5.1 # via -r requirements-dev.in -platformdirs==2.4.0 +platformdirs==2.5.1 # via # black # virtualenv @@ -60,7 +58,7 @@ pycodestyle==2.8.0 # via flake8 pyflakes==2.4.0 # via flake8 -pyparsing==2.4.7 +pyparsing==3.0.7 # via packaging pyyaml==6.0 # via pre-commit @@ -72,20 +70,22 @@ toml==0.10.2 # via # pre-commit # tox -tomli==1.2.2 +tomli==2.0.1 # via # black # mypy # pep517 tox==3.24.5 # via -r requirements-dev.in -typing-extensions==4.0.0 - # via mypy -virtualenv==20.10.0 +typing-extensions==4.1.1 + # via + # black + # mypy +virtualenv==20.14.0 # via # pre-commit # tox -wheel==0.37.0 +wheel==0.37.1 # via pip-tools # The following packages are considered to be unsafe in a requirements file: diff --git a/requirements.txt b/requirements.txt index ea5ceb69b..7efab8023 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,15 +2,15 @@ # This file is autogenerated by pip-compile with python 3.8 # To update, run: # -# pip-compile +# pip-compile requirements.in # aiohttp==3.8.1 # via -r requirements.in aiosignal==1.2.0 # via aiohttp -async-timeout==4.0.1 +async-timeout==4.0.2 # via aiohttp -attrs==21.2.0 +attrs==21.4.0 # via # 
aiohttp # jsonschema @@ -18,7 +18,7 @@ certifi==2021.10.8 # via requests cffi==1.15.0 # via cryptography -charset-normalizer==2.0.7 +charset-normalizer==2.0.12 # via # aiohttp # requests @@ -31,9 +31,9 @@ cryptojwt==1.6.1 # via oidcmsg elementpath==2.5.0 # via xmlschema -filelock==3.4.0 +filelock==3.6.0 # via oidcmsg -frozenlist==1.2.0 +frozenlist==1.3.0 # via # aiohttp # aiosignal @@ -43,11 +43,13 @@ idna==3.3 # via # requests # yarl +importlib-resources==5.6.0 + # via jsonschema jsonschema==4.4.0 # via -r requirements.in motor==2.5.1 # via -r requirements.in -multidict==5.2.0 +multidict==6.0.2 # via # aiohttp # yarl @@ -57,11 +59,11 @@ oidcrp==2.1.4 # via -r requirements.in pycparser==2.21 # via cffi -pymongo==3.12.1 +pymongo==3.12.3 # via motor -pyopenssl==21.0.0 +pyopenssl==22.0.0 # via oidcmsg -pyrsistent==0.18.0 +pyrsistent==0.18.1 # via jsonschema python-dateutil==2.8.2 # via -r requirements.in @@ -76,18 +78,15 @@ requests==2.27.1 # -r requirements.in # cryptojwt # responses -responses==0.16.0 +responses==0.20.0 # via oidcrp six==1.16.0 - # via - # pyopenssl - # python-dateutil - # responses -typing-extensions==4.0.0 - # via async-timeout + # via python-dateutil +typing-extensions==4.1.1 + # via readerwriterlock ujson==5.1.0 # via -r requirements.in -urllib3==1.26.7 +urllib3==1.26.9 # via # requests # responses @@ -97,6 +96,8 @@ xmlschema==1.10.0 # via -r requirements.in yarl==1.7.2 # via aiohttp +zipp==3.7.0 + # via importlib-resources # The following packages are considered to be unsafe in a requirements file: # setuptools From 61dfa3cf946c6acf6e5290e43e4d29069153dbb2 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 5 Jan 2022 11:16:28 +0000 Subject: [PATCH 259/336] Add metax mock api. Add metax env vars configuration and create container for metax mock api. --- .env.example | 5 + README.md | 2 +- docker-compose-tls.yml | 16 +++ docker-compose.yml | 16 +++ metadata_backend/conf/conf.py | 10 +- tests/integration/mock_metax_api.py | 216 ++++++++++++++++++++++++++++ 6 files changed, 262 insertions(+), 3 deletions(-) create mode 100644 tests/integration/mock_metax_api.py diff --git a/.env.example b/.env.example index e54037e01..ae6980910 100644 --- a/.env.example +++ b/.env.example @@ -31,3 +31,8 @@ DOI_API=http://mockdoi:8001/dois DOI_PREFIX=10.xxxx DOI_USER=user DOI_KEY=key + +# metax +METAX_USER=sd +METAX_PASS=demo_pass +METAX_URL=http://mockmetax:8002 diff --git a/README.md b/README.md index 4705c787e..96fbbfd5b 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ Server can then be found from `http://localhost:5430`. - Suitable mongodb instance can be launched with Docker by running `docker-compose up database` - After installing and setting up database, server can be launched with `metadata_submitter` -If you also need frontend for development, check out [frontend repository](https://github.com/CSCfi/metadata-submitter-frontend/). +If you also need frontend for development, check out [frontend repository](https://github.com/CSCfi/metadata-submitter-frontend/). You will also need to uncomment `REDIRECT_URL` environment variable from .env file. 
## Tests diff --git a/docker-compose-tls.yml b/docker-compose-tls.yml index f669dfe8a..f0a355346 100644 --- a/docker-compose-tls.yml +++ b/docker-compose-tls.yml @@ -34,6 +34,9 @@ services: - "DOI_PREFIX=${DOI_PREFIX}" - "DOI_USER=${DOI_USER}" - "DOI_KEY=${DOI_KEY}" + - "METAX_USER=${METAX_USER}" + - "METAX_PASS=${METAX_PASS}" + - "METAX_URL=${METAX_URL}" database: image: "mongo" container_name: "metadata_submitter_database_dev" @@ -80,5 +83,18 @@ services: volumes: - ./tests/integration/mock_doi_api.py:/mock_doi_api.py entrypoint: ["python", "/mock_doi_api.py", "0.0.0.0", "8001"] + mockmetax: + build: + dockerfile: Dockerfile-dev + context: . + image: cscfi/metadata-submitter-dev + hostname: mockmetax + expose: + - 8002 + ports: + - 8002:8002 + volumes: + - ./tests/integration/mock_metax_api.py:/mock_metax_api.py + entrypoint: ["python", "/mock_metax_api.py", "0.0.0.0", "8002"] volumes: data: diff --git a/docker-compose.yml b/docker-compose.yml index 739a11e06..62f97f039 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -29,6 +29,9 @@ services: - "DOI_PREFIX=${DOI_PREFIX}" - "DOI_USER=${DOI_USER}" - "DOI_KEY=${DOI_KEY}" + - "METAX_USER=${METAX_USER}" + - "METAX_PASS=${METAX_PASS}" + - "METAX_URL=${METAX_URL}" database: image: "mongo" container_name: "metadata_submitter_database_dev" @@ -75,5 +78,18 @@ services: volumes: - ./tests/integration/mock_doi_api.py:/mock_doi_api.py entrypoint: [ "python", "/mock_doi_api.py", "0.0.0.0", "8001" ] + mockmetax: + build: + dockerfile: Dockerfile-dev + context: . + image: cscfi/metadata-submitter-dev + hostname: mockmetax + expose: + - 8002 + ports: + - 8002:8002 + volumes: + - ./tests/integration/mock_metax_api.py:/mock_metax_api.py + entrypoint: ["python", "/mock_metax_api.py", "0.0.0.0", "8002"] volumes: data: diff --git a/metadata_backend/conf/conf.py b/metadata_backend/conf/conf.py index 2b33d245f..6e89812c2 100644 --- a/metadata_backend/conf/conf.py +++ b/metadata_backend/conf/conf.py @@ -31,12 +31,12 @@ and inserted here in projects Dockerfile. """ -import ujson import os -from pathlib import Path from distutils.util import strtobool +from pathlib import Path from typing import Tuple +import ujson from motor.motor_asyncio import AsyncIOMotorClient from ..helpers.logger import LOG @@ -159,3 +159,9 @@ def create_db_client() -> AsyncIOMotorClient: doi_key = os.getenv("DOI_KEY", "") datacite_url = os.getenv("DATACITE_URL", "https://doi.org") publisher = "CSC - IT Center for Science" + +metax_config = { + "username": os.getenv("METAX_USER", "sd"), + "password": os.getenv("METAX_PASS", "test"), + "url": os.getenv("METAX_URL", "http://mockmetax:8002"), +} diff --git a/tests/integration/mock_metax_api.py b/tests/integration/mock_metax_api.py new file mode 100644 index 000000000..f740bd9c4 --- /dev/null +++ b/tests/integration/mock_metax_api.py @@ -0,0 +1,216 @@ +"""Mock aiohttp.web server for Metax API calls.""" + +import json +import logging +import os +from datetime import datetime +from uuid import uuid4 + +import ujson +from aiohttp import web + +FORMAT = "[%(asctime)s][%(levelname)-8s](L:%(lineno)s) %(funcName)s: %(message)s" +logging.basicConfig(format=FORMAT, datefmt="%Y-%m-%d %H:%M:%S") + +LOG = logging.getLogger("server") +LOG.setLevel(os.getenv("LOG_LEVEL", "DEBUG")) + +# Example error responds from Metax +# { +# "detail": [ +# "Specified organization object does not have a name. If you are using an org identifier from reference data, \ +# then the name will be populated automatically. 
If your org identifier is not from reference data, \ +# you must provide the organization name. The object that caused the error: {'@type': 'Organization'}" +# ], +# "error_identifier": "2022-01-21T10:27:02-02ad2e36", +# } +# { +# "detail": "[ErrorDetail(string=\"'creator' is a required property., code='invalid')]", +# "error_identifier": "2022-01-21T10:27:02-02ad2e36", +# } + +# mimic db for saved datasets, volatile!! +drafts = {} +published = {} + + +async def post_dataset(req: web.Request) -> web.Response: + """Mock endpoint for creating Metax dataset. + + :params req: HTTP request with data for Metax dataset + :return: HTTP response with mocked Metax dataset data + """ + LOG.info("Creating Metax dataset") + content = await validate_payload(req) + metax_id = str(uuid4()) + metax_additions = { + "identifier": metax_id, + "preservation_state": 0, + "state": "draft", + "use_doi_for_published": False, + "cumulative_state": 0, + "api_meta": {"version": 2}, + "date_created": f"{datetime.now()}", + "service_created": "sd", + "removed": False, + } + resp_data = dict(content, **metax_additions) + drafts[metax_id] = resp_data + LOG.info(f'Created Metax dataset with identifier {resp_data["identifier"]}') + return web.Response( + body=ujson.dumps(resp_data, escape_forward_slashes=False), + status=201, + content_type="application/json", + ) + + +async def update_dataset(req: web.Request) -> web.Response: + """Mock endpoint for updating Metax dataset. + + :params req: HTTP request with data for Metax dataset + :return: HTTP response with mocked Metax dataset data + """ + LOG.info("Updating Metax dataset") + metax_id = req.match_info["metax_id"] + if not metax_id: + raise web.HTTPBadRequest( + reason={ + "detail": ["Query params missing Metax ID."], + "error_identifier": datetime.now(), + } + ) + if metax_id not in drafts.keys(): + LOG.error(f"No dataset found with identifier {metax_id}") + raise web.HTTPNotFound(reason={"detail": "Not found."}) + + content = await validate_payload(req) + + for key, value in content.items(): + drafts[metax_id][key] = value + + drafts[metax_id]["date_modified"] = str(datetime.now()) + + LOG.info(f'Updated Metax dataset with identifier {drafts[metax_id]["identifier"]}') + return web.Response( + body=ujson.dumps(drafts[metax_id], escape_forward_slashes=False), + status=200, + content_type="application/json", + ) + + +async def publish_dataset(req: web.Request) -> web.Response: + """Mock endpoint for publishing Metax dataset. 
+ + :params req: HTTP request with data for Metax dataset + :return: HTTP response with mocked Metax dataset data + """ + LOG.info("Publishing Metax dataset") + metax_id = req.query.get("identifier", None) + if not metax_id: + LOG.error("Query params missing Metax ID.") + raise web.HTTPBadRequest( + reason={ + "detail": ["Query params missing Metax ID."], + "error_identifier": datetime.now(), + } + ) + if metax_id in published: + LOG.error(f"Dataset {metax_id} is already published.") + reason = {"detail": ["Dataset is already published."], "error_identifier": datetime.now()} + raise web.HTTPBadRequest(reason=reason) + if metax_id not in drafts.keys(): + LOG.error(f"No dataset found with identifier {metax_id}") + raise web.HTTPNotFound(reason={"detail": "Not found."}) + + data = drafts[metax_id] + published[metax_id] = data + del drafts[metax_id] + published[metax_id]["state"] = "published" + published[metax_id]["modified"] = str(datetime.now()) + LOG.info(f"Published Metax dataset with identifier {metax_id}") + return web.Response( + body=ujson.dumps( + {"preferred_identifier": data["research_dataset"]["preferred_identifier"]}, escape_forward_slashes=False + ), + status=200, + content_type="application/json", + ) + + +async def delete_dataset(req: web.Request) -> web.Response: + """Mock endpoint for deleting Metax dataset. + + :params req: HTTP request with Metax dataset id + :return: HTTP response with HTTP status + """ + LOG.info("Deleting Metax dataset") + metax_id = req.match_info["metax_id"] + if not metax_id: + raise web.HTTPBadRequest( + reason={ + "detail": ["Query params missing Metax ID."], + "error_identifier": datetime.now(), + } + ) + if metax_id not in drafts.keys(): + raise web.HTTPNotFound(reason={"detail": "Not found."}) + else: + del drafts[metax_id] + LOG.info(f"Deleted Metax dataset with identifier {metax_id}") + return web.Response(status=204) + + +async def validate_payload(req: web.Request, draft=True) -> dict: + """Check for required fields in dataset. + + :param req: HTTP Request with data for dataset creation + :param draft: Indicator if dataset needs to be validated as draft or not; default true + """ + LOG.info("Validating payload") + try: + content = await req.json() + except json.decoder.JSONDecodeError as e: + reason = f"JSON is not correctly formatted. 
See: {e}" + LOG.error(f"Error while validating payload: {reason}") + raise web.HTTPBadRequest( + reason={ + "detail": reason, + "error_identifier": datetime.now(), + } + ) + + required = ["data_catalog", "metadata_provider_org", "metadata_provider_user", "research_dataset"] + rd_required = ["title", "description", "preferred_identifier", "access_rights", "publisher"] + + if not draft: + rd_required = rd_required + ["creator"] + + if not all(key in content.keys() for key in required): + reason = {"detail": [f"Dataset did not include all required fields: {', '.join(required)}."]} + reason = json.dumps(reason) + LOG.error(f"Error while validating payload: {reason}") + raise web.HTTPBadRequest(reason=reason, content_type="application/json") + if not all(key in content["research_dataset"].keys() for key in rd_required): + reason = {"detail": [f"Research dataset did not include all required fields: {', '.join(rd_required)}."]} + reason = json.dumps(reason) + LOG.error(f"Error while validating payload: {reason}") + raise web.HTTPBadRequest(reason=reason, content_type="application/json") + return content + + +def init() -> web.Application: + """Start server.""" + app = web.Application() + api_routes = [ + web.post("/rest/v2/datasets", post_dataset), + web.put("/rest/v2/datasets/{metax_id}", update_dataset), + web.delete("/rest/v2/datasets/{metax_id}", delete_dataset), + web.post("/rpc/v2/datasets/publish_dataset", publish_dataset), + ] + app.router.add_routes(api_routes) + LOG.info("Metax mock API started") + return app + + +if __name__ == "__main__": + web.run_app(init(), port=8002) From e5f745bc21633201001d7dd475ace7cd7a24e5a3 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 19 Jan 2022 12:00:32 +0000 Subject: [PATCH 260/336] Add functionality for creating dataset to Metax On succesful study or dataset object creation handle request to Metax api to create draft dataset: - Create handler for metax api calls. - Update object handler to use metax api handler. - Create temporay functionality for adding doi id to object. 
--- metadata_backend/api/handlers/object.py | 42 ++++++ metadata_backend/api/metax_api_handler.py | 160 ++++++++++++++++++++++ tests/test_handlers.py | 1 + 3 files changed, 203 insertions(+) create mode 100644 metadata_backend/api/metax_api_handler.py diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index 8a89eb0fe..5f81970fb 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -9,6 +9,7 @@ from ...helpers.logger import LOG from ...helpers.validator import JSONValidator +from ..metax_api_handler import MetaxServiceHandler from ..operators import FolderOperator, Operator, XMLOperator from .common import multipart_content from .restapi import RESTAPIHandler @@ -146,6 +147,7 @@ async def post_object(self, req: Request) -> Response: accession_id, title = await operator.create_metadata_object(collection, item[0]) ids.append({"accessionId": accession_id, "title": title}) LOG.info(f"POST object with accesssion ID {accession_id} in schema {collection} was successful.") + # we format like this to make it consistent with the response from /submit endpoint data = [dict({"accessionId": item["accessionId"]}, **{"schema": schema_type}) for item in ids] # we take the first result if we get multiple @@ -163,6 +165,10 @@ async def post_object(self, req: Request) -> Response: patch = await self.prepare_folder_patch_new_object(collection, ids, patch_params) await folder_op.update_folder(folder_id, patch) + # Create draft dataset to Metax catalog + if collection in {"study", "dataset"}: + [await self.create_metax_dataset(req, collection, item["accessionId"]) for item in ids] + body = ujson.dumps(data, escape_forward_slashes=False) return web.Response( @@ -172,6 +178,42 @@ async def post_object(self, req: Request) -> Response: content_type="application/json", ) + # TODO: update doi related code + async def create_metax_dataset(self, req: Request, collection: str, accession_id: str) -> str: + """Handle connection to Metax api handler and returned data. + + Sends Dataset or Study object's data to Metax api handler. Updates object with returned metax ID to database. + Object's data has to be fetched first from db in case of XML input. + Has temporary DOI fetching, will be chaged with real data. + + :param req: HTTP request + :param collection: object's schema + :param accession_id: object's accession ID + :returns: Metax ID + """ + metax_service = MetaxServiceHandler(req) + operator = Operator(req.app["db_client"]) + object_data, _ = await operator.read_metadata_object(collection, accession_id) + if isinstance(object_data, Dict): + object_data["doi"] = await self.create_doi() + metax_id = await metax_service.post_dataset_as_draft(collection, object_data) + else: + raise ValueError("Object's data must be dictionary") + new_info = {"doi": object_data["doi"], "metaxIdentifier": {"identifier": metax_id, "status": "draft"}} + accession_id = await operator.update_metadata_object(collection, accession_id, new_info) + return metax_id + + # TODO: to be replaced with real doi fetching + async def create_doi(self) -> str: + """Temporary function for random DOI creation. + + :returns: Temporary DOI string + """ + from uuid import uuid4 + + rand = str(uuid4()).split("-")[1:3] + return f"10.{rand[0]}/{rand[1]}" + async def query_objects(self, req: Request) -> Response: """Query metadata objects from database. 
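
As an aside, a minimal standalone sketch of what the temporary create_doi
helper above produces, using only the standard library; the printed value
is illustrative, not a registered DOI:

    from uuid import uuid4

    # take the two middle groups of a random UUID4, e.g. "9f1c" and "4a2e"
    rand = str(uuid4()).split("-")[1:3]
    temp_doi = f"10.{rand[0]}/{rand[1]}"
    print(temp_doi)  # -> something like "10.9f1c/4a2e"
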
diff --git a/metadata_backend/api/metax_api_handler.py b/metadata_backend/api/metax_api_handler.py new file mode 100644 index 000000000..c4742c976 --- /dev/null +++ b/metadata_backend/api/metax_api_handler.py @@ -0,0 +1,160 @@ +"""Class for handling calls to METAX API.""" +import os +from typing import Any, Dict + +import aiohttp +from aiohttp.web import HTTPBadRequest, HTTPError, HTTPForbidden, HTTPNotFound, Request + +from ..helpers.logger import LOG +from .middlewares import get_session +from .operators import UserOperator + + +class MetaxServiceHandler: + """API handler for uploading submitter's metadata to METAX service.""" + + def __init__(self, req: Request) -> None: + """Define variables and paths. + + Define variables and paths used for connecting to Metax API and + default inputs for Metax Dataset creation. + + :param req: HTTP request from calling service + """ + self.req = req + self.db_client = self.req.app["db_client"] + + self.username = os.getenv("METAX_USER", "sd") + self.password = os.getenv("METAX_PASS", "test") + self.metax_url = os.getenv("METAX_URL", "http://mockmetax:8002") + self.rest_route = "/rest/v2/datasets" + self.publish_route = "/rpc/v2/datasets/publish_dataset" + + catalog_pid = "urn:nbn:fi:att:data-catalog-sd" + + self.minimal_dataset_template: Dict[Any, Any] = { + "data_catalog": catalog_pid, + "metadata_provider_org": "csc.fi", + "research_dataset": { + # submitter given DOI + "preferred_identifier": "", + "title": {"en": ""}, + # study abstract or dataset description + "description": {"en": ""}, + # default + "access_rights": { + "access_type": {"identifier": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted"}, + }, + # default + "publisher": { + "@type": "Organization", + "name": { + "en": "CSC Sensitive Data Services for Research", + "fi": "CSC:n Arkaluonteisen datan palveluiden aineistokatalogi", + }, + }, + }, + } + + # TODO + def authenticate(self) -> None: + """Handle authentication to Metax.""" + pass + + async def get_metadata_provider_user(self) -> str: + """Get current user's external id. + + returns: current users external ID + """ + current_user = get_session(self.req)["user_info"] + user_op = UserOperator(self.db_client) + user = await user_op.read_user(current_user) + metadata_provider_user = user["externalId"] + return metadata_provider_user + + async def post_dataset_as_draft(self, collection: str, data: Dict) -> str: + """Send draft dataset to Metax. + + Construct Metax dataset data from submitters' Study or Dataset and + send it as new draft dataset to Metax Dataset API. 
+
+        :param collection: schema of incoming submitter's metadata
+        :param data: validated Study or Dataset data dict
+        :raises: HTTPError depending on returned error from Metax
+        :returns: Metax ID for dataset returned by Metax API
+        """
+        metax_dataset = self.minimal_dataset_template
+        metax_dataset["metadata_provider_user"] = await self.get_metadata_provider_user()
+        if collection == "dataset":
+            dataset_data = await self.create_metax_dataset_data_from_dataset(data)
+        else:
+            dataset_data = await self.create_metax_dataset_data_from_study(data)
+        metax_dataset["research_dataset"] = dataset_data
+        LOG.debug(
+            f"Creating draft dataset to Metax service from Submitter {collection} with accession ID "
+            f"{data['accessionId']}"
+        )
+        async with aiohttp.ClientSession() as sess:
+            resp = await sess.post(
+                f"{self.metax_url}{self.rest_route}",
+                params="draft",
+                json=metax_dataset,
+                auth=aiohttp.BasicAuth(self.username, self.password),
+            )
+            status = resp.status
+            if status == 201:
+                metax_data = await resp.json()
+                LOG.debug(
+                    f"Created Metax draft dataset {metax_data['identifier']} from Submitter {collection} "
+                    f"{data['accessionId']} with data: {metax_dataset}."
+                )
+                return metax_data["identifier"]
+            else:
+                # TODO: how front end should react on this??
+                reason = await resp.text()
+                raise self.process_error(status, reason)
+
+    async def create_metax_dataset_data_from_study(self, data: Dict) -> Dict:
+        """Construct Metax dataset's research dataset dictionary from Submitters Study.
+
+        :param data: Study data
+        :returns: constructed research dataset
+        """
+        research_dataset = self.minimal_dataset_template["research_dataset"]
+
+        research_dataset["preferred_identifier"] = data["doi"]
+        research_dataset["title"]["en"] = data["descriptor"]["studyTitle"]
+        research_dataset["description"]["en"] = data["descriptor"]["studyAbstract"]
+        LOG.debug(f"Created Metax dataset from Study with data: {research_dataset}")
+        return research_dataset
+
+    async def create_metax_dataset_data_from_dataset(self, data: Dict) -> Dict:
+        """Construct Metax dataset's research dataset dictionary from Submitters Dataset.
+
+        :param data: Dataset data
+        :returns: constructed research dataset
+        """
+        research_dataset = self.minimal_dataset_template["research_dataset"]
+        research_dataset["preferred_identifier"] = data["doi"]
+        research_dataset["title"]["en"] = data["title"]
+        research_dataset["description"]["en"] = data["description"]
+        LOG.debug(f"Created Metax dataset from Dataset with data: {research_dataset}")
+        return research_dataset
+
+    # we don't know exactly what is coming from Metax so we try it all
+    def process_error(self, status: int, resp_json: str) -> HTTPError:
+        """Construct an HTTP error from the error response returned by Metax.
+
+        :param status: Status code of the HTTP exception
+        :param resp_json: Response message for the returned exception
+        :returns: HTTP error depending on incoming status
+        """
+        LOG.error(resp_json)
+        if status == 400:
+            return HTTPBadRequest(reason=resp_json)
+        if status == 403:
+            return HTTPForbidden(reason=resp_json)
+        if status == 404:
+            return HTTPNotFound(reason=resp_json)
+        else:
+            return HTTPError(reason=resp_json)
diff --git a/tests/test_handlers.py b/tests/test_handlers.py
index 801a5e515..5cb57932d 100644
--- a/tests/test_handlers.py
+++ b/tests/test_handlers.py
@@ -5,6 +5,7 @@
 from aiohttp import FormData
 from aiohttp.test_utils import AioHTTPTestCase, make_mocked_coro
 
+from metadata_backend.api.handlers.object import ObjectAPIHandler
 from metadata_backend.api.handlers.restapi import RESTAPIHandler
 from metadata_backend.api.middlewares import generate_cookie
 from metadata_backend.server import init

From 07623958341106e25d3d035ca4f664e5fda06ae8 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Thu, 20 Jan 2022 13:45:34 +0000
Subject: [PATCH 261/336] Add functionality for updating dataset to Metax.

Adds metaxIdentifier to forbidden keys for db object update operations.
---
 metadata_backend/api/handlers/object.py   | 88 +++++++++++++----------
 metadata_backend/api/metax_api_handler.py | 38 ++++++++++
 metadata_backend/api/operators.py         | 18 +++++
 tests/test_operators.py                   | 13 +++-
 4 files changed, 118 insertions(+), 39 deletions(-)

diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py
index 5f81970fb..54d7797f0 100644
--- a/metadata_backend/api/handlers/object.py
+++ b/metadata_backend/api/handlers/object.py
@@ -167,7 +167,7 @@ async def post_object(self, req: Request) -> Response:
 
         # Create draft dataset to Metax catalog
         if collection in {"study", "dataset"}:
-            [await self.create_metax_dataset(req, collection, item["accessionId"]) for item in ids]
+            [await self.create_or_update_metax_dataset(req, collection, item["accessionId"]) for item in ids]
 
         body = ujson.dumps(data, escape_forward_slashes=False)
 
         return web.Response(
@@ -178,42 +178,6 @@ async def post_object(self, req: Request) -> Response:
             content_type="application/json",
         )
 
-    # TODO: update doi related code
-    async def create_metax_dataset(self, req: Request, collection: str, accession_id: str) -> str:
-        """Handle connection to Metax api handler and returned data.
-
-        Sends Dataset or Study object's data to Metax api handler. Updates object with returned metax ID to database.
-        Object's data has to be fetched first from db in case of XML input.
-        Has temporary DOI fetching, will be chaged with real data.
-
-        :param req: HTTP request
-        :param collection: object's schema
-        :param accession_id: object's accession ID
-        :returns: Metax ID
-        """
-        metax_service = MetaxServiceHandler(req)
-        operator = Operator(req.app["db_client"])
-        object_data, _ = await operator.read_metadata_object(collection, accession_id)
-        if isinstance(object_data, Dict):
-            object_data["doi"] = await self.create_doi()
-            metax_id = await metax_service.post_dataset_as_draft(collection, object_data)
-        else:
-            raise ValueError("Object's data must be dictionary")
-        new_info = {"doi": object_data["doi"], "metaxIdentifier": {"identifier": metax_id, "status": "draft"}}
-        accession_id = await operator.update_metadata_object(collection, accession_id, new_info)
-        return metax_id
-
-    # TODO: to be replaced with real doi fetching
-    async def create_doi(self) -> str:
-        """Temporary function for random DOI creation.
- - :returns: Temporary DOI string - """ - from uuid import uuid4 - - rand = str(uuid4()).split("-")[1:3] - return f"10.{rand[0]}/{rand[1]}" - async def query_objects(self, req: Request) -> Response: """Query metadata objects from database. @@ -308,6 +272,10 @@ async def put_object(self, req: Request) -> Response: patch = await self.prepare_folder_patch_update_object(collection, accession_id, title, filename) await folder_op.update_folder(folder_id, patch) + # Update draft dataset to Metax catalog + if collection in {"study", "dataset"}: + await self.create_or_update_metax_dataset(req, collection, accession_id) + body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) LOG.info(f"PUT object with accession ID {accession_id} in schema {collection} was successful.") return web.Response(body=body, status=200, content_type="application/json") @@ -357,6 +325,10 @@ async def patch_object(self, req: Request) -> Response: except (TypeError, KeyError): pass + # Update draft dataset to Metax catalog + if collection in {"study", "dataset"}: + await self.create_or_update_metax_dataset(req, collection, accession_id) + body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) LOG.info(f"PATCH object with accession ID {accession_id} in schema {collection} was successful.") return web.Response(body=body, status=200, content_type="application/json") @@ -433,3 +405,45 @@ async def prepare_folder_patch_update_object( } ) return [patch_op] + + # TODO: update doi related code + async def create_or_update_metax_dataset(self, req: Request, collection: str, accession_id: str) -> str: + """Handle connection to Metax api handler. + + Sends Dataset or Study object's data to Metax api handler. + If creating new dataset, object is updated with returned metax ID to database. + Object's data has to be fetched first from db in case of XML data in request. + Has temporary DOI fetching, will be chaged with real data. + + :param req: HTTP request + :param collection: object's schema + :param accession_id: object's accession ID + :returns: Metax ID + """ + metax_service = MetaxServiceHandler(req) + operator = Operator(req.app["db_client"]) + object_data, _ = await operator.read_metadata_object(collection, accession_id) + if isinstance(object_data, Dict): + if object_data.get("metaxIdentifier", None): + LOG.info("Updating draft dataset to Metax.") + metax_id = await metax_service.update_draft_dataset(collection, object_data) + else: + LOG.info("Creating draft dataset to Metax.") + object_data["doi"] = await self.create_doi() + metax_id = await metax_service.post_dataset_as_draft(collection, object_data) + new_info = {"doi": object_data["doi"], "metaxIdentifier": {"identifier": metax_id, "status": "draft"}} + accession_id = await operator.update_metadata_object(collection, accession_id, new_info) + else: + raise ValueError("Object's data must be dictionary") + return metax_id + + # TODO: to be replaced with real doi fetching + async def create_doi(self) -> str: + """Temporary function for random DOI creation. 
+ + :returns: Temporary DOI string + """ + from uuid import uuid4 + + rand = str(uuid4()).split("-")[1:3] + return f"10.{rand[0]}/{rand[1]}" diff --git a/metadata_backend/api/metax_api_handler.py b/metadata_backend/api/metax_api_handler.py index c4742c976..368a556a7 100644 --- a/metadata_backend/api/metax_api_handler.py +++ b/metadata_backend/api/metax_api_handler.py @@ -114,6 +114,44 @@ async def post_dataset_as_draft(self, collection: str, data: Dict) -> str: reason = await resp.text() raise self.process_error(status, reason) + async def update_draft_dataset(self, collection: str, data: Dict) -> str: + """Update draft dataset to Metax. + + Construct Metax draft dataset data from submitters' Study or Dataset and + send it to Metax Dataset API for update. + + :param collection: schema of incomming submitters metadata + :param data: validated Study or Dataset data dict + :raises: HTTPError depending on returned error from Metax + :returns: Metax ID for dataset returned by Metax API + """ + metax_dataset = self.minimal_dataset_template + # TODO: should this be changed if person updating data is different from data creator? + metax_dataset["metadata_provider_user"] = await self.get_metadata_provider_user() + if collection == "dataset": + dataset_data = await self.create_metax_dataset_data_from_dataset(data) + else: + dataset_data = await self.create_metax_dataset_data_from_study(data) + metax_dataset["research_dataset"] = dataset_data + LOG.info(f"Sending updated {collection} object data to Metax service.") + + async with aiohttp.ClientSession() as sess: + resp = await sess.put( + f'{self.metax_url}{self.rest_route}/{data["metaxIdentifier"]["identifier"]}', + params="draft", + json=metax_dataset, + auth=aiohttp.BasicAuth(self.username, self.password), + ) + status = resp.status + if status == 200: + metax_data = await resp.json() + LOG.info(f"Updated Metax draft dataset with ID {metax_data['identifier']} with data: {metax_data}") + return metax_data["identifier"] + else: + # TODO: how front end should react on this?? + reason = await resp.text() + raise self.process_error(status, reason) + async def create_metax_dataset_data_from_study(self, data: Dict) -> Dict: """Construct Metax dataset's research dataset dictionary from Submitters Study. diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 57d1e427c..34d3debcd 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -482,12 +482,24 @@ async def _format_data_to_replace_and_add_to_db( :returns: Accession Id for object inserted to database """ forbidden_keys = ["accessionId", "publishDate", "dateCreated"] + # when replacing from xml file there are no metax data in content data + # therefore we need to check if the object already exists in database and has metax id + if schema_type in {"study", "dataset"}: + read_data = await self.db_service.read(schema_type, accession_id) + if read_data.get("metaxIdentifier", None): + forbidden_keys.extend(["metaxIdentifier"]) if any(i in data for i in forbidden_keys): reason = f"Some items (e.g: {', '.join(forbidden_keys)}) cannot be changed." 
LOG.error(reason) raise web.HTTPBadRequest(reason=reason) data["accessionId"] = accession_id data["dateModified"] = datetime.utcnow() + if schema_type in {"study", "dataset"}: + try: + data["metaxIdentifier"] = read_data["metaxIdentifier"] + data["doi"] = read_data["doi"] + except KeyError: + pass LOG.debug(f"Operator formatted data for {schema_type} to add to DB") return await self._replace_object_from_db(schema_type, accession_id, data) @@ -500,12 +512,18 @@ async def _format_data_to_update_and_add_to_db(self, schema_type: str, accession :returns: Accession Id for object inserted to database """ forbidden_keys = ["accessionId", "publishDate", "dateCreated"] + # check if object already has metax id or is it first time writing it + if schema_type in {"study", "dataset"}: + read_data = await self.db_service.read(schema_type, accession_id) + if read_data.get("metaxIdentifier", None): + forbidden_keys.extend(["metaxIdentifier"]) if any(i in data for i in forbidden_keys): reason = f"Some items (e.g: {', '.join(forbidden_keys)}) cannot be changed." LOG.error(reason) raise web.HTTPBadRequest(reason=reason) data["accessionId"] = accession_id data["dateModified"] = datetime.utcnow() + LOG.debug(f"Operator formatted data for {schema_type} to add to DB") return await self._update_object_from_db(schema_type, accession_id, data) diff --git a/tests/test_operators.py b/tests/test_operators.py index 4a0741720..054d57290 100644 --- a/tests/test_operators.py +++ b/tests/test_operators.py @@ -310,7 +310,7 @@ async def test_correct_data_is_set_to_json_when_creating(self): ) self.assertEqual(acc, self.accession_id) - async def test_wront_data_is_set_to_json_when_replacing(self): + async def test_wrong_data_is_set_to_json_when_replacing(self): """Test operator replace catches error.""" operator = Operator(self.client) with patch("metadata_backend.api.operators.Operator._replace_object_from_db", return_value=self.accession_id): @@ -338,11 +338,20 @@ async def test_correct_data_is_set_to_json_when_replacing(self): ) as mocked_insert: with patch("metadata_backend.api.operators.datetime") as m_date: m_date.utcnow.return_value = datetime.datetime(2020, 4, 14) + self.MockedDbService().read.return_value = { + "accessionId": self.accession_id, + "dateModified": datetime.datetime(2020, 4, 14), + "metaxIdentifier": {"identifier": 12345}, + } acc = await (operator._format_data_to_replace_and_add_to_db("study", self.accession_id, {})) mocked_insert.assert_called_once_with( "study", self.accession_id, - {"accessionId": self.accession_id, "dateModified": datetime.datetime(2020, 4, 14)}, + { + "accessionId": self.accession_id, + "dateModified": datetime.datetime(2020, 4, 14), + "metaxIdentifier": {"identifier": 12345}, + }, ) self.assertEqual(acc, self.accession_id) From f54acf1b5b46813e16908fc8d1fac5ce27bebead Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 16 Feb 2022 12:40:40 +0000 Subject: [PATCH 262/336] Delete object from Metax service --- metadata_backend/api/handlers/object.py | 30 +++++++++++++++++++++-- metadata_backend/api/metax_api_handler.py | 18 ++++++++++++++ tests/test_handlers.py | 4 ++- 3 files changed, 49 insertions(+), 3 deletions(-) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index 54d7797f0..3886f7e0d 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -203,7 +203,8 @@ async def delete_object(self, req: Request) -> Response: accession_id = req.match_info["accessionId"] db_client = 
req.app["db_client"]
 
-        await Operator(db_client).check_exists(collection, accession_id)
+        operator = Operator(db_client)
+        await operator.check_exists(collection, accession_id)
 
         await self._handle_check_ownership(req, collection, accession_id)
 
@@ -220,7 +221,21 @@
             LOG.error(reason)
             raise web.HTTPUnprocessableEntity(reason=reason)
 
-        accession_id = await Operator(db_client).delete_metadata_object(collection, accession_id)
+        metax_id: str = ""
+        if collection in {"study", "dataset"}:
+            try:
+                object_data, _ = await operator.read_metadata_object(collection, accession_id)
+                # MYPY related if statement, Operator (when not XMLOperator) always returns object_data as dict
+                if isinstance(object_data, dict):
+                    metax_id = object_data["metaxIdentifier"]["identifier"]
+            except KeyError:
+                LOG.warning(f"MetadataObject {collection} {accession_id} was never added to Metax service.")
+
+        accession_id = await operator.delete_metadata_object(collection, accession_id)
+
+        # Delete draft dataset from Metax catalog
+        if collection in {"study", "dataset"}:
+            await self.delete_metax_dataset(req, metax_id)
 
         LOG.info(f"DELETE object with accession ID {accession_id} in schema {collection} was successful.")
         return web.Response(status=204)
@@ -423,6 +438,7 @@ async def create_or_update_metax_dataset(self, req: Request, collection: str, ac
         metax_service = MetaxServiceHandler(req)
         operator = Operator(req.app["db_client"])
         object_data, _ = await operator.read_metadata_object(collection, accession_id)
+        # MYPY related if statement, Operator (when not XMLOperator) always returns object_data as dict
         if isinstance(object_data, Dict):
             if object_data.get("metaxIdentifier", None):
                 LOG.info("Updating draft dataset to Metax.")
@@ -437,6 +453,16 @@ async def create_or_update_metax_dataset(self, req: Request, collection: str, ac
             raise ValueError("Object's data must be dictionary")
         return metax_id
 
+    async def delete_metax_dataset(self, req: Request, metax_id: str) -> None:
+        """Handle deletion of Study or Dataset object from Metax service.
+
+        :param req: HTTP request
+        :param metax_id: object's Metax ID
+        :returns: None; raises an error if the request fails
+        """
+        metax_service = MetaxServiceHandler(req)
+        await metax_service.delete_draft_dataset(metax_id)
+
     # TODO: to be replaced with real doi fetching
     async def create_doi(self) -> str:
         """Temporary function for random DOI creation.
 
diff --git a/metadata_backend/api/metax_api_handler.py b/metadata_backend/api/metax_api_handler.py
index 368a556a7..d579b407d 100644
--- a/metadata_backend/api/metax_api_handler.py
+++ b/metadata_backend/api/metax_api_handler.py
@@ -152,6 +152,24 @@ async def update_draft_dataset(self, collection: str, data: Dict) -> str:
             reason = await resp.text()
             raise self.process_error(status, reason)
 
+    async def delete_draft_dataset(self, metax_id: str) -> None:
+        """Delete draft dataset from Metax service.
+
+        :param metax_id: Identification string pointing to Metax dataset to be deleted
+        """
+        async with aiohttp.ClientSession() as sess:
+            resp = await sess.delete(
+                f"{self.metax_url}{self.rest_route}/{metax_id}",
+                auth=aiohttp.BasicAuth(self.username, self.password),
+            )
+            status = resp.status
+            if status == 204:
+                LOG.info(f"Deleted draft dataset {metax_id} from Metax service")
+            else:
+                # TODO: how should the front end react to this?
+                reason = await resp.text()
+                raise self.process_error(status, reason)
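
In short, deletion becomes a two-step flow for study and dataset objects: the handler looks up the object's Metax ID before removing the object from the database, then clears the corresponding draft from Metax. A condensed sketch of that path, using the names from the diff above with error handling elided:

    # sketch: DELETE handler path for study/dataset collections
    object_data, _ = await operator.read_metadata_object(collection, accession_id)
    metax_id = object_data.get("metaxIdentifier", {}).get("identifier", "")

    accession_id = await operator.delete_metadata_object(collection, accession_id)

    if metax_id:
        await MetaxServiceHandler(req).delete_draft_dataset(metax_id)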
 
     async def create_metax_dataset_data_from_study(self, data: Dict) -> Dict:
         """Construct Metax dataset's research dataset dictionary from Submitters Study.
 
diff --git a/tests/test_handlers.py b/tests/test_handlers.py
index 5cb57932d..1bc4ad412 100644
--- a/tests/test_handlers.py
+++ b/tests/test_handlers.py
@@ -101,7 +101,9 @@ async def setUpAsync(self):
             "filter_user.side_effect": self.fake_useroperator_filter_user,
         }
 
-        RESTAPIHandler._handle_check_ownership = make_mocked_coro(True)
+        RESTAPIHandler._handle_check_ownedby_user = make_mocked_coro(True)
+        ObjectAPIHandler.create_or_update_metax_dataset = make_mocked_coro("111-222-333")
+        ObjectAPIHandler.delete_metax_dataset = make_mocked_coro()
 
     async def tearDownAsync(self):
         """Cleanup mocked stuff."""

From b8dc5f9e08bc86d560a881659cd16ca6cbb4a0ee Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Fri, 21 Jan 2022 13:20:44 +0000
Subject: [PATCH 263/336] Update integration tests for basic crud with metax

Add dataset and study test files for put and patch tests.
Add get endpoint to mock_metax_api.
---
 tests/integration/mock_metax_api.py | 35 +++++++
 tests/integration/run_tests.py | 100 +++++++++++++++++++-
 tests/test_files/dataset/dataset.json | 7 ++
 tests/test_files/dataset/dataset_patch.json | 4 +
 tests/test_files/dataset/dataset_put.xml | 18 ++++
 tests/test_files/study/SRP000539_put.xml | 2 +-
 6 files changed, 164 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_files/dataset/dataset.json
 create mode 100644 tests/test_files/dataset/dataset_patch.json
 create mode 100644 tests/test_files/dataset/dataset_put.xml

diff --git a/tests/integration/mock_metax_api.py b/tests/integration/mock_metax_api.py
index f740bd9c4..79865bb1d 100644
--- a/tests/integration/mock_metax_api.py
+++ b/tests/integration/mock_metax_api.py
@@ -34,6 +34,40 @@
 published = {}
 
 
+async def get_dataset(req: web.Request) -> web.Response:
+    """Mock endpoint for retrieving Metax dataset.
+
+    :param req: HTTP request with data for Metax dataset
+    :return: HTTP response with mocked Metax dataset data
+    """
+    metax_id = req.match_info["metax_id"]
+    # await asyncio.sleep(1)
+    LOG.info(f"Retrieving Metax dataset {metax_id}")
+    if not metax_id:
+        LOG.error("Query params missing Metax ID.")
+        raise web.HTTPBadRequest(
+            reason={
+                "detail": ["Query params missing Metax ID."],
+                "error_identifier": datetime.now(),
+            }
+        )
+    stuff = list(drafts.keys()) + list(published.keys())
+    if metax_id not in stuff:
+        LOG.error(f"No dataset found with identifier {metax_id}")
+        raise web.HTTPNotFound(reason={"detail": "Not found."})
+    try:
+        content = drafts[metax_id]
+    except KeyError:
+        content = published[metax_id]
+
+    LOG.debug(f"Found {content['state']} dataset {content['identifier']} with data: {content}")
+    return web.Response(
+        body=ujson.dumps(content, escape_forward_slashes=False),
+        status=200,
+        content_type="application/json",
+    )
+
+
 async def post_dataset(req: web.Request) -> web.Response:
     """Mock endpoint for creating Metax dataset.
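
For manual verification, the new mock endpoint can also be probed directly; a minimal sketch, assuming the mock is reachable on localhost:8002 (a hypothetical helper, not part of the test suite):

    import asyncio

    import aiohttp


    async def fetch_mock_dataset(metax_id: str) -> dict:
        """Fetch a draft or published dataset from the mocked Metax API."""
        async with aiohttp.ClientSession() as sess:
            async with sess.get(f"http://localhost:8002/rest/v2/datasets/{metax_id}") as resp:
                assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
                return await resp.json()


    # e.g. asyncio.run(fetch_mock_dataset("<metax id returned by the POST endpoint>"))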
@@ -206,6 +240,7 @@ def init() -> web.Application:
         web.put("/rest/v2/datasets/{metax_id}", update_dataset),
         web.delete("/rest/v2/datasets/{metax_id}", delete_dataset),
         web.post("/rpc/v2/datasets/publish_dataset", publish_dataset),
+        web.get("/rest/v2/datasets/{metax_id}", get_dataset),
     ]
     app.router.add_routes(api_routes)
     LOG.info("Metax mock API started")
diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py
index 6b209b86f..95557f7fc 100644
--- a/tests/integration/run_tests.py
+++ b/tests/integration/run_tests.py
@@ -47,6 +47,7 @@
 test_json_files = [
     ("study", "SRP000539.json", "SRP000539.json"),
     ("sample", "SRS001433.json", "SRS001433.json"),
+    ("dataset", "dataset.json", "dataset.json"),
     ("run", "ERR000076.json", "ERR000076.json"),
     ("experiment", "ERX000119.json", "ERX000119.json"),
     ("analysis", "ERZ266973.json", "ERZ266973.json"),
@@ -60,6 +61,7 @@
 users_url = f"{base_url}/users"
 submit_url = f"{base_url}/submit"
 publish_url = f"{base_url}/publish"
+metax_url = "http://mockmetax:8002/rest/v2/datasets"
 # to form direct contact to db with create_folder()
 DATABASE = os.getenv("MONGO_DATABASE", "default")
 AUTHDB = os.getenv("MONGO_AUTHDB", "admin")
@@ -285,6 +287,23 @@ async def put_object_json(sess, schema, accession_id, update_filename):
         assert resp.status == 415, f"HTTP Status code error, got {resp.status}"
 
 
+async def patch_object_json(sess, schema, accession_id, update_filename):
+    """Patch one metadata object within session, returns accessionId.
+
+    :param sess: HTTP session in which request call is made
+    :param schema: name of the schema (folder) used for testing
+    :param accession_id: id of the object to be patched
+    :param update_filename: name of the file used for updating data.
+    """
+    request_data = await create_request_json_data(schema, update_filename)
+    async with sess.patch(f"{objects_url}/{schema}/{accession_id}", data=request_data) as resp:
+        LOG.debug(f"Try to patch object in {schema}")
+        assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
+        ans_put = await resp.json()
+        assert ans_put["accessionId"] == accession_id, "accession ID error"
+        return ans_put["accessionId"]
+
+
 async def put_object_xml(sess, schema, accession_id, update_filename):
     """Put one metadata object within session, returns accessionId.
 
@@ -850,7 +869,77 @@ async def test_getting_all_objects_from_schema_works(sess, folder_id):
         await asyncio.gather(*[delete_object(sess, "study", accession_id) for accession_id, _ in files])
 
 
-async def test_crud_folders_works(sess, project_id: str):
+async def test_metax_crud(sess, folder_id):
+    """Test Metax service with study and dataset POST, PUT, PATCH and DELETE reqs.
+
+    :param sess: HTTP session in which request call is made
+    :param folder_id: id of the folder where objects reside
+    """
+    # POST to object endpoint creates draft dataset in Metax for Study and Dataset
+    ids = []
+    xml_files = set()
+    for schema, filename, update_filename in {
+        ("study", "SRP000539.xml", "SRP000539_put.xml"),
+        ("dataset", "dataset.xml", "dataset_put.xml"),
+    }:
+        accession_id, _ = await post_object(sess, schema, folder_id, filename)
+        xml_files.add((schema, accession_id, update_filename))
+        ids.append([schema, accession_id])
+
+    json_files = set()
+    for schema, filename, update_filename in {
+        ("study", "SRP000539.json", "patch.json"),
+        ("dataset", "dataset.json", "dataset_patch.json"),
+    }:
+        accession_id = await post_object_json(sess, schema, folder_id, filename)
+        json_files.add((schema, accession_id, filename, update_filename))
+        ids.append([schema, accession_id])
+
+    for object in ids:
+        schema, accession_id = object
+        async with sess.get(f"{objects_url}/{schema}/{accession_id}") as resp:
+            assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
+            res = await resp.json()
+            try:
+                metax_id = res["metaxIdentifier"]["identifier"]
+            except KeyError:
+                assert False, "Metax ID was not in response data"
+            object.append(metax_id)
+            async with sess.get(f"{metax_url}/{metax_id}") as metax_resp:
+                assert metax_resp.status == 200, f"HTTP Status code error, got {metax_resp.status}"
+                metax_res = await metax_resp.json()
+                assert (
+                    res.get("doi", None) == metax_res["research_dataset"]["preferred_identifier"]
+                ), "Object's DOI was not in Metax response data preferred_identifier"
+                assert metax_res.get("date_modified", None) is None
+
+    # PUT and PATCH to object endpoint updates draft dataset in Metax for Study and Dataset
+    for schema, accession_id, filename in xml_files:
+        await put_object_xml(sess, schema, accession_id, filename)
+    for schema, accession_id, filename, _ in json_files:
+        await put_object_json(sess, schema, accession_id, filename)
+    for schema, accession_id, _, filename in json_files:
+        await patch_object_json(sess, schema, accession_id, filename)
+
+    for _, _, metax_id in ids:
+        async with sess.get(f"{metax_url}/{metax_id}") as metax_resp:
+            assert metax_resp.status == 200, f"HTTP Status code error, got {metax_resp.status}"
+            metax_res = await metax_resp.json()
+            assert (
+                metax_res.get("date_modified", None) is not None
+            ), f"Object with metax id {metax_res['identifier']} was not updated in Metax"
+
+    # DELETE object from Metax
+    for schema, accession_id, _ in xml_files:
+        await delete_object(sess, schema, accession_id)
+    for schema, accession_id, _, _ in json_files:
+        await delete_object(sess, schema, accession_id)
+    for _, _, metax_id in ids:
+        async with sess.get(f"{metax_url}/{metax_id}") as metax_resp:
+            assert metax_resp.status == 404, f"HTTP Status code error - expected 404 Not Found, got {metax_resp.status}"
+
+
+async def test_crud_folders_works(sess):
:param sess: HTTP session in which request call is made @@ -1604,6 +1693,15 @@ async def main(): if not TLS: await test_getting_folders_filtered_by_date_created(sess, project_id) + # Test objects study and dataset are connecting to metax and saving metax id to db + LOG.debug("=== Testing Metax integration related basic CRUD operations for study and dataset ===") + metax_folder = { + "name": "basic test pagination", + "description": "basic test pagination folder", + } + metax_folder_id = await post_folder(sess, metax_folder) + await test_metax_crud(sess, metax_folder_id) + # Test add, modify, validate and release action with submissions LOG.debug("=== Testing actions within submissions ===") submission_folder = { diff --git a/tests/test_files/dataset/dataset.json b/tests/test_files/dataset/dataset.json new file mode 100644 index 000000000..84930d4a7 --- /dev/null +++ b/tests/test_files/dataset/dataset.json @@ -0,0 +1,7 @@ +{ + "title": "Test Dataset", + "datasetType": [ + "Amplicon sequencing" + ], + "description": "ome cool test description for json dataset" +} diff --git a/tests/test_files/dataset/dataset_patch.json b/tests/test_files/dataset/dataset_patch.json new file mode 100644 index 000000000..c7ed09a55 --- /dev/null +++ b/tests/test_files/dataset/dataset_patch.json @@ -0,0 +1,4 @@ +{ + "title": "Updated Dataset", + "description": "one cool test description for updated dataset" +} diff --git a/tests/test_files/dataset/dataset_put.xml b/tests/test_files/dataset/dataset_put.xml new file mode 100644 index 000000000..787ae60ca --- /dev/null +++ b/tests/test_files/dataset/dataset_put.xml @@ -0,0 +1,18 @@ + + + + test updated + some cool test description updated + Exome sequencing + Genotyping by array + + + + + + + + + + + diff --git a/tests/test_files/study/SRP000539_put.xml b/tests/test_files/study/SRP000539_put.xml index 126ced42a..ba92a661e 100644 --- a/tests/test_files/study/SRP000539_put.xml +++ b/tests/test_files/study/SRP000539_put.xml @@ -6,7 +6,7 @@ GSE10966 - Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing + Highly integrated epigenome maps in Arabidopsis - updated Part of a set of highly integrated epigenome maps for Arabidopsis thaliana. 
Keywords:
From 08a6091a925b1ecf570802f4c23f10333f86973a Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Fri, 21 Jan 2022 13:25:54 +0000
Subject: [PATCH 264/336] Add Metax datasets publishing on folder publish

---
 metadata_backend/api/handlers/folder.py | 3 ++
 metadata_backend/api/metax_api_handler.py | 52 ++++++++++++++++++++++-
 metadata_backend/api/operators.py | 15 ++++---
 tests/integration/run_tests.py | 39 +++++++++++++++++
 tests/test_handlers.py | 6 +++
 5 files changed, 109 insertions(+), 6 deletions(-)

diff --git a/metadata_backend/api/handlers/folder.py b/metadata_backend/api/handlers/folder.py
index 1cbe5ee83..4ebe9e90b 100644
--- a/metadata_backend/api/handlers/folder.py
+++ b/metadata_backend/api/handlers/folder.py
@@ -14,6 +14,7 @@
 from ...helpers.doi import DOIHandler
 from ...helpers.logger import LOG
 from ...helpers.validator import JSONValidator
+from ..metax_api_handler import MetaxServiceHandler
 from ..middlewares import get_session
 from ..operators import FolderOperator, Operator, UserOperator, ProjectOperator
 from .restapi import RESTAPIHandler
@@ -304,6 +305,8 @@ async def publish_folder(self, req: Request) -> Response:
         ]
         new_folder = await operator.update_folder(folder_id, patch)
 
+        await MetaxServiceHandler(req).publish_dataset(new_folder)
+
         body = ujson.dumps({"folderId": new_folder}, escape_forward_slashes=False)
         LOG.info(f"Patching folder with ID {new_folder} was successful.")
         return web.Response(body=body, status=200, content_type="application/json")
diff --git a/metadata_backend/api/metax_api_handler.py b/metadata_backend/api/metax_api_handler.py
index d579b407d..8b1a3129e 100644
--- a/metadata_backend/api/metax_api_handler.py
+++ b/metadata_backend/api/metax_api_handler.py
@@ -7,7 +7,7 @@
 
 from ..helpers.logger import LOG
 from .middlewares import get_session
-from .operators import UserOperator
+from .operators import FolderOperator, Operator, UserOperator
 
 
 class MetaxServiceHandler:
@@ -170,6 +170,56 @@ async def delete_draft_dataset(self, metax_id: str) -> None:
             reason = await resp.text()
             raise self.process_error(status, reason)
 
+    async def publish_dataset(self, folder_id: str) -> None:
+        """Publish draft datasets to Metax service.
+
+        Fetch metadataObjects of the published folder. Publish each object within the Metax service and
+        update the object's Metax status in the db.
+
+        :param folder_id: ID of the folder whose metadata objects are to be published
+        """
+        folder = await FolderOperator(self.db_client).read_folder(folder_id)
+        operator = Operator(self.db_client)
+        for object in folder["metadataObjects"]:
+            if object["schema"] in {"study", "dataset"}:
+                data, _ = await operator.read_metadata_object(object["schema"], object["accessionId"])
+                if isinstance(data, dict):
+                    metax_id = data["metaxIdentifier"]["identifier"]
+                    doi = data["doi"]
+                async with aiohttp.ClientSession() as sess:
+                    resp = await sess.post(
+                        f"{self.metax_url}{self.publish_route}",
+                        params={"identifier": metax_id},
+                        auth=aiohttp.BasicAuth(self.username, self.password),
+                    )
+                    status = resp.status
+                    if status == 200:
+                        preferred_id = await resp.json()
+                        if doi != preferred_id["preferred_identifier"]:
+                            LOG.warning(
+                                f"Metax Preferred Identifier {preferred_id['preferred_identifier']} "
+                                f"does not match object's DOI {doi}"
+                            )
+                        LOG.debug(
+                            f"Object {object['schema']} with accession ID {object['accessionId']} is "
+                            "published to Metax service."
+                        )
+                        await operator.update_metadata_object(
+                            object["schema"],
+                            object["accessionId"],
+                            {
+                                "metaxIdentifier": {
+                                    "identifier": metax_id,
+                                    "status": "published",
+                                }
+                            },
+                        )
+                    else:
+                        # TODO: how should the front end react to this?
+                        reason = await resp.text()
+                        raise self.process_error(status, reason)
+        LOG.info(f"Folder's {folder_id} metadata objects are published to Metax service.")
+
     async def create_metax_dataset_data_from_study(self, data: Dict) -> Dict:
         """Construct Metax dataset's research dataset dictionary from Submitters Study.
 
diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py
index 34d3debcd..94c55e3cd 100644
--- a/metadata_backend/api/operators.py
+++ b/metadata_backend/api/operators.py
@@ -482,12 +482,11 @@ async def _format_data_to_replace_and_add_to_db(
         :returns: Accession Id for object inserted to database
         """
         forbidden_keys = ["accessionId", "publishDate", "dateCreated"]
-        # when replacing from an XML file there is no metax data in the content
+        # when replacing from an XML file there is no (and should be no) metax data in the content
         # therefore we need to check if the object already exists in the database and has a metax id
         if schema_type in {"study", "dataset"}:
             read_data = await self.db_service.read(schema_type, accession_id)
-            if read_data.get("metaxIdentifier", None):
-                forbidden_keys.extend(["metaxIdentifier"])
+            forbidden_keys.extend(["metaxIdentifier"])
         if any(i in data for i in forbidden_keys):
             reason = f"Some items (e.g: {', '.join(forbidden_keys)}) cannot be changed."
             LOG.error(reason)
@@ -513,9 +512,15 @@ async def _format_data_to_update_and_add_to_db(self, schema_type: str, accession
         """
         forbidden_keys = ["accessionId", "publishDate", "dateCreated"]
         # check if object already has metax id or is it first time writing it
-        if schema_type in {"study", "dataset"}:
+        if schema_type in {"study", "dataset"} and data.get("metaxIdentifier", None):
             read_data = await self.db_service.read(schema_type, accession_id)
-            if read_data.get("metaxIdentifier", None):
+            # on first write the db does not yet have a metaxIdentifier and
+            # on publish the metax status inside metaxIdentifier is changed
+            # so we are checking that the metax id is still the same
+            if (
+                read_data.get("metaxIdentifier", None)
+                and data["metaxIdentifier"]["identifier"] != read_data["metaxIdentifier"]["identifier"]
+            ):
                 forbidden_keys.extend(["metaxIdentifier"])
         if any(i in data for i in forbidden_keys):
             reason = f"Some items (e.g: {', '.join(forbidden_keys)}) cannot be changed."
diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py
index 95557f7fc..ae19a8c38 100644
--- a/tests/integration/run_tests.py
+++ b/tests/integration/run_tests.py
@@ -939,6 +939,44 @@ async def test_metax_crud(sess, folder_id):
         assert metax_resp.status == 404, f"HTTP Status code error - expected 404 Not Found, got {metax_resp.status}"
 
 
+async def test_metax_publish_dataset(sess, folder_id):
+    """Test publishing dataset to Metax service after folder (submission) is published.
+ + :param sess: HTTP session in which request call is made + :param folder_id: id of the folder where objects reside + """ + # POST to object endpoint creates draft dataset in Metax for Study and Dataset + objects = [] + for schema, filename in { + ("study", "SRP000539.xml"), + ("dataset", "dataset.xml"), + }: + accession_id, _ = await post_object(sess, schema, folder_id, filename) + objects.append([schema, accession_id]) + + for object in objects: + schema, object_id = object + async with sess.get(f"{objects_url}/{schema}/{object_id}") as resp: + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" + res = await resp.json() + object.append(res["metaxIdentifier"]["identifier"]) + + await publish_folder(sess, folder_id) + + for schema, object_id, metax_id in objects: + async with sess.get(f"{objects_url}/{schema}/{object_id}") as resp: + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" + res = await resp.json() + actual = res["metaxIdentifier"] + expected = {"identifier": metax_id, "status": "published"} + assert expected == actual + + async with sess.get(f"{metax_url}/{metax_id}") as metax_resp: + assert metax_resp.status == 200, f"HTTP Status code error, got {resp.status}" + metax_res = await metax_resp.json() + assert metax_res["state"] == "published" + + async def test_crud_folders_works(sess): """Test folders REST api POST, GET, PATCH, PUBLISH and DELETE reqs. @@ -1701,6 +1739,7 @@ async def main(): } metax_folder_id = await post_folder(sess, metax_folder) await test_metax_crud(sess, metax_folder_id) + await test_metax_publish_dataset(sess, metax_folder_id) # Test add, modify, validate and release action with submissions LOG.debug("=== Testing actions within submissions ===") diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 1bc4ad412..0554e2731 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -711,6 +711,10 @@ async def setUpAsync(self): self.patch_operator = patch(class_operator, **self.operator_config, spec=True) self.MockedOperator = self.patch_operator.start() + class_metaxhandler = "metadata_backend.api.handlers.folder.MetaxServiceHandler" + self.patch_metaxhandler = patch(class_metaxhandler, spec=True) + self.MockedMetaxHandler = self.patch_metaxhandler.start() + async def tearDownAsync(self): """Cleanup mocked stuff.""" await super().tearDownAsync() @@ -718,6 +722,7 @@ async def tearDownAsync(self): self.patch_folderoperator.stop() self.patch_useroperator.stop() self.patch_operator.stop() + self.patch_metaxhandler.stop() async def test_folder_creation_works(self): """Test that folder is created and folder ID returned.""" @@ -837,6 +842,7 @@ async def test_folder_is_published(self): """Test that folder would be published and DOI would be added.""" self.MockedDoiHandler().create_draft_doi.return_value = self.test_draft_doi self.MockedFolderOperator().update_folder.return_value = self.folder_id + self.MockedMetaxHandler().publish_dataset.return_value = None response = await self.client.patch("/publish/FOL12345678") self.MockedDoiHandler().create_draft_doi.assert_called_once() self.MockedFolderOperator().update_folder.assert_called_once() From e8d439b47e2500a9bfd9dddf46b725bcee556172 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 23 Feb 2022 15:21:29 +0000 Subject: [PATCH 265/336] Update changelog and pyspell wordlist --- .github/config/.wordlist.txt | 4 ++++ CHANGELOG.md | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/config/.wordlist.txt 
b/.github/config/.wordlist.txt
index 5967b0faa..f207ff7d5 100644
--- a/.github/config/.wordlist.txt
+++ b/.github/config/.wordlist.txt
@@ -342,6 +342,9 @@ metagenomic
 metagenomics
 metatranscriptome
 metatranscriptomic
+metax
+metaxservicehandler
+metaxidentifier
 methylation
 methylcytidine
 mf
@@ -609,6 +612,7 @@ txt
 ui
 ujson
 umi
+uncomment
 unencryptedchecksum
 uniqueitems
 unlocalised
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 02cfc3f80..7e9dc5103 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,7 +8,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 
 ### Added
-
+- Integration with Metax service
+  - Adds new local container for testing against mocked Metax API
+  - Introduces new env vars: METAX_USER, METAX_PASS, METAX_URL
+  - Adds new key metaxIdentifier to Study and Dataset collections with dict {identifier: <metax id>, status: "draft"|"published"}
+  - Adds new handler MetaxServiceHandler to take care of mapping Submitter metadata to Metax metadata and to connect to Metax API
 - Add patching of folders after object save and update operations
   - Object is added or updated to folder(submission) where it belongs with it's accession ID, schema, submission type, title and filename in the case of CSV and XML upload
 - Adds configuration for mypy linting to VScode devcontainer setup
From ece28aecc70dd98d827526631c506f9d834ab333 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Thu, 24 Feb 2022 08:54:22 +0000
Subject: [PATCH 266/336] Add integration test trying to update metax id

---
 tests/integration/run_tests.py | 19 +++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py
index ae19a8c38..fc53995a5 100644
--- a/tests/integration/run_tests.py
+++ b/tests/integration/run_tests.py
@@ -939,6 +939,24 @@ async def test_metax_crud(sess, folder_id):
         assert metax_resp.status == 404, f"HTTP Status code error - expected 404 Not Found, got {metax_resp.status}"
 
 
+async def test_metax_id_not_updated_on_patch(sess, folder_id):
+    """Test that Metax id cannot be sent in patch.
+
+    :param sess: HTTP session in which request call is made
+    :param folder_id: id of the folder where objects reside
+    """
+    for schema, filename in {
+        ("study", "SRP000539.json"),
+        ("dataset", "dataset.json"),
+    }:
+        accession_id = await post_object_json(sess, schema, folder_id, filename)
+        async with sess.patch(
+            f"{objects_url}/{schema}/{accession_id}", data={"metaxIdentifier": {"identifier": "12345"}}
+        ) as resp:
+            LOG.debug(f"Try to patch object in {schema}")
+            assert resp.status == 400
+
+
 async def test_metax_publish_dataset(sess, folder_id):
     """Test publishing dataset to Metax service after folder (submission) is published.
@@ -1739,6 +1757,7 @@ async def main():
         }
         metax_folder_id = await post_folder(sess, metax_folder)
         await test_metax_crud(sess, metax_folder_id)
+        await test_metax_id_not_updated_on_patch(sess, metax_folder_id)
         await test_metax_publish_dataset(sess, metax_folder_id)
 
         # Test add, modify, validate and release action with submissions
From 9197574d7f0860ff3df8e80a5321712eb9055a0d Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Thu, 24 Feb 2022 09:20:09 +0000
Subject: [PATCH 267/336] Update env vars and build configs

Update docker-compose-tls.yml with targets.
Update default metax url if one is not found Update github workflow file --- .github/workflows/int.yml | 8 +++----- docker-compose-tls.yml | 7 ++++++- docker-compose.yml | 6 ++++-- metadata_backend/api/metax_api_handler.py | 2 +- tests/integration/run_tests.py | 2 +- 5 files changed, 15 insertions(+), 10 deletions(-) diff --git a/.github/workflows/int.yml b/.github/workflows/int.yml index 9134108a5..2e5d316f0 100644 --- a/.github/workflows/int.yml +++ b/.github/workflows/int.yml @@ -43,10 +43,7 @@ jobs: env: BASE_URL: http://localhost:5430 OIDC_URL: http://localhost:8000 - MONGO_HOST: localhost:27017 - MONGO_DATABASE: default - MONGO_AUTHDB: admin - + - name: Collect logs from docker if: ${{ failure() }} run: docker-compose logs --no-color -t > tests/dockerlogs || true @@ -78,6 +75,7 @@ jobs: MONGO_HOST: localhost:27017 MONGO_DATABASE: default MONGO_AUTHDB: admin + MONGO_SSL: True - name: Run Integration test run: | @@ -86,7 +84,7 @@ jobs: BASE_URL: http://localhost:5430 OIDC_URL: http://localhost:8000 MONGO_SSL: True - + - name: Collect logs from docker if: ${{ failure() }} run: docker-compose logs --no-color -t > tests/dockerlogs || true diff --git a/docker-compose-tls.yml b/docker-compose-tls.yml index f0a355346..c7f731643 100644 --- a/docker-compose-tls.yml +++ b/docker-compose-tls.yml @@ -4,6 +4,7 @@ services: build: dockerfile: Dockerfile-dev context: . + target: develop image: cscfi/metadata-submitter-dev container_name: "metadata_submitter_backend_dev" volumes: @@ -14,6 +15,7 @@ services: - database - mockauth - mockdoi + - mockmetax restart: on-failure environment: - "MONGO_HOST=${MONGO_HOST}" @@ -57,6 +59,7 @@ services: build: dockerfile: Dockerfile-dev context: . + target: develop image: cscfi/metadata-submitter-dev environment: - "LOG_LEVEL=${LOG_LEVEL}" @@ -74,6 +77,7 @@ services: build: dockerfile: Dockerfile-dev context: . + target: develop image: cscfi/metadata-submitter-dev hostname: mockdoi expose: @@ -87,6 +91,7 @@ services: build: dockerfile: Dockerfile-dev context: . + target: develop image: cscfi/metadata-submitter-dev hostname: mockmetax expose: @@ -97,4 +102,4 @@ services: - ./tests/integration/mock_metax_api.py:/mock_metax_api.py entrypoint: ["python", "/mock_metax_api.py", "0.0.0.0", "8002"] volumes: - data: + data: diff --git a/docker-compose.yml b/docker-compose.yml index 62f97f039..60bc332e5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,6 +13,7 @@ services: - database - mockauth - mockdoi + - mockmetax restart: on-failure environment: - "MONGO_HOST=${MONGO_HOST}" @@ -63,7 +64,7 @@ services: - 8000:8000 volumes: - ./tests/integration/mock_auth.py:/mock_auth.py - entrypoint: [ "python", "/mock_auth.py", "0.0.0.0", "8000" ] + entrypoint: ["python", "/mock_auth.py", "0.0.0.0", "8000"] mockdoi: build: dockerfile: Dockerfile-dev @@ -77,11 +78,12 @@ services: - 8001:8001 volumes: - ./tests/integration/mock_doi_api.py:/mock_doi_api.py - entrypoint: [ "python", "/mock_doi_api.py", "0.0.0.0", "8001" ] + entrypoint: ["python", "/mock_doi_api.py", "0.0.0.0", "8001"] mockmetax: build: dockerfile: Dockerfile-dev context: . 
+ target: develop image: cscfi/metadata-submitter-dev hostname: mockmetax expose: diff --git a/metadata_backend/api/metax_api_handler.py b/metadata_backend/api/metax_api_handler.py index 8b1a3129e..2bc1a02f1 100644 --- a/metadata_backend/api/metax_api_handler.py +++ b/metadata_backend/api/metax_api_handler.py @@ -26,7 +26,7 @@ def __init__(self, req: Request) -> None: self.username = os.getenv("METAX_USER", "sd") self.password = os.getenv("METAX_PASS", "test") - self.metax_url = os.getenv("METAX_URL", "http://mockmetax:8002") + self.metax_url = os.getenv("METAX_URL", "http://localhost:8002") self.rest_route = "/rest/v2/datasets" self.publish_route = "/rpc/v2/datasets/publish_dataset" diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index fc53995a5..b182b4982 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -61,7 +61,7 @@ users_url = f"{base_url}/users" submit_url = f"{base_url}/submit" publish_url = f"{base_url}/publish" -metax_url = "http://mockmetax:8002/rest/v2/datasets" +metax_url = f"{os.getenv('METAX_URL', 'http://localhost:8002')}/rest/v2/datasets" # to form direct contact to db with create_folder() DATABASE = os.getenv("MONGO_DATABASE", "default") AUTHDB = os.getenv("MONGO_AUTHDB", "admin") From 9d18e61b4edb441246f9299561dafc5dafbda88f Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Tue, 1 Mar 2022 10:27:43 +0000 Subject: [PATCH 268/336] Update metax related confs and take them in use Also changes mock_metax_api delete response to HTTPNoContent. --- metadata_backend/api/metax_api_handler.py | 15 +++++++-------- metadata_backend/conf/conf.py | 3 +++ tests/integration/mock_metax_api.py | 3 +-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/metadata_backend/api/metax_api_handler.py b/metadata_backend/api/metax_api_handler.py index 2bc1a02f1..117267ced 100644 --- a/metadata_backend/api/metax_api_handler.py +++ b/metadata_backend/api/metax_api_handler.py @@ -1,10 +1,10 @@ """Class for handling calls to METAX API.""" -import os from typing import Any, Dict import aiohttp from aiohttp.web import HTTPBadRequest, HTTPError, HTTPForbidden, HTTPNotFound, Request +from ..conf.conf import metax_config from ..helpers.logger import LOG from .middlewares import get_session from .operators import FolderOperator, Operator, UserOperator @@ -24,13 +24,12 @@ def __init__(self, req: Request) -> None: self.req = req self.db_client = self.req.app["db_client"] - self.username = os.getenv("METAX_USER", "sd") - self.password = os.getenv("METAX_PASS", "test") - self.metax_url = os.getenv("METAX_URL", "http://localhost:8002") - self.rest_route = "/rest/v2/datasets" - self.publish_route = "/rpc/v2/datasets/publish_dataset" - - catalog_pid = "urn:nbn:fi:att:data-catalog-sd" + self.username = metax_config["username"] + self.password = metax_config["password"] + self.metax_url = metax_config["url"] + self.rest_route = metax_config["rest_route"] + self.publish_route = metax_config["publish_route"] + catalog_pid = metax_config["catalog_pid"] self.minimal_dataset_template: Dict[Any, Any] = { "data_catalog": catalog_pid, diff --git a/metadata_backend/conf/conf.py b/metadata_backend/conf/conf.py index 6e89812c2..edcd87ac5 100644 --- a/metadata_backend/conf/conf.py +++ b/metadata_backend/conf/conf.py @@ -164,4 +164,7 @@ def create_db_client() -> AsyncIOMotorClient: "username": os.getenv("METAX_USER", "sd"), "password": os.getenv("METAX_PASS", "test"), "url": os.getenv("METAX_URL", "http://mockmetax:8002"), + "rest_route": 
"/rest/v2/datasets", + "publish_route": "/rpc/v2/datasets/publish_dataset", + "catalog_pid": "urn:nbn:fi:att:data-catalog-sd", } diff --git a/tests/integration/mock_metax_api.py b/tests/integration/mock_metax_api.py index 79865bb1d..bb0624eae 100644 --- a/tests/integration/mock_metax_api.py +++ b/tests/integration/mock_metax_api.py @@ -41,7 +41,6 @@ async def get_dataset(req: web.Request) -> web.Response: :return: HTTP response with mocked Metax dataset data """ metax_id = req.match_info["metax_id"] - # await asyncio.sleep(1) LOG.info(f"Retrieving Metax dataset {metax_id}") if not metax_id: LOG.error("Query params missing Metax ID.") @@ -191,7 +190,7 @@ async def delete_dataset(req: web.Request) -> web.Response: else: del drafts[metax_id] LOG.info(f"Deleted Metax dataset with identifier {metax_id}") - return web.Response(status=204) + return web.HTTPNoContent() async def validate_payload(req: web.Request, draft=True) -> dict: From 20be8ce7a59deb690b2664a80a212ab7fe69b27e Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Tue, 1 Mar 2022 11:37:01 +0000 Subject: [PATCH 269/336] Change location of metax_api_handler --- metadata_backend/api/handlers/folder.py | 2 +- metadata_backend/api/handlers/object.py | 2 +- metadata_backend/{api => helpers}/metax_api_handler.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) rename metadata_backend/{api => helpers}/metax_api_handler.py (98%) diff --git a/metadata_backend/api/handlers/folder.py b/metadata_backend/api/handlers/folder.py index 4ebe9e90b..5768a25be 100644 --- a/metadata_backend/api/handlers/folder.py +++ b/metadata_backend/api/handlers/folder.py @@ -14,7 +14,7 @@ from ...helpers.doi import DOIHandler from ...helpers.logger import LOG from ...helpers.validator import JSONValidator -from ..metax_api_handler import MetaxServiceHandler +from ...helpers.metax_api_handler import MetaxServiceHandler from ..middlewares import get_session from ..operators import FolderOperator, Operator, UserOperator, ProjectOperator from .restapi import RESTAPIHandler diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index 3886f7e0d..bf97b8a76 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -9,7 +9,7 @@ from ...helpers.logger import LOG from ...helpers.validator import JSONValidator -from ..metax_api_handler import MetaxServiceHandler +from ...helpers.metax_api_handler import MetaxServiceHandler from ..operators import FolderOperator, Operator, XMLOperator from .common import multipart_content from .restapi import RESTAPIHandler diff --git a/metadata_backend/api/metax_api_handler.py b/metadata_backend/helpers/metax_api_handler.py similarity index 98% rename from metadata_backend/api/metax_api_handler.py rename to metadata_backend/helpers/metax_api_handler.py index 117267ced..611b6c25f 100644 --- a/metadata_backend/api/metax_api_handler.py +++ b/metadata_backend/helpers/metax_api_handler.py @@ -4,10 +4,10 @@ import aiohttp from aiohttp.web import HTTPBadRequest, HTTPError, HTTPForbidden, HTTPNotFound, Request +from ..api.middlewares import get_session +from ..api.operators import FolderOperator, Operator, UserOperator from ..conf.conf import metax_config -from ..helpers.logger import LOG -from .middlewares import get_session -from .operators import FolderOperator, Operator, UserOperator +from .logger import LOG class MetaxServiceHandler: From 995bcbe223858d812e63d6faeede8eeb001a7140 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Fri, 4 Mar 2022 11:16:18 
+0000
Subject: [PATCH 270/336] Fix extra DB read during Metax data creation

Change operator functions responsible for metadata object creation to
return full object data.
---
 metadata_backend/api/handlers/object.py       | 95 ++++++++++++-------
 metadata_backend/api/handlers/submission.py   |  4 +-
 metadata_backend/api/handlers/template.py     | 16 ++--
 metadata_backend/api/operators.py             | 48 +++++-----
 metadata_backend/helpers/metax_api_handler.py | 21 ++--
 tests/integration/run_tests.py                | 25 ++---
 tests/test_handlers.py                        | 11 ++-
 tests/test_operators.py                       | 22 ++---
 8 files changed, 139 insertions(+), 103 deletions(-)

diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py
index bf97b8a76..3ea28b564 100644
--- a/metadata_backend/api/handlers/object.py
+++ b/metadata_backend/api/handlers/object.py
@@ -8,8 +8,8 @@
 from multidict import CIMultiDict
 
 from ...helpers.logger import LOG
-from ...helpers.validator import JSONValidator
 from ...helpers.metax_api_handler import MetaxServiceHandler
+from ...helpers.validator import JSONValidator
 from ..operators import FolderOperator, Operator, XMLOperator
 from .common import multipart_content
 from .restapi import RESTAPIHandler
@@ -142,32 +142,36 @@ async def post_object(self, req: Request) -> Response:
         data: Union[List[Dict[str, str]], Dict[str, str]]
         if isinstance(content, List):
             LOG.debug(f"Inserting multiple objects for {schema_type}.")
-            ids: List[Dict[str, str]] = []
+            objects: List[Dict[str, Any]] = []
             for item in content:
-                accession_id, title = await operator.create_metadata_object(collection, item[0])
-                ids.append({"accessionId": accession_id, "title": title})
-                LOG.info(f"POST object with accesssion ID {accession_id} in schema {collection} was successful.")
+                json_data = await operator.create_metadata_object(collection, item[0])
+                objects.append(json_data)
+                LOG.info(
+                    f"POST object with accession ID {json_data['accessionId']} in schema {collection} was successful."
+                )
             # we format like this to make it consistent with the response from /submit endpoint
-            data = [dict({"accessionId": item["accessionId"]}, **{"schema": schema_type}) for item in ids]
+            data = [dict({"accessionId": item["accessionId"]}, **{"schema": schema_type}) for item in objects]
             # we take the first result if we get multiple
             location_headers = CIMultiDict(Location=f"{url}/{data[0]['accessionId']}")
         else:
-            accession_id, title = await operator.create_metadata_object(collection, content)
-            data = {"accessionId": accession_id}
-            location_headers = CIMultiDict(Location=f"{url}/{accession_id}")
-            LOG.info(f"POST object with accesssion ID {accession_id} in schema {collection} was successful.")
+            json_data = await operator.create_metadata_object(collection, content)
+            data = {"accessionId": json_data["accessionId"]}
+            location_headers = CIMultiDict(Location=f"{url}/{json_data['accessionId']}")
+            LOG.info(
+                f"POST object with accession ID {json_data['accessionId']} in schema {collection} was successful."
+ ) # Gathering data for object to be added to folder if not isinstance(data, List): - ids = [dict(data, **{"title": title})] + objects = [json_data] folder_op = FolderOperator(db_client) - patch = await self.prepare_folder_patch_new_object(collection, ids, patch_params) + patch = self.prepare_folder_patch_new_object(collection, objects, patch_params) await folder_op.update_folder(folder_id, patch) # Create draft dataset to Metax catalog if collection in {"study", "dataset"}: - [await self.create_or_update_metax_dataset(req, collection, item["accessionId"]) for item in ids] + [await self.create_metax_dataset(req, collection, item) for item in objects] body = ujson.dumps(data, escape_forward_slashes=False) @@ -284,12 +288,12 @@ async def put_object(self, req: Request) -> Response: raise web.HTTPUnauthorized(reason=reason) accession_id, title = await operator.replace_metadata_object(collection, accession_id, content) - patch = await self.prepare_folder_patch_update_object(collection, accession_id, title, filename) + patch = self.prepare_folder_patch_update_object(collection, accession_id, title, filename) await folder_op.update_folder(folder_id, patch) # Update draft dataset to Metax catalog if collection in {"study", "dataset"}: - await self.create_or_update_metax_dataset(req, collection, accession_id) + await self.update_metax_dataset(req, collection, accession_id) body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) LOG.info(f"PUT object with accession ID {accession_id} in schema {collection} was successful.") @@ -335,24 +339,24 @@ async def patch_object(self, req: Request) -> Response: # If there's changed title it will be updated to folder try: title = content["descriptor"]["studyTitle"] if collection == "study" else content["title"] - patch = await self.prepare_folder_patch_update_object(collection, accession_id, title) + patch = self.prepare_folder_patch_update_object(collection, accession_id, title) await folder_op.update_folder(folder_id, patch) except (TypeError, KeyError): pass # Update draft dataset to Metax catalog if collection in {"study", "dataset"}: - await self.create_or_update_metax_dataset(req, collection, accession_id) + await self.update_metax_dataset(req, collection, accession_id) body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) LOG.info(f"PATCH object with accession ID {accession_id} in schema {collection} was successful.") return web.Response(body=body, status=200, content_type="application/json") - async def prepare_folder_patch_new_object(self, schema: str, ids: List, params: Dict[str, str]) -> List: + def prepare_folder_patch_new_object(self, schema: str, objects: List, params: Dict[str, str]) -> List: """Prepare patch operations list for adding an object or objects to a folder. 
        :param schema: schema of objects to be added to the folder
-        :param ids: object IDs
+        :param objects: metadata objects
         :param params: addidtional data required for db entry
         :returns: list of patch operations
         """
 
         patch = []
         patch_ops: Dict[str, Any] = {}
-        for id in ids:
+        for object in objects:
+            try:
+                title = object["descriptor"]["studyTitle"] if schema in ["study", "draft-study"] else object["title"]
+            except (TypeError, KeyError):
+                title = ""
+
             patch_ops = {
                 "op": "add",
                 "path": path,
                 "value": {
-                    "accessionId": id["accessionId"],
+                    "accessionId": object["accessionId"],
                     "schema": schema,
                     "tags": {
                         "submissionType": submission_type,
-                        "displayTitle": id["title"],
+                        "displayTitle": title,
                     },
                 },
             }
 
             patch.append(patch_ops)
         return patch
 
-    async def prepare_folder_patch_update_object(
+    def prepare_folder_patch_update_object(
         self, schema: str, accession_id: str, title: str, filename: str = ""
     ) -> List:
         """Prepare patch operation for updating object's title in a folder.
@@ -422,7 +431,34 @@
         return [patch_op]
 
     # TODO: update doi related code
-    async def create_or_update_metax_dataset(self, req: Request, collection: str, accession_id: str) -> str:
+    async def create_metax_dataset(self, req: Request, collection: str, object: Dict) -> str:
+        """Handle connection to Metax api handler.
+
+        Sends Dataset or Study object's data to Metax api handler.
+        When creating a new dataset, the object in the database is updated with the returned Metax ID.
+        Has temporary DOI fetching, which will be changed to use real data.
+
+        :param req: HTTP request
+        :param collection: object's schema
+        :param object: metadata object
+        :returns: Metax ID
+        """
+        metax_service = MetaxServiceHandler(req)
+        operator = Operator(req.app["db_client"])
+        # MYPY related if statement, Operator (when not XMLOperator) always returns object_data as dict
+        if isinstance(object, Dict):
+            LOG.info("Creating draft dataset to Metax.")
+            object["doi"] = await self.create_doi()
+            metax_id = await metax_service.post_dataset_as_draft(collection, object)
+            new_info = {"doi": object["doi"], "metaxIdentifier": {"identifier": metax_id, "status": "draft"}}
+            await operator.update_metadata_object(collection, object["accessionId"], new_info)
+        else:
+            raise ValueError("Object's data must be dictionary")
+        return metax_id
+
+    # TODO: update doi related code
+    async def update_metax_dataset(self, req: Request, collection: str, accession_id: str) -> str:
+        """Handle connection to Metax api handler.
 
         Sends Dataset or Study object's data to Metax api handler.
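
The net effect of this rename is a split into a pure create path and a pure update path; condensed, the create path now threads the freshly inserted object straight through to Metax without re-reading it from the database. A sketch based on the diff, with the same names and error handling elided:

    # sketch: POST handler path for study/dataset collections
    json_data = await operator.create_metadata_object(collection, content)  # full object dict
    json_data["doi"] = await self.create_doi()  # temporary DOI
    metax_id = await metax_service.post_dataset_as_draft(collection, json_data)
    await operator.update_metadata_object(
        collection,
        json_data["accessionId"],
        {"doi": json_data["doi"], "metaxIdentifier": {"identifier": metax_id, "status": "draft"}},
    )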
@@ -440,15 +476,8 @@ async def create_or_update_metax_dataset(self, req: Request, collection: str, ac object_data, _ = await operator.read_metadata_object(collection, accession_id) # MYPY related if statement, Operator (when not XMLOperator) always returns object_data as dict if isinstance(object_data, Dict): - if object_data.get("metaxIdentifier", None): - LOG.info("Updating draft dataset to Metax.") - metax_id = await metax_service.update_draft_dataset(collection, object_data) - else: - LOG.info("Creating draft dataset to Metax.") - object_data["doi"] = await self.create_doi() - metax_id = await metax_service.post_dataset_as_draft(collection, object_data) - new_info = {"doi": object_data["doi"], "metaxIdentifier": {"identifier": metax_id, "status": "draft"}} - accession_id = await operator.update_metadata_object(collection, accession_id, new_info) + LOG.info("Updating draft dataset to Metax.") + metax_id = await metax_service.update_draft_dataset(collection, object_data) else: raise ValueError("Object's data must be dictionary") return metax_id diff --git a/metadata_backend/api/handlers/submission.py b/metadata_backend/api/handlers/submission.py index e8386ca50..9c741cfee 100644 --- a/metadata_backend/api/handlers/submission.py +++ b/metadata_backend/api/handlers/submission.py @@ -128,9 +128,9 @@ async def _execute_action(self, schema: str, content: str, db_client: AsyncIOMot :returns: Dict containing specific action that was completed """ if action == "add": - assession_id, _ = await XMLOperator(db_client).create_metadata_object(schema, content) + json_data = await XMLOperator(db_client).create_metadata_object(schema, content) result = { - "accessionId": assession_id, + "accessionId": json_data["accessionId"], "schema": schema, } LOG.debug(f"added some content in {schema} ...") diff --git a/metadata_backend/api/handlers/template.py b/metadata_backend/api/handlers/template.py index a5cfff50c..42aadc468 100644 --- a/metadata_backend/api/handlers/template.py +++ b/metadata_backend/api/handlers/template.py @@ -122,12 +122,12 @@ async def post_template(self, req: Request) -> Response: # Process template # Move projectId to template structure, so that it is saved in mongo tmpl["template"]["projectId"] = tmpl["projectId"] - accession_id, _ = await operator.create_metadata_object(collection, tmpl["template"]) - data = [{"accessionId": accession_id, "schema": collection}] + json_data = await operator.create_metadata_object(collection, tmpl["template"]) + data = [{"accessionId": json_data["accessionId"], "schema": collection}] if "tags" in tmpl: data[0]["tags"] = tmpl["tags"] await project_op.assign_templates(tmpl["projectId"], data) - tmpl_list.append({"accessionId": accession_id}) + tmpl_list.append({"accessionId": json_data["accessionId"]}) body = ujson.dumps(tmpl_list, escape_forward_slashes=False) else: @@ -155,17 +155,17 @@ async def post_template(self, req: Request) -> Response: # Process template # Move projectId to template structure, so that it is saved in mongo content["template"]["projectId"] = content["projectId"] - accession_id, _ = await operator.create_metadata_object(collection, content["template"]) - data = [{"accessionId": accession_id, "schema": collection}] + json_data = await operator.create_metadata_object(collection, content["template"]) + data = [{"accessionId": json_data["accessionId"], "schema": collection}] if "tags" in content: data[0]["tags"] = content["tags"] await project_op.assign_templates(content["projectId"], data) - body = ujson.dumps({"accessionId": 
accession_id}, escape_forward_slashes=False)
+        body = ujson.dumps({"accessionId": json_data["accessionId"]}, escape_forward_slashes=False)
 
         url = f"{req.scheme}://{req.host}{req.path}"
-        location_headers = CIMultiDict(Location=f"{url}/{accession_id}")
-        LOG.info(f"POST template with accesssion ID {accession_id} in schema {collection} was successful.")
+        location_headers = CIMultiDict(Location=f"{url}/{json_data['accessionId']}")
+        LOG.info(f"POST template with accession ID {json_data['accessionId']} in schema {collection} was successful.")
         return web.Response(
             body=body,
             status=201,
diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py
index 94c55e3cd..10c04b746 100644
--- a/metadata_backend/api/operators.py
+++ b/metadata_backend/api/operators.py
@@ -40,7 +40,7 @@ def __init__(self, db_name: str, content_type: str, db_client: AsyncIOMotorClien
         self.db_service = DBService(db_name, db_client)
         self.content_type = content_type
 
-    async def create_metadata_object(self, schema_type: str, data: Union[Dict, str]) -> Tuple[str, str]:
+    async def create_metadata_object(self, schema_type: str, data: Union[Dict, str]) -> Dict:
         """Create new metadata object to database.
 
         Data formatting and addition step for JSON or XML must be implemented
@@ -50,9 +50,11 @@
         :param data: Data to be saved to database.
-        :returns: Accession id for the object inserted to database
+        :returns: Dict of the object inserted to database
         """
-        accession_id, title = await self._format_data_to_create_and_add_to_db(schema_type, data)
-        LOG.info(f"Inserting object with schema {schema_type} to database succeeded with accession id: {accession_id}")
-        return accession_id, title
+        data = await self._format_data_to_create_and_add_to_db(schema_type, data)
+        LOG.info(
+            f"Inserting object with schema {schema_type} to database succeeded with accession id: {data['accessionId']}"
+        )
+        return data
 
     async def replace_metadata_object(
         self, schema_type: str, accession_id: str, data: Union[Dict, str]
     ) -> Tuple[str, str]:
@@ -130,7 +132,7 @@ async def delete_metadata_object(self, schema_type: str, accession_id: str) -> s
             LOG.error(reason)
             raise web.HTTPBadRequest(reason=reason)
 
-    async def _insert_formatted_object_to_db(self, schema_type: str, data: Dict) -> Tuple[str, str]:
+    async def _insert_formatted_object_to_db(self, schema_type: str, data: Dict) -> bool:
         """Insert formatted metadata object to database.
 
         :param schema_type: Schema type of the object to insert.
@@ -144,16 +146,12 @@
             reason = f"Error happened while getting object: {error}"
             LOG.error(reason)
             raise web.HTTPBadRequest(reason=reason)
-        if insert_success:
-            try:
-                title = data["descriptor"]["studyTitle"] if schema_type in ["study", "draft-study"] else data["title"]
-            except (TypeError, KeyError):
-                title = ""
-            return data["accessionId"], title
-        else:
+
+        if not insert_success:
             reason = "Inserting object to database failed for some reason."
             LOG.error(reason)
             raise web.HTTPBadRequest(reason=reason)
+        return True
 
     async def _replace_object_from_db(self, schema_type: str, accession_id: str, data: Dict) -> Tuple[str, str]:
         """Replace formatted metadata object in database.
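
For callers of the operator, the visible change in this commit is the return value of create_metadata_object; a before/after sketch:

    # before this commit: a tuple of accession id and title
    accession_id, title = await operator.create_metadata_object("study", data)

    # after this commit: the full formatted object
    json_data = await operator.create_metadata_object("study", data)
    accession_id = json_data["accessionId"]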
@@ -260,7 +258,7 @@ async def check_exists(self, schema_type: str, accession_id: str) -> None:
         raise web.HTTPNotFound(reason=reason)
 
     @abstractmethod
-    async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Any) -> Tuple[str, str]:
+    async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Any) -> Dict:
         """Format and add data to database.
 
         Must be implemented by subclass.
@@ -442,7 +440,7 @@ async def query_metadata_database(
         )
         return data, page_num, page_size, total_objects[0]["total"]
 
-    async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Dict) -> Tuple[str, str]:
+    async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Dict) -> Dict:
         """Format JSON metadata object and add it to db.
 
         Adds necessary additional information to object before adding to db.
 
         if schema_type == "study":
             data["publishDate"] = datetime.utcnow() + relativedelta(months=2)
         LOG.debug(f"Operator formatted data for {schema_type} to add to DB.")
-        return await self._insert_formatted_object_to_db(schema_type, data)
+        await self._insert_formatted_object_to_db(schema_type, data)
+        return data
 
     async def _format_data_to_replace_and_add_to_db(
@@ -512,15 +512,12 @@ async def _format_data_to_update_and_add_to_db(self, schema_type: str, accession
         """
         forbidden_keys = ["accessionId", "publishDate", "dateCreated"]
         # check if object already has metax id or is it first time writing it
-        if schema_type in {"study", "dataset"} and data.get("metaxIdentifier", None):
+        if schema_type in {"study", "dataset"}:
             read_data = await self.db_service.read(schema_type, accession_id)
             # on first write the db does not yet have a metaxIdentifier and
             # on publish the metax status inside metaxIdentifier is changed
             # so we are checking that the metax id is still the same
-            if (
-                read_data.get("metaxIdentifier", None)
-                and data["metaxIdentifier"]["identifier"] != read_data["metaxIdentifier"]["identifier"]
-            ):
+            if read_data.get("metaxIdentifier", None):
                 forbidden_keys.extend(["metaxIdentifier"])
         if any(i in data for i in forbidden_keys):
             reason = f"Some items (e.g: {', '.join(forbidden_keys)}) cannot be changed."
@@ -600,7 +597,7 @@ def __init__(self, db_client: AsyncIOMotorClient) -> None:
         """
         super().__init__(mongo_database, "text/xml", db_client)
 
-    async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: str) -> Tuple[str, str]:
+    async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: str) -> Dict:
         """Format XML metadata object and add it to db.
 
         XML is validated, then parsed to JSON, which is added to database.
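
In effect, the metaxIdentifier guard in the update path above freezes the identifier once one has been stored for the object; a toy illustration of the check, with read_data standing in for the stored document and all values invented:

    forbidden_keys = ["accessionId", "publishDate", "dateCreated"]
    read_data = {"accessionId": "EGA123", "metaxIdentifier": {"identifier": "abc123", "status": "draft"}}
    if read_data.get("metaxIdentifier", None):
        forbidden_keys.extend(["metaxIdentifier"])

    incoming = {"metaxIdentifier": {"identifier": "something-else"}, "title": "New title"}
    assert any(i in incoming for i in forbidden_keys)  # -> the update is rejected with HTTP 400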
@@ -614,12 +611,15 @@
         # remove `draft-` from schema type
         schema = schema_type[6:] if schema_type.startswith("draft") else schema_type
         data_as_json = XMLToJSONParser().parse(schema, data)
-        accession_id, title = await Operator(db_client)._format_data_to_create_and_add_to_db(schema_type, data_as_json)
+        data_with_id = await Operator(db_client)._format_data_to_create_and_add_to_db(schema_type, data_as_json)
         LOG.debug(f"XMLOperator formatted data for xml-{schema_type} to add to DB")
-        return await self._insert_formatted_object_to_db(
-            f"xml-{schema_type}", {"accessionId": accession_id, "title": title, "content": data}
+
+        await self._insert_formatted_object_to_db(
+            f"xml-{schema_type}", {"accessionId": data_with_id["accessionId"], "content": data}
         )
+
+        return data_with_id
+
     async def _format_data_to_replace_and_add_to_db(
         self, schema_type: str, accession_id: str, data: str
     ) -> Tuple[str, str]:
diff --git a/metadata_backend/helpers/metax_api_handler.py b/metadata_backend/helpers/metax_api_handler.py
index 611b6c25f..4af8d13b1 100644
--- a/metadata_backend/helpers/metax_api_handler.py
+++ b/metadata_backend/helpers/metax_api_handler.py
@@ -203,16 +203,17 @@ async def publish_dataset(self, folder_id: str) -> None:
                             f"Object {object['schema']} with accession ID {object['accessionId']} is "
                             "published to Metax service."
                         )
-                        await operator.update_metadata_object(
-                            object["schema"],
-                            object["accessionId"],
-                            {
-                                "metaxIdentifier": {
-                                    "identifier": metax_id,
-                                    "status": "published",
-                                }
-                            },
-                        )
+                        # This must be updated as Metax identifier will be moved to folder from object after publishing
+                        # await operator.update_metadata_object(
+                        #     object["schema"],
+                        #     object["accessionId"],
+                        #     {
+                        #         "metaxIdentifier": {
+                        #             "identifier": metax_id,
+                        #             "status": "published",
+                        #         }
+                        #     },
+                        # )
                     else:
                         # TODO: how should the front end react to this?
reason = await resp.text() diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index b182b4982..e72eec1f1 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -981,18 +981,19 @@ async def test_metax_publish_dataset(sess, folder_id): await publish_folder(sess, folder_id) - for schema, object_id, metax_id in objects: - async with sess.get(f"{objects_url}/{schema}/{object_id}") as resp: - assert resp.status == 200, f"HTTP Status code error, got {resp.status}" - res = await resp.json() - actual = res["metaxIdentifier"] - expected = {"identifier": metax_id, "status": "published"} - assert expected == actual - - async with sess.get(f"{metax_url}/{metax_id}") as metax_resp: - assert metax_resp.status == 200, f"HTTP Status code error, got {resp.status}" - metax_res = await metax_resp.json() - assert metax_res["state"] == "published" + # TODO: This must be updated as Metax identifier will be moved to folder from object after publishing + # for schema, object_id, metax_id in objects: + # async with sess.get(f"{objects_url}/{schema}/{object_id}") as resp: + # assert resp.status == 200, f"HTTP Status code error, got {resp.status}" + # res = await resp.json() + # actual = res["metaxIdentifier"] + # expected = {"identifier": metax_id, "status": "published"} + # assert expected == actual + + # async with sess.get(f"{metax_url}/{metax_id}") as metax_resp: + # assert metax_resp.status == 200, f"HTTP Status code error, got {resp.status}" + # metax_res = await metax_resp.json() + # assert metax_res["state"] == "published" async def test_crud_folders_works(sess): diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 0554e2731..2506c8811 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -154,7 +154,7 @@ async def fake_xmloperator_read_metadata_object(self, schema_type, accession_id) async def fake_xmloperator_create_metadata_object(self, schema_type, content): """Fake create operation to return mocked accessionId.""" - return self.test_ega_string, "title" + return {"accessionId": self.test_ega_string, "title": "title"} async def fake_xmloperator_replace_metadata_object(self, schema_type, accession_id, content): """Fake replace operation to return mocked accessionId.""" @@ -162,7 +162,7 @@ async def fake_xmloperator_replace_metadata_object(self, schema_type, accession_ async def fake_operator_create_metadata_object(self, schema_type, content): """Fake create operation to return mocked accessionId.""" - return self.test_ega_string, "title" + return {"accessionId": self.test_ega_string, "title": "title"} async def fake_operator_update_metadata_object(self, schema_type, accession_id, content): """Fake update operation to return mocked accessionId.""" @@ -376,6 +376,11 @@ async def setUpAsync(self): self.patch_folderoperator = patch(class_folderoperator, **self.folderoperator_config, spec=True) self.MockedFolderOperator = self.patch_folderoperator.start() + class_metaxhandler = "metadata_backend.api.handlers.object.MetaxServiceHandler" + self.patch_metaxhandler = patch(class_metaxhandler, spec=True) + self.MockedMetaxHandler = self.patch_metaxhandler.start() + self.MockedMetaxHandler().post_dataset_as_draft.return_value = "123-456" + async def tearDownAsync(self): """Cleanup mocked stuff.""" await super().tearDownAsync() @@ -383,6 +388,7 @@ async def tearDownAsync(self): self.patch_csv_parser.stop() self.patch_folderoperator.stop() self.patch_operator.stop() + self.patch_metaxhandler.stop() async def 
test_submit_object_works(self): """Test that submission is handled, XMLOperator is called.""" @@ -444,7 +450,6 @@ async def test_post_object_works_with_csv(self): file_content = self.get_file_data("sample", "EGAformat.csv") self.MockedCSVParser().parse.return_value = [{}, {}, {}] response = await self.client.post("/objects/sample", params={"folder": "some id"}, data=data) - print("=== RESP ===", await response.text()) json_resp = await response.json() self.assertEqual(response.status, 201) self.assertEqual(self.test_ega_string, json_resp[0]["accessionId"]) diff --git a/tests/test_operators.py b/tests/test_operators.py index 054d57290..9121b16ed 100644 --- a/tests/test_operators.py +++ b/tests/test_operators.py @@ -209,9 +209,9 @@ async def test_json_create_passes_and_returns_accessionId(self): "descriptor": {"studyTitle": "Highly", "studyType": "Other"}, } operator.db_service.create.return_value = True - accession, _ = await operator.create_metadata_object("study", data) + data = await operator.create_metadata_object("study", data) operator.db_service.create.assert_called_once() - self.assertEqual(accession, self.accession_id) + self.assertEqual(data["accessionId"], self.accession_id) async def test_json_replace_passes_and_returns_accessionId(self): """Test replace method for JSON works.""" @@ -282,19 +282,19 @@ async def test_xml_create_passes_and_returns_accessionId(self): operator.db_service.create.return_value = True with patch( ("metadata_backend.api.operators.Operator._format_data_to_create_and_add_to_db"), - return_value=(self.accession_id, "title"), + return_value={"accessionId": self.accession_id}, ): with patch("metadata_backend.api.operators.XMLToJSONParser"): - accession, _ = await operator.create_metadata_object("study", "") + data = await operator.create_metadata_object("study", "") operator.db_service.create.assert_called_once() - self.assertEqual(accession, self.accession_id) + self.assertEqual(data["accessionId"], self.accession_id) async def test_correct_data_is_set_to_json_when_creating(self): """Test operator creates object and adds necessary info.""" operator = Operator(self.client) with patch( ("metadata_backend.api.operators.Operator._insert_formatted_object_to_db"), - return_value=self.accession_id, + return_value=True, ) as mocked_insert: with patch("metadata_backend.api.operators.datetime") as m_date: m_date.utcnow.return_value = datetime.datetime(2020, 4, 14) @@ -308,7 +308,7 @@ async def test_correct_data_is_set_to_json_when_creating(self): "publishDate": datetime.datetime(2020, 6, 14), }, ) - self.assertEqual(acc, self.accession_id) + self.assertEqual(acc["accessionId"], self.accession_id) async def test_wrong_data_is_set_to_json_when_replacing(self): """Test operator replace catches error.""" @@ -402,18 +402,18 @@ async def test_correct_data_is_set_to_xml_when_creating(self): xml_data = "" with patch( ("metadata_backend.api.operators.Operator._format_data_to_create_and_add_to_db"), - return_value=(self.accession_id, "title"), + return_value={"accessionId": self.accession_id}, ): with patch( ("metadata_backend.api.operators.XMLOperator._insert_formatted_object_to_db"), - return_value=self.accession_id, + return_value=True, ) as m_insert: with patch("metadata_backend.api.operators.XMLToJSONParser"): acc = await (operator._format_data_to_create_and_add_to_db("study", xml_data)) m_insert.assert_called_once_with( - "xml-study", {"accessionId": self.accession_id, "title": "title", "content": xml_data} + "xml-study", {"accessionId": self.accession_id, "content": 
xml_data}
                    )
-                self.assertEqual(acc, self.accession_id)
+                self.assertEqual(acc["accessionId"], self.accession_id)

     async def test_correct_data_is_set_to_xml_when_replacing(self):
         """Test XMLoperator replaces object and adds necessary info."""

From 77d864adc3c093375de7d2d9d8b037c385c03116 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Fri, 4 Mar 2022 12:55:44 +0000
Subject: [PATCH 271/336] Fix extra DB read during Metax data update

Change the operator functions responsible for metadata object replacement
to return full object data.
---
 metadata_backend/api/handlers/object.py | 19 +++++-----
 metadata_backend/api/operators.py       | 46 +++++++++----------------
 tests/test_handlers.py                  |  4 +--
 tests/test_operators.py                 | 10 +++---
 4 files changed, 34 insertions(+), 45 deletions(-)

diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py
index 3ea28b564..d07f49cef 100644
--- a/metadata_backend/api/handlers/object.py
+++ b/metadata_backend/api/handlers/object.py
@@ -287,8 +287,8 @@ async def put_object(self, req: Request) -> Response:
             LOG.error(reason)
             raise web.HTTPUnauthorized(reason=reason)

-        accession_id, title = await operator.replace_metadata_object(collection, accession_id, content)
-        patch = self.prepare_folder_patch_update_object(collection, accession_id, title, filename)
+        data = await operator.replace_metadata_object(collection, accession_id, content)
+        patch = self.prepare_folder_patch_update_object(collection, data, filename)
         await folder_op.update_folder(folder_id, patch)

         # Update draft dataset to Metax catalog
@@ -338,8 +338,8 @@ async def patch_object(self, req: Request) -> Response:

         # If there's changed title it will be updated to folder
         try:
-            title = content["descriptor"]["studyTitle"] if collection == "study" else content["title"]
-            patch = self.prepare_folder_patch_update_object(collection, accession_id, title)
+            _ = content["descriptor"]["studyTitle"] if collection == "study" else content["title"]
+            patch = self.prepare_folder_patch_update_object(collection, content)
             await folder_op.update_folder(folder_id, patch)
         except (TypeError, KeyError):
             pass
@@ -395,9 +395,7 @@ def prepare_folder_patch_new_object(self, schema: str, objects: List, params: Di
         patch.append(patch_ops)
         return patch

-    def prepare_folder_patch_update_object(
-        self, schema: str, accession_id: str, title: str, filename: str = ""
-    ) -> List:
+    def prepare_folder_patch_update_object(self, schema: str, data: Dict, filename: str = "") -> List:
         """Prepare patch operation for updating object's title in a folder.
:param schema: schema of object to be updated @@ -412,8 +410,13 @@ def prepare_folder_patch_update_object( patch_op = { "op": "replace", - "match": {path.replace("/", ""): {"$elemMatch": {"schema": schema, "accessionId": accession_id}}}, + "match": {path.replace("/", ""): {"$elemMatch": {"schema": schema, "accessionId": data["accessionId"]}}}, } + try: + title = data["descriptor"]["studyTitle"] if schema in ["study", "draft-study"] else data["title"] + except (TypeError, KeyError): + title = "" + if not filename: patch_op.update( { diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 10c04b746..57670edef 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -56,9 +56,7 @@ async def create_metadata_object(self, schema_type: str, data: Union[Dict, str]) ) return data - async def replace_metadata_object( - self, schema_type: str, accession_id: str, data: Union[Dict, str] - ) -> Tuple[str, str]: + async def replace_metadata_object(self, schema_type: str, accession_id: str, data: Union[Dict, str]) -> Dict: """Replace metadata object from database. Data formatting and addition step for JSON or XML must be implemented @@ -69,9 +67,9 @@ async def replace_metadata_object( :param data: Data to be saved to database. :returns: Accession id for the object replaced to database """ - accession_id, title = await self._format_data_to_replace_and_add_to_db(schema_type, accession_id, data) + data = await self._format_data_to_replace_and_add_to_db(schema_type, accession_id, data) LOG.info(f"Replacing object with schema {schema_type} to database succeeded with accession id: {accession_id}") - return accession_id, title + return data async def update_metadata_object(self, schema_type: str, accession_id: str, data: Union[Dict, str]) -> str: """Update metadata object from database. @@ -153,7 +151,7 @@ async def _insert_formatted_object_to_db(self, schema_type: str, data: Dict) -> raise web.HTTPBadRequest(reason=reason) return True - async def _replace_object_from_db(self, schema_type: str, accession_id: str, data: Dict) -> Tuple[str, str]: + async def _replace_object_from_db(self, schema_type: str, accession_id: str, data: Dict) -> bool: """Replace formatted metadata object in database. :param schema_type: Schema type of the object to replace. @@ -173,16 +171,11 @@ async def _replace_object_from_db(self, schema_type: str, accession_id: str, dat reason = f"Error happened while getting object: {error}" LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - if replace_success: - try: - title = data["descriptor"]["studyTitle"] if schema_type in ["study", "draft-study"] else data["title"] - except (TypeError, KeyError): - title = "" - return accession_id, title - else: + if not replace_success: reason = "Replacing object to database failed for some reason." LOG.error(reason) raise web.HTTPBadRequest(reason=reason) + return True async def _update_object_from_db(self, schema_type: str, accession_id: str, data: Dict) -> str: """Update formatted metadata object in database. @@ -265,9 +258,7 @@ async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Any """ @abstractmethod - async def _format_data_to_replace_and_add_to_db( - self, schema_type: str, accession_id: str, data: Any - ) -> Tuple[str, str]: + async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accession_id: str, data: Any) -> Dict: """Format and replace data in database. Must be implemented by subclass. 
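To see the changed contract from PATCH 271 in isolation: create/replace used to hand back an (accession id, title) tuple, and now they return the full formatted object. Below is a minimal, runnable sketch of the caller-side difference; StubOperator is a made-up stand-in for illustration, not the project's Operator class.

import asyncio
from typing import Dict


class StubOperator:
    """Illustrative stand-in for the real Operator class."""

    async def replace_metadata_object(self, schema_type: str, accession_id: str, data: Dict) -> Dict:
        # Old contract: return accession_id, title (a Tuple[str, str]).
        # New contract: return the whole formatted object; the id travels
        # inside it, so no second DB read is needed to recover the title.
        data["accessionId"] = accession_id
        return data


async def demo() -> None:
    op = StubOperator()
    data = await op.replace_metadata_object("study", "EGA123456", {"descriptor": {"studyTitle": "Highly"}})
    # Title extraction moves to the caller, as prepare_folder_patch_update_object does above.
    title = data["descriptor"]["studyTitle"] if "descriptor" in data else data.get("title", "")
    print(data["accessionId"], title)


asyncio.run(demo())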
@@ -460,13 +451,10 @@ async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Dic
         if schema_type == "study":
             data["publishDate"] = datetime.utcnow() + relativedelta(months=2)
         LOG.debug(f"Operator formatted data for {schema_type} to add to DB.")
-        # return await self._insert_formatted_object_to_db(schema_type, data), data
         await self._insert_formatted_object_to_db(schema_type, data)
         return data

-    async def _format_data_to_replace_and_add_to_db(
-        self, schema_type: str, accession_id: str, data: Dict
-    ) -> Tuple[str, str]:
+    async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accession_id: str, data: Dict) -> Dict:
         """Format JSON metadata object and replace it in db.

         Replace information in object before adding to db.
@@ -500,7 +488,8 @@ async def _format_data_to_replace_and_add_to_db(
             except KeyError:
                 pass
         LOG.debug(f"Operator formatted data for {schema_type} to add to DB")
-        return await self._replace_object_from_db(schema_type, accession_id, data)
+        await self._replace_object_from_db(schema_type, accession_id, data)
+        return data

     async def _format_data_to_update_and_add_to_db(self, schema_type: str, accession_id: str, data: Any) -> str:
         """Format and update data in database.
@@ -512,11 +501,9 @@ async def _format_data_to_update_and_add_to_db(self, schema_type: str, accession
         """
         forbidden_keys = ["accessionId", "publishDate", "dateCreated"]
         # check if object already has metax id or is it first time writing it
-        if schema_type in {"study", "dataset"}:  # and data.get("metaxIdentifier", None):
+        if schema_type in {"study", "dataset"}:
             read_data = await self.db_service.read(schema_type, accession_id)
-            # on firs write db doesnt have yet metaxIdentifier and
-            # on publish metax status inside metaxIdentifier is changed
-            # so we are checking that metax id is still the same
+            # on first write the db doesn't yet have a metaxIdentifier
             if read_data.get("metaxIdentifier", None):
                 forbidden_keys.extend(["metaxIdentifier"])
         if any(i in data for i in forbidden_keys):
             reason = f"Some items (e.g: {', '.join(forbidden_keys)}) cannot be changed."
@@ -620,9 +607,7 @@ async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: str

         return data_with_id

-    async def _format_data_to_replace_and_add_to_db(
-        self, schema_type: str, accession_id: str, data: str
-    ) -> Tuple[str, str]:
+    async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accession_id: str, data: str) -> Dict:
         """Format XML metadata object and add it to db.

         XML is validated, then parsed to JSON, which is added to database.
@@ -637,13 +622,14 @@ async def _format_data_to_replace_and_add_to_db(
         # remove `draft-` from schema type
         schema = schema_type[6:] if schema_type.startswith("draft") else schema_type
         data_as_json = XMLToJSONParser().parse(schema, data)
-        accession_id, title = await Operator(db_client)._format_data_to_replace_and_add_to_db(
+        data_with_id = await Operator(db_client)._format_data_to_replace_and_add_to_db(
            schema_type, accession_id, data_as_json
        )
         LOG.debug(f"XMLOperator formatted data for xml-{schema_type} to add to DB")
-        return await self._replace_object_from_db(
+        await self._replace_object_from_db(
             f"xml-{schema_type}", accession_id, {"accessionId": accession_id, "content": data}
         )
+        return data_with_id

     async def _format_data_to_update_and_add_to_db(self, schema_type: str, accession_id: str, data: str) -> str:
         """Raise not implemented.
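The XML path above keeps a double write: the parsed JSON lands in the schema's own collection, while the raw XML is stored under a parallel xml-<schema> collection with the same accession id. A standalone sketch of that pattern, with an in-memory dict standing in for MongoDB (the names here are illustrative, not the project's API):

from typing import Dict

db: Dict[str, Dict[str, Dict]] = {"study": {}, "xml-study": {}}


def store_xml_object(schema_type: str, accession_id: str, data_as_json: Dict, raw_xml: str) -> Dict:
    # Queryable JSON copy in the schema collection.
    data_as_json["accessionId"] = accession_id
    db[schema_type][accession_id] = data_as_json
    # Verbatim XML copy in the parallel xml-<schema> collection.
    db[f"xml-{schema_type}"][accession_id] = {"accessionId": accession_id, "content": raw_xml}
    return data_as_json


data_with_id = store_xml_object("study", "EGA123456", {"descriptor": {"studyTitle": "Highly"}}, "<STUDY_SET/>")
print(data_with_id["accessionId"], sorted(db))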
diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 2506c8811..db4c940d9 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -158,7 +158,7 @@ async def fake_xmloperator_create_metadata_object(self, schema_type, content): async def fake_xmloperator_replace_metadata_object(self, schema_type, accession_id, content): """Fake replace operation to return mocked accessionId.""" - return self.test_ega_string, "title" + return {"accessionId": self.test_ega_string, "title": "title"} async def fake_operator_create_metadata_object(self, schema_type, content): """Fake create operation to return mocked accessionId.""" @@ -170,7 +170,7 @@ async def fake_operator_update_metadata_object(self, schema_type, accession_id, async def fake_operator_replace_metadata_object(self, schema_type, accession_id, content): """Fake replace operation to return mocked accessionId.""" - return self.test_ega_string, "title" + return {"accessionId": self.test_ega_string, "title": "title"} async def fake_operator_delete_metadata_object(self, schema_type, accession_id): """Fake delete operation to await successful operation indicator.""" diff --git a/tests/test_operators.py b/tests/test_operators.py index 9121b16ed..86ec388bf 100644 --- a/tests/test_operators.py +++ b/tests/test_operators.py @@ -223,9 +223,9 @@ async def test_json_replace_passes_and_returns_accessionId(self): operator = Operator(self.client) operator.db_service.exists.return_value = True operator.db_service.replace.return_value = True - accession, _ = await operator.replace_metadata_object("study", self.accession_id, data) + data = await operator.replace_metadata_object("study", self.accession_id, data) operator.db_service.replace.assert_called_once() - self.assertEqual(accession, self.accession_id) + self.assertEqual(data["accessionId"], self.accession_id) async def test_json_replace_raises_if_not_exists(self): """Test replace method raises error.""" @@ -353,7 +353,7 @@ async def test_correct_data_is_set_to_json_when_replacing(self): "metaxIdentifier": {"identifier": 12345}, }, ) - self.assertEqual(acc, self.accession_id) + self.assertEqual(acc["accessionId"], self.accession_id) async def test_correct_data_is_set_to_json_when_updating(self): """Test operator updates object and adds necessary info.""" @@ -422,7 +422,7 @@ async def test_correct_data_is_set_to_xml_when_replacing(self): xml_data = "" with patch( "metadata_backend.api.operators.Operator._format_data_to_replace_and_add_to_db", - return_value=(self.accession_id, "title"), + return_value={"accessionId": self.accession_id}, ): with patch( "metadata_backend.api.operators.XMLOperator._replace_object_from_db", @@ -435,7 +435,7 @@ async def test_correct_data_is_set_to_xml_when_replacing(self): self.accession_id, {"accessionId": self.accession_id, "content": xml_data}, ) - self.assertEqual(acc, self.accession_id) + self.assertEqual(acc["accessionId"], self.accession_id) async def test_deleting_metadata_deletes_json_and_xml(self): """Test metadata is deleted.""" From baac01bd57efb2f8f954108a0085024f5b597f92 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Fri, 4 Mar 2022 13:13:49 +0000 Subject: [PATCH 272/336] Fix private methods names with underscore --- metadata_backend/api/handlers/object.py | 24 ++++++++++++------------ tests/test_handlers.py | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index d07f49cef..c9b26239b 100644 --- a/metadata_backend/api/handlers/object.py 
+++ b/metadata_backend/api/handlers/object.py @@ -166,12 +166,12 @@ async def post_object(self, req: Request) -> Response: if not isinstance(data, List): objects = [json_data] folder_op = FolderOperator(db_client) - patch = self.prepare_folder_patch_new_object(collection, objects, patch_params) + patch = self._prepare_folder_patch_new_object(collection, objects, patch_params) await folder_op.update_folder(folder_id, patch) # Create draft dataset to Metax catalog if collection in {"study", "dataset"}: - [await self.create_metax_dataset(req, collection, item) for item in objects] + [await self._create_metax_dataset(req, collection, item) for item in objects] body = ujson.dumps(data, escape_forward_slashes=False) @@ -239,7 +239,7 @@ async def delete_object(self, req: Request) -> Response: # Delete draft dataset from Metax catalog if collection in {"study", "dataset"}: - await self.delete_metax_dataset(req, metax_id) + await self._delete_metax_dataset(req, metax_id) LOG.info(f"DELETE object with accession ID {accession_id} in schema {collection} was successful.") return web.Response(status=204) @@ -288,12 +288,12 @@ async def put_object(self, req: Request) -> Response: raise web.HTTPUnauthorized(reason=reason) data = await operator.replace_metadata_object(collection, accession_id, content) - patch = self.prepare_folder_patch_update_object(collection, data, filename) + patch = self._prepare_folder_patch_update_object(collection, data, filename) await folder_op.update_folder(folder_id, patch) # Update draft dataset to Metax catalog if collection in {"study", "dataset"}: - await self.update_metax_dataset(req, collection, accession_id) + await self._update_metax_dataset(req, collection, accession_id) body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) LOG.info(f"PUT object with accession ID {accession_id} in schema {collection} was successful.") @@ -339,20 +339,20 @@ async def patch_object(self, req: Request) -> Response: # If there's changed title it will be updated to folder try: _ = content["descriptor"]["studyTitle"] if collection == "study" else content["title"] - patch = self.prepare_folder_patch_update_object(collection, content) + patch = self._prepare_folder_patch_update_object(collection, content) await folder_op.update_folder(folder_id, patch) except (TypeError, KeyError): pass # Update draft dataset to Metax catalog if collection in {"study", "dataset"}: - await self.update_metax_dataset(req, collection, accession_id) + await self._update_metax_dataset(req, collection, accession_id) body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) LOG.info(f"PATCH object with accession ID {accession_id} in schema {collection} was successful.") return web.Response(body=body, status=200, content_type="application/json") - def prepare_folder_patch_new_object(self, schema: str, objects: List, params: Dict[str, str]) -> List: + def _prepare_folder_patch_new_object(self, schema: str, objects: List, params: Dict[str, str]) -> List: """Prepare patch operations list for adding an object or objects to a folder. 
:param schema: schema of objects to be added to the folder @@ -395,7 +395,7 @@ def prepare_folder_patch_new_object(self, schema: str, objects: List, params: Di patch.append(patch_ops) return patch - def prepare_folder_patch_update_object(self, schema: str, data: Dict, filename: str = "") -> List: + def _prepare_folder_patch_update_object(self, schema: str, data: Dict, filename: str = "") -> List: """Prepare patch operation for updating object's title in a folder. :param schema: schema of object to be updated @@ -434,7 +434,7 @@ def prepare_folder_patch_update_object(self, schema: str, data: Dict, filename: return [patch_op] # TODO: update doi related code - async def create_metax_dataset(self, req: Request, collection: str, object: Dict) -> str: + async def _create_metax_dataset(self, req: Request, collection: str, object: Dict) -> str: """Handle connection to Metax api handler. Sends Dataset or Study object's data to Metax api handler. @@ -461,7 +461,7 @@ async def create_metax_dataset(self, req: Request, collection: str, object: Dict return metax_id # TODO: update doi related code - async def update_metax_dataset(self, req: Request, collection: str, accession_id: str) -> str: + async def _update_metax_dataset(self, req: Request, collection: str, accession_id: str) -> str: """Handle connection to Metax api handler. Sends Dataset or Study object's data to Metax api handler. @@ -485,7 +485,7 @@ async def update_metax_dataset(self, req: Request, collection: str, accession_id raise ValueError("Object's data must be dictionary") return metax_id - async def delete_metax_dataset(self, req: Request, metax_id: str) -> None: + async def _delete_metax_dataset(self, req: Request, metax_id: str) -> None: """Handle deletion of Study or Dataset object from Metax service. 
:param req: HTTP request diff --git a/tests/test_handlers.py b/tests/test_handlers.py index db4c940d9..5d42bc880 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -103,7 +103,7 @@ async def setUpAsync(self): RESTAPIHandler._handle_check_ownedby_user = make_mocked_coro(True) ObjectAPIHandler.create_or_update_metax_dataset = make_mocked_coro("111-222-333") - ObjectAPIHandler.delete_metax_dataset = make_mocked_coro() + ObjectAPIHandler._delete_metax_dataset = make_mocked_coro() async def tearDownAsync(self): """Cleanup mocked stuff.""" From b87175855397a8c65c58fb945e35e4be95c709a3 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 10 Jan 2022 09:20:47 +0200 Subject: [PATCH 273/336] add study and dataset identifiers to folder schema --- metadata_backend/helpers/schemas/folders.json | 89 +++++++++++++++---- 1 file changed, 73 insertions(+), 16 deletions(-) diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index 5321549a9..0bece1442 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -933,21 +933,86 @@ "type": "object", "title": "The extra DOI info schema", "properties": { - "identifier": { + "studyIdentifier": { "type": "object", - "title": "identifier object", "required": [ - "identifierType", - "doi" + "accessionId", + "identifier", + "url" ], "properties": { - "identifierType": { + "identifier": { + "type": "object", + "title": "identifier object", + "required": [ + "identifierType", + "doi" + ], + "properties": { + "identifierType": { + "type": "string", + "title": "Type of identifier (= DOI)" + }, + "doi": { + "type": "string", + "title": "A persistent identifier for a resource" + } + } + }, + "accessionId": { "type": "string", - "title": "Type of identifier (= DOI)" + "title": "Internal accessionId for the study" }, - "doi": { + "url": { "type": "string", - "title": "A persistent identifier for a resource" + "title": "URL of the digital location of the object" + }, + "types": { + "type": "object", + "title": "Type info of the resource. Multiple types can be listed: ResourceTypeGeneral, schemaOrg etc." + } + } + }, + "datasetIdentifiers": { + "type": "array", + "items": { + "type": "object", + "required": [ + "accessionId", + "identifier", + "url" + ], + "properties": { + "identifier": { + "type": "object", + "title": "identifier object", + "required": [ + "identifierType", + "doi" + ], + "properties": { + "identifierType": { + "type": "string", + "title": "Type of identifier (= DOI)" + }, + "doi": { + "type": "string", + "title": "A persistent identifier for a resource" + } + } + }, + "accessionId": { + "type": "string", + "title": "Internal accessionid for the dataset" + }, + "url": { + "type": "string", + "title": "URL of the digital location of the object" + }, + "types": { + "type": "object", + "title": "Type info of the resource. Multiple types can be listed: ResourceTypeGeneral, schemaOrg etc." + } } } }, @@ -959,14 +1024,6 @@ "type": "integer", "title": "Publication Year" }, - "types": { - "type": "object", - "title": "Type info of the resource. Multiple types can be listed: ResourceTypeGeneral, schemaOrg etc." 
- }, - "url": { - "type": "string", - "title": "URL of the digital location of the object" - }, "version": { "type": "string", "title": "Version number of the resource" From 9ec3bbea3363921f64bf5d7384e8d4b7f739fe1f Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 25 Jan 2022 10:10:54 +0200 Subject: [PATCH 274/336] make dataset description mandatory this is for metax/ etsin to add description to a study resource --- metadata_backend/helpers/schemas/ena_dataset.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/metadata_backend/helpers/schemas/ena_dataset.json b/metadata_backend/helpers/schemas/ena_dataset.json index f3fc832b3..d781ce6be 100644 --- a/metadata_backend/helpers/schemas/ena_dataset.json +++ b/metadata_backend/helpers/schemas/ena_dataset.json @@ -241,8 +241,10 @@ "type": "object", "description": "Describes an object that contains data access policy information.", "required": [ - "title" + "title", + "description" ], + "additionalProperties": true, "properties": { "title": { "title": "Dataset Title", From de388c9f73518773e5e2969ca24bb964fbaf0ad0 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 25 Jan 2022 10:18:58 +0200 Subject: [PATCH 275/336] make study abstract mandatory this is for metax/ etsin to add description to a study resource --- .../helpers/schemas/ena_study.json | 4 ++- tests/test_handlers.py | 36 +++++++++++++++---- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/metadata_backend/helpers/schemas/ena_study.json b/metadata_backend/helpers/schemas/ena_study.json index 075399396..da2c2e823 100644 --- a/metadata_backend/helpers/schemas/ena_study.json +++ b/metadata_backend/helpers/schemas/ena_study.json @@ -188,13 +188,15 @@ "required": [ "descriptor" ], + "additionalProperties": true, "properties": { "descriptor": { "type": "object", "title": "Study Description", "required": [ "studyTitle", - "studyType" + "studyType", + "studyAbstract" ], "properties": { "studyTitle": { diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 5d42bc880..d8c1908b5 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -404,7 +404,11 @@ async def test_submit_object_works_with_json(self): json_req = { "centerName": "GEO", "alias": "GSE10966", - "descriptor": {"studyTitle": "Highly", "studyType": "Other"}, + "descriptor": { + "studyTitle": "Highly", + "studyType": "Other", + "studyAbstract": "abstract description for testing", + }, } response = await self.client.post("/objects/study", params={"folder": "some id"}, json=json_req) self.assertEqual(response.status, 201) @@ -424,7 +428,11 @@ async def test_submit_object_bad_field_json(self): json_req = { "centerName": "GEO", "alias": "GSE10966", - "descriptor": {"studyTitle": "Highly", "studyType": "ceva"}, + "descriptor": { + "studyTitle": "Highly", + "studyType": "ceva", + "studyAbstract": "abstract description for testing", + }, } response = await self.client.post("/objects/study", params={"folder": "some id"}, json=json_req) reason = "Provided input does not seem correct for field: 'descriptor'" @@ -436,7 +444,11 @@ async def test_post_object_bad_json(self): json_req = { "centerName": "GEO", "alias": "GSE10966", - "descriptor": {"studyTitle": "Highly", "studyType": "Other"}, + "descriptor": { + "studyTitle": "Highly", + "studyType": "Other", + "studyAbstract": "abstract description for testing", + }, } response = await self.client.post("/objects/study", params={"folder": "some id"}, data=json_req) reason = "JSON is not correctly formatted. 
See: Expecting value: line 1 column 1" @@ -478,7 +490,11 @@ async def test_put_object_bad_json(self): json_req = { "centerName": "GEO", "alias": "GSE10966", - "descriptor": {"studyTitle": "Highly", "studyType": "Other"}, + "descriptor": { + "studyTitle": "Highly", + "studyType": "Other", + "studyAbstract": "abstract description for testing", + }, } call = "/drafts/study/EGA123456" response = await self.client.put(call, data=json_req) @@ -500,7 +516,11 @@ async def test_submit_draft_works_with_json(self): json_req = { "centerName": "GEO", "alias": "GSE10966", - "descriptor": {"studyTitle": "Highly", "studyType": "Other"}, + "descriptor": { + "studyTitle": "Highly", + "studyType": "Other", + "studyAbstract": "abstract description for testing", + }, } response = await self.client.post("/drafts/study", params={"folder": "some id"}, json=json_req) self.assertEqual(response.status, 201) @@ -512,7 +532,11 @@ async def test_put_draft_works_with_json(self): json_req = { "centerName": "GEO", "alias": "GSE10966", - "descriptor": {"studyTitle": "Highly", "studyType": "Other"}, + "descriptor": { + "studyTitle": "Highly", + "studyType": "Other", + "studyAbstract": "abstract description for testing", + }, } call = "/drafts/study/EGA123456" response = await self.client.put(call, json=json_req) From 842040fc4f1487c2f19007e668de92e8db0bbbfe Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 25 Jan 2022 10:19:06 +0200 Subject: [PATCH 276/336] add keywords to folder doi info we can collect keywords with other doi info, for metax/etsin integration --- metadata_backend/helpers/schemas/datacite.json | 14 ++++++++++++-- metadata_backend/helpers/schemas/folders.json | 14 ++++++++++++-- tests/test_files/doi/test_doi.json | 6 +++++- 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/metadata_backend/helpers/schemas/datacite.json b/metadata_backend/helpers/schemas/datacite.json index 3a641f729..3706beabe 100644 --- a/metadata_backend/helpers/schemas/datacite.json +++ b/metadata_backend/helpers/schemas/datacite.json @@ -3,7 +3,8 @@ "title": "Datacite DOI Registration Information", "required": [ "creators", - "subjects" + "subjects", + "keywords" ], "properties": { "creators": { @@ -94,7 +95,7 @@ "subjects": { "type": "array", "title": "Subjects", - "description": "Subject, keyword, classification code, or key phrase describing the resources specified by OECD Fields of Science and Technology (FOS)", + "description": "Subject, classification code, or key phrase describing the resources specified by OECD Fields of Science and Technology (FOS)", "items": { "type": "object", "title": "Subjects", @@ -161,6 +162,15 @@ }, "uniqueItems": true }, + "keywords": { + "type": "array", + "title": "Keywords", + "description": "A keyword or tag describing the resources. It is recommended to use a controlled vocabulary, ontology or classification when choosing keywords. 
At least one keyword is required.", + "items": { + "minLength": 1, + "type": "string" + } + }, "contributors": { "type": "array", "title": "Contributors", diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index 0bece1442..4e5172493 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -44,7 +44,8 @@ "title": "Datacite DOI Registration Information", "required": [ "creators", - "subjects" + "subjects", + "keywords" ], "properties": { "creators": { @@ -140,7 +141,7 @@ "subjects": { "type": "array", "title": "Subjects", - "description": "Subject, keyword, classification code, or key phrase describing the resources specified by OECD Fields of Science and Technology (FOS)", + "description": "Subject, classification code, or key phrase describing the resources specified by OECD Fields of Science and Technology (FOS)", "items": { "type": "object", "title": "Subjects", @@ -211,6 +212,15 @@ }, "uniqueItems": true }, + "keywords": { + "type": "array", + "title": "Keywords", + "description": "A keyword or tag describing the resources. It is recommended to use a controlled vocabulary, ontology or classification when choosing keywords. At least one keyword is required.", + "items": { + "minLength": 1, + "type": "string" + } + }, "contributors": { "type": "array", "title": "Contributors", diff --git a/tests/test_files/doi/test_doi.json b/tests/test_files/doi/test_doi.json index 67a8b55ae..17fa355a7 100644 --- a/tests/test_files/doi/test_doi.json +++ b/tests/test_files/doi/test_doi.json @@ -22,6 +22,10 @@ "subjectScheme": "Fields of Science and Technology (FOS)" } ], + "keywords": [ + "test", + "keyword" + ], "contributors": [ { "name": "Contributor, Test", @@ -39,4 +43,4 @@ "contributorType": "Researcher" } ] -} +} \ No newline at end of file From 5dd22d49c544f17f97cb7c29f209487ec10ac499 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 15 Feb 2022 15:55:22 +0200 Subject: [PATCH 277/336] not all functions need to be async --- metadata_backend/api/handlers/folder.py | 2 +- metadata_backend/api/handlers/object.py | 2 +- metadata_backend/api/handlers/restapi.py | 2 +- metadata_backend/api/handlers/user.py | 49 +++++++++++++++++++++++- 4 files changed, 51 insertions(+), 4 deletions(-) diff --git a/metadata_backend/api/handlers/folder.py b/metadata_backend/api/handlers/folder.py index 5768a25be..9f20c461b 100644 --- a/metadata_backend/api/handlers/folder.py +++ b/metadata_backend/api/handlers/folder.py @@ -157,7 +157,7 @@ async def get_folders(self, req: Request) -> Response: ) url = f"{req.scheme}://{req.host}{req.path}" - link_headers = await self._header_links(url, page, per_page, total_folders) + link_headers = self._header_links(url, page, per_page, total_folders) LOG.debug(f"Pagination header links: {link_headers}") LOG.info(f"Querying for project={project_id} folders resulted in {total_folders} folders") return web.Response( diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index c9b26239b..1ae26457a 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -52,7 +52,7 @@ async def _handle_query(self, req: Request) -> Response: escape_forward_slashes=False, ) url = f"{req.scheme}://{req.host}{req.path}" - link_headers = await self._header_links(url, page_num, per_page, total_objects) + link_headers = self._header_links(url, page_num, per_page, total_objects) LOG.debug(f"Pagination header links: 
{link_headers}") LOG.info(f"Querying for objects in {collection} resulted in {total_objects} objects ") return web.Response( diff --git a/metadata_backend/api/handlers/restapi.py b/metadata_backend/api/handlers/restapi.py index 96dcf4462..81d575265 100644 --- a/metadata_backend/api/handlers/restapi.py +++ b/metadata_backend/api/handlers/restapi.py @@ -174,7 +174,7 @@ async def get_json_schema(self, req: Request) -> Response: LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - async def _header_links(self, url: str, page: int, size: int, total_objects: int) -> CIMultiDict[str]: + def _header_links(self, url: str, page: int, size: int, total_objects: int) -> CIMultiDict[str]: """Create link header for pagination. :param url: base url for request diff --git a/metadata_backend/api/handlers/user.py b/metadata_backend/api/handlers/user.py index a33bff7df..1523b48fe 100644 --- a/metadata_backend/api/handlers/user.py +++ b/metadata_backend/api/handlers/user.py @@ -1,14 +1,18 @@ """Handle HTTP methods for server.""" +from math import ceil +from typing import Dict, Tuple + import ujson from aiohttp import web from aiohttp.web import Request, Response +from multidict import CIMultiDict from ...conf.conf import aai_config from ...helpers.logger import LOG -from .restapi import RESTAPIHandler from ..middlewares import decrypt_cookie, get_session from ..operators import UserOperator +from .restapi import RESTAPIHandler class UserAPIHandler(RESTAPIHandler): @@ -70,3 +74,46 @@ async def delete_user(self, req: Request) -> Response: ) LOG.debug("Logged out user ") raise response + + async def _get_user_items(self, req: Request, user: Dict, item_type: str) -> Tuple[Dict, CIMultiDict[str]]: + """Get draft templates owned by the user with pagination values. + + :param req: GET request + :param user: User object + :param item_type: Name of the items ("templates" or "folders") + :raises: HTTPUnauthorized if not current user + :returns: Paginated list of user draft templates and link header + """ + # Check item_type parameter is not faulty + if item_type not in ["templates", "folders"]: + reason = f"{item_type} is a faulty item parameter. 
Should be either folders or templates" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + page = self._get_page_param(req, "page", 1) + per_page = self._get_page_param(req, "per_page", 5) + + db_client = req.app["db_client"] + operator = UserOperator(db_client) + user_id = req.match_info["userId"] + + query = {"userId": user} + + items, total_items = await operator.filter_user(query, item_type, page, per_page) + LOG.info(f"GET user with ID {user_id} was successful.") + + result = { + "page": { + "page": page, + "size": per_page, + "totalPages": ceil(total_items / per_page), + "total" + item_type.title(): total_items, + }, + item_type: items, + } + + url = f"{req.scheme}://{req.host}{req.path}" + link_headers = self._header_links(url, page, per_page, total_items) + LOG.debug(f"Pagination header links: {link_headers}") + LOG.info(f"Querying for user's {item_type} resulted in {total_items} {item_type}") + return result, link_headers From 0fe5c226c6df02a1b67036fdf9aa03a593e88b00 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 16 Feb 2022 12:44:05 +0200 Subject: [PATCH 278/336] correct error reasons for operators --- metadata_backend/api/operators.py | 71 +++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 8 deletions(-) diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 57670edef..11ef86204 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -12,12 +12,12 @@ from multidict import MultiDictProxy from pymongo.errors import ConnectionFailure, OperationFailure -from .middlewares import get_session from ..conf.conf import mongo_database, query_map from ..database.db_service import DBService, auto_reconnect from ..helpers.logger import LOG from ..helpers.parser import XMLToJSONParser from ..helpers.validator import JSONValidator +from .middlewares import get_session class BaseOperator(ABC): @@ -104,7 +104,7 @@ async def read_metadata_object(self, schema_type: str, accession_id: str) -> Tup raise web.HTTPNotFound() data = await self._format_read_data(schema_type, data_raw) except (ConnectionFailure, OperationFailure) as error: - reason = f"Error happened while getting object: {error}" + reason = f"Error happened while reading object: {error}" LOG.error(reason) raise web.HTTPBadRequest(reason=reason) return data, self.content_type @@ -712,7 +712,7 @@ async def check_object_in_folder(self, collection: str, accession_id: str) -> Tu ) folder_check = [folder async for folder in folder_cursor] except (ConnectionFailure, OperationFailure) as error: - reason = f"Error happened while inserting user: {error}" + reason = f"Error happened while checking object in folder: {error}" LOG.error(reason) raise web.HTTPBadRequest(reason=reason) @@ -732,7 +732,7 @@ async def get_collection_objects(self, folder_id: str, collection: str) -> List: """List objects ids per collection. 
:param collection: collection it belongs to, it would be used as path
-        :returns: count of objects
+        :returns: List of objects
         """
         try:
             folder_path = "drafts" if collection.startswith("draft") else "metadataObjects"
             folder_cursor = self.db_service.query(
             )
             folders = [folder async for folder in folder_cursor]
         except (ConnectionFailure, OperationFailure) as error:
-            reason = f"Error happened while inserting user: {error}"
+            reason = f"Error happened while getting collection objects: {error}"
             LOG.error(reason)
             raise web.HTTPBadRequest(reason=reason)

@@ -852,7 +852,7 @@ async def update_folder(self, folder_id: str, patch: List) -> str:
         try:
             update_success = await self.db_service.patch("folder", folder_id, patch)
         except (ConnectionFailure, OperationFailure) as error:
-            reason = f"Error happened while getting folder: {error}"
+            reason = f"Error happened while updating folder: {error}"
             LOG.error(reason)
             raise web.HTTPBadRequest(reason=reason)

@@ -878,7 +878,7 @@ async def remove_object(self, folder_id: str, collection: str, accession_id: str
             upd_content = {folder_path: {"accessionId": accession_id}}
             await self.db_service.remove("folder", folder_id, upd_content)
         except (ConnectionFailure, OperationFailure) as error:
-            reason = f"Error happened while getting user: {error}"
+            reason = f"Error happened while removing object from folder: {error}"
             LOG.error(reason)
             raise web.HTTPBadRequest(reason=reason)

@@ -1114,7 +1114,7 @@ async def update_user(self, user_id: str, patch: List) -> str:
             await self._check_user_exists(user_id)
             update_success = await self.db_service.patch("user", user_id, patch)
         except (ConnectionFailure, OperationFailure) as error:
-            reason = f"Error happened while getting user: {error}"
+            reason = f"Error happened while updating user: {error}"
             LOG.error(reason)
             raise web.HTTPBadRequest(reason=reason)

@@ -1126,6 +1126,61 @@ async def update_user(self, user_id: str, patch: List) -> str:
         LOG.info(f"Updating user with id {user_id} to database succeeded.")
         return user_id

+    async def assign_objects(self, user_id: str, collection: str, object_ids: List) -> None:
+        """Assign objects to user.
+
+        An object can be folder(s) or template(s).
+
+        :param user_id: ID of user to update
+        :param collection: collection where to add the ids
+        :param object_ids: ID or list of IDs of folder(s) to assign
+        :raises: HTTPBadRequest if assigning templates/folders to user was not successful
+        returns: None
+        """
+        try:
+            await self._check_user_exists(user_id)
+            assign_success = await self.db_service.append(
+                "user", user_id, {collection: {"$each": object_ids, "$position": 0}}
+            )
+        except (ConnectionFailure, OperationFailure) as error:
+            reason = f"Error happened while assigning objects to user: {error}"
+            LOG.error(reason)
+            raise web.HTTPBadRequest(reason=reason)
+
+        if not assign_success:
+            reason = "Assigning objects to user failed."
+            LOG.error(reason)
+            raise web.HTTPBadRequest(reason=reason)
+
+        LOG.info(f"Assigning {object_ids} to {user_id} succeeded.")
+
+    async def remove_objects(self, user_id: str, collection: str, object_ids: List) -> None:
+        """Remove objects from user.
+
+        An object can be folder(s) or template(s).
+ + :param user_id: ID of user to update + :param collection: collection where to remove the id from + :param object_ids: ID or list of IDs of folder(s) to remove + :raises: HTTPBadRequest if db connection fails + returns: None + """ + remove_content: Dict + try: + await self._check_user_exists(user_id) + for obj in object_ids: + if collection == "templates": + remove_content = {"templates": {"accessionId": obj}} + else: + remove_content = {"folders": obj} + await self.db_service.remove("user", user_id, remove_content) + except (ConnectionFailure, OperationFailure) as error: + reason = f"Error happened while removing objects from user: {error}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + LOG.info(f"Removing {object_ids} from {user_id} succeeded.") + async def delete_user(self, user_id: str) -> str: """Delete user object from database. From 882ed07a8e81515ccb2deea044bb8125fe683eae Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 16 Feb 2022 16:35:37 +0200 Subject: [PATCH 279/336] create draft dois for study and datasets restrict one study per folder. deprecate creating draft when publishing folder --- metadata_backend/api/handlers/folder.py | 17 ------ metadata_backend/api/handlers/object.py | 77 +++++++++++++++++++++++++ metadata_backend/helpers/doi.py | 6 +- tests/test_doi.py | 4 +- tests/test_handlers.py | 42 +++++++++----- 5 files changed, 108 insertions(+), 38 deletions(-) diff --git a/metadata_backend/api/handlers/folder.py b/metadata_backend/api/handlers/folder.py index 9f20c461b..313916ca8 100644 --- a/metadata_backend/api/handlers/folder.py +++ b/metadata_backend/api/handlers/folder.py @@ -11,7 +11,6 @@ from multidict import CIMultiDict from ...conf.conf import publisher -from ...helpers.doi import DOIHandler from ...helpers.logger import LOG from ...helpers.validator import JSONValidator from ...helpers.metax_api_handler import MetaxServiceHandler @@ -275,9 +274,6 @@ async def publish_folder(self, req: Request) -> Response: obj_ops = Operator(db_client) # Create draft DOI and delete draft objects from the folder - doi = DOIHandler() - doi_data = await doi.create_draft_doi() - identifier = {"identifierType": "DOI", "doi": doi_data["fullDOI"]} for obj in folder["drafts"]: await obj_ops.delete_metadata_object(obj["schema"], obj["accessionId"]) @@ -287,20 +283,7 @@ async def publish_folder(self, req: Request) -> Response: {"op": "replace", "path": "/published", "value": True}, {"op": "replace", "path": "/drafts", "value": []}, {"op": "add", "path": "/datePublished", "value": int(datetime.now().timestamp())}, - {"op": "add", "path": "/extraInfo/identifier", "value": identifier}, - {"op": "add", "path": "/extraInfo/url", "value": doi_data["dataset"]}, {"op": "add", "path": "/extraInfo/publisher", "value": publisher}, - { - "op": "add", - "path": "/extraInfo/types", - "value": { - "ris": "DATA", - "bibtex": "misc", - "citeproc": "dataset", - "schemaOrg": "Dataset", - "resourceTypeGeneral": "Dataset", - }, - }, {"op": "add", "path": "/extraInfo/publicationYear", "value": date.today().year}, ] new_folder = await operator.update_folder(folder_id, patch) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index 1ae26457a..881aefed7 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -13,11 +13,57 @@ from ..operators import FolderOperator, Operator, XMLOperator from .common import multipart_content from .restapi import RESTAPIHandler +from ...helpers.doi import DOIHandler 
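The _draft_doi helper added just below drives DataCite's draft workflow for studies and datasets. As a rough sketch of the request it ends up composing — the suffix format follows DOIHandler.create_draft further down in this patch, while the 10.xxxx prefix is a placeholder (the real value is read from configuration):

from uuid import uuid4

doi_prefix = "10.xxxx"  # placeholder; the service reads the real prefix from conf
suffix = uuid4().hex[:10]
doi_suffix = f"study.{suffix[:4]}-{suffix[4:]}"  # e.g. study.1a2b-3c4d5e
payload = {"data": {"type": "dois", "attributes": {"doi": f"{doi_prefix}/{doi_suffix}"}}}
print(payload)  # POSTed to the DataCite DOI API to reserve a draft DOI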
class ObjectAPIHandler(RESTAPIHandler): """API Handler for Objects.""" + def __init__(self) -> None: + """Init Object handler.""" + super().__init__() + self.doi = DOIHandler() + + async def _draft_doi(self, schema_type: str) -> Dict: + """Create draft DOI for study and dataset. + + The Draft DOI will be created only on POST and the data added to the + folder. Any update of this should not be possible. + + :param schema_type: schema can be either study or dataset + :returns: Dict with DOI of the study or dataset as well as the types. + """ + _doi_data = await self.doi.create_draft(prefix=schema_type) + + LOG.debug(f"doi created with doi: {_doi_data['fullDOI']}") + + data: Dict = {} + if schema_type == "study": + data["identifier"] = { + "identifierType": "DOI", + "doi": _doi_data["fullDOI"], + } + data["types"] = { + "bibtex": "misc", + "citeproc": "collection", + "schemaOrg": "Collection", + "resourceTypeGeneral": "Collection", + } + elif schema_type == "dataset": + data["identifier"] = { + "identifierType": "DOI", + "doi": _doi_data["fullDOI"], + } + data["types"] = { + "ris": "DATA", + "bibtex": "misc", + "citeproc": "dataset", + "schemaOrg": "Dataset", + "resourceTypeGeneral": "Dataset", + } + + return data + async def _handle_query(self, req: Request) -> Response: """Handle query results. @@ -101,6 +147,7 @@ async def post_object(self, req: Request) -> Response: :returns: JSON response containing accessionId for submitted object """ _allowed_csv = ["sample"] + _allowed_doi = {"study", "dataset"} schema_type = req.match_info["schema"] folder_id = req.query.get("folder", "") @@ -115,6 +162,16 @@ async def post_object(self, req: Request) -> Response: collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type db_client = req.app["db_client"] + folder_op = FolderOperator(db_client) + + # we need to check if there is already a study in a folder + # we only allow one study per folder + if not req.path.startswith("/drafts") and schema_type == "study": + _ids = await folder_op.get_collection_objects(folder_id, collection) + if len(_ids) == 1: + reason = "Only one study is allowed per submission." + raise web.HTTPBadRequest(reason=reason) + content: Union[Dict[str, Any], str, List[Tuple[Any, str]]] operator: Union[Operator, XMLOperator] if req.content_type == "multipart/form-data": @@ -505,3 +562,23 @@ async def create_doi(self) -> str: rand = str(uuid4()).split("-")[1:3] return f"10.{rand[0]}/{rand[1]}" + + async def _prepare_folder_patch_doi(self, schema: str, ids: List) -> List: + """Prepare patch operation for updating object's doi information in a folder. 
+ + :param schema: schema of object to be updated + :param ids: object IDs + :returns: dict with patch operation + """ + patch = [] + for id in ids: + _data = await self._draft_doi(schema) + _data["accessionId"] = id["accessionId"] + if schema == "study": + patch_op = {"op": "add", "path": "/extraInfo/studyIdentifier", "value": _data} + patch.append(patch_op) + elif schema == "dataset": + patch_op = {"op": "add", "path": "/extraInfo/datasetIdentifiers/-", "value": _data} + patch.append(patch_op) + + return patch diff --git a/metadata_backend/helpers/doi.py b/metadata_backend/helpers/doi.py index 769a47ab0..c3de8ac51 100644 --- a/metadata_backend/helpers/doi.py +++ b/metadata_backend/helpers/doi.py @@ -3,7 +3,7 @@ The DOI handler from SDA orchestration was used as reference: https://github.com/neicnordic/sda-orchestration/blob/master/sda_orchestrator/utils/id_ops.py """ -from typing import Dict +from typing import Dict, Union from uuid import uuid4 from aiohttp import web, ClientSession, BasicAuth @@ -23,10 +23,10 @@ def __init__(self) -> None: self.doi_key = conf.doi_key self.doi_url = f"{conf.datacite_url.rstrip('/')}/{self.doi_prefix}" - async def create_draft_doi(self) -> Dict: + async def create_draft(self, prefix: Union[str, None] = None) -> Dict: """Generate random suffix and POST request a draft DOI to DataCite DOI API.""" suffix = uuid4().hex[:10] - doi_suffix = f"{suffix[:4]}-{suffix[4:]}" + doi_suffix = f"{prefix}.{suffix[:4]}-{suffix[4:]}" if prefix else f"{suffix[:4]}-{suffix[4:]}" headers = {"Content-Type": "application/json"} doi_payload = {"data": {"type": "dois", "attributes": {"doi": f"{self.doi_prefix}/{doi_suffix}"}}} diff --git a/tests/test_doi.py b/tests/test_doi.py index 9a18defe0..0e8818aa6 100644 --- a/tests/test_doi.py +++ b/tests/test_doi.py @@ -19,7 +19,7 @@ async def test_400_is_raised(self): with patch("aiohttp.ClientSession.post") as mocked_post: mocked_post.return_value.status_code = 400 with self.assertRaises(web.HTTPBadRequest) as err: - await self.doi.create_draft_doi() + await self.doi.create_draft() self.assertEqual(str(err.exception), "DOI API draft creation request failed with code: 400") async def test_create_doi_draft_works(self): @@ -39,7 +39,7 @@ async def test_create_doi_draft_works(self): } } - output = await self.doi.create_draft_doi() + output = await self.doi.create_draft() assert mocked_post.called result = {"fullDOI": "10.xxxx/yyyyy", "dataset": "https://doi.org/10.xxxx/yyyyy"} self.assertEqual(output, result) diff --git a/tests/test_handlers.py b/tests/test_handlers.py index d8c1908b5..4fbf112fe 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -76,6 +76,19 @@ async def setUpAsync(self): "name": "tester", } + self._draf_doi_data = { + "identifier": { + "identifierType": "DOI", + "doi": "https://doi.org/10.xxxx/yyyyy", + }, + "types": { + "bibtex": "misc", + "citeproc": "collection", + "schemaOrg": "Collection", + "resourceTypeGeneral": "Collection", + }, + } + self.operator_config = { "read_metadata_object.side_effect": self.fake_operator_read_metadata_object, "query_metadata_database.side_effect": self.fake_operator_query_metadata_object, @@ -107,6 +120,7 @@ async def setUpAsync(self): async def tearDownAsync(self): """Cleanup mocked stuff.""" + await self.client.close() def create_submission_data(self, files): @@ -360,6 +374,8 @@ async def setUpAsync(self): await super().setUpAsync() + self._mock_draf_doi = "metadata_backend.api.handlers.object.ObjectAPIHandler._draft_doi" + class_xmloperator = 
"metadata_backend.api.handlers.object.XMLOperator" self.patch_xmloperator = patch(class_xmloperator, **self.xmloperator_config, spec=True) self.MockedXMLOperator = self.patch_xmloperator.start() @@ -394,10 +410,11 @@ async def test_submit_object_works(self): """Test that submission is handled, XMLOperator is called.""" files = [("study", "SRP000539.xml")] data = self.create_submission_data(files) - response = await self.client.post("/objects/study", params={"folder": "some id"}, data=data) - self.assertEqual(response.status, 201) - self.assertIn(self.test_ega_string, await response.text()) - self.MockedXMLOperator().create_metadata_object.assert_called_once() + with patch(self._mock_draf_doi, return_value=self._draf_doi_data): + response = await self.client.post("/objects/study", params={"folder": "some id"}, data=data) + self.assertEqual(response.status, 201) + self.assertIn(self.test_ega_string, await response.text()) + self.MockedXMLOperator().create_metadata_object.assert_called_once() async def test_submit_object_works_with_json(self): """Test that JSON submission is handled, operator is called.""" @@ -410,10 +427,11 @@ async def test_submit_object_works_with_json(self): "studyAbstract": "abstract description for testing", }, } - response = await self.client.post("/objects/study", params={"folder": "some id"}, json=json_req) - self.assertEqual(response.status, 201) - self.assertIn(self.test_ega_string, await response.text()) - self.MockedOperator().create_metadata_object.assert_called_once() + with patch(self._mock_draf_doi, return_value=self._draf_doi_data): + response = await self.client.post("/objects/study", params={"folder": "some id"}, json=json_req) + self.assertEqual(response.status, 201) + self.assertIn(self.test_ega_string, await response.text()) + self.MockedOperator().create_metadata_object.assert_called_once() async def test_submit_object_missing_field_json(self): """Test that JSON has missing property.""" @@ -723,11 +741,6 @@ async def setUpAsync(self): await super().setUpAsync() - self.test_draft_doi = {"fullDOI": "10.xxxx/yyyyy", "dataset": "https://doi.org/10.xxxx/yyyyy"} - class_doihandler = "metadata_backend.api.handlers.folder.DOIHandler" - self.patch_doihandler = patch(class_doihandler, spec=True) - self.MockedDoiHandler = self.patch_doihandler.start() - class_folderoperator = "metadata_backend.api.handlers.folder.FolderOperator" self.patch_folderoperator = patch(class_folderoperator, **self.folderoperator_config, spec=True) self.MockedFolderOperator = self.patch_folderoperator.start() @@ -747,7 +760,6 @@ async def setUpAsync(self): async def tearDownAsync(self): """Cleanup mocked stuff.""" await super().tearDownAsync() - self.patch_doihandler.stop() self.patch_folderoperator.stop() self.patch_useroperator.stop() self.patch_operator.stop() @@ -869,11 +881,9 @@ async def test_update_folder_passes(self): async def test_folder_is_published(self): """Test that folder would be published and DOI would be added.""" - self.MockedDoiHandler().create_draft_doi.return_value = self.test_draft_doi self.MockedFolderOperator().update_folder.return_value = self.folder_id self.MockedMetaxHandler().publish_dataset.return_value = None response = await self.client.patch("/publish/FOL12345678") - self.MockedDoiHandler().create_draft_doi.assert_called_once() self.MockedFolderOperator().update_folder.assert_called_once() self.assertEqual(response.status, 200) json_resp = await response.json() From 7ed442f0268174ff1c4ce35efe7d39aefbd06b79 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: 
Thu, 17 Feb 2022 19:02:18 +0200 Subject: [PATCH 280/336] Resource Type formatted for front-end display --- .../helpers/schemas/datacite.json | 20 ++++++++--------- metadata_backend/helpers/schemas/folders.json | 22 +++++++++---------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/metadata_backend/helpers/schemas/datacite.json b/metadata_backend/helpers/schemas/datacite.json index 3706beabe..0e205c93c 100644 --- a/metadata_backend/helpers/schemas/datacite.json +++ b/metadata_backend/helpers/schemas/datacite.json @@ -712,23 +712,23 @@ "enum": [ "Audiovisual", "Book", - "BookChapter", + "Book Chapter", "Collection", - "ComputationalNotebook", - "ConferencePaper", - "ConferenceProceeding", - "DataPaper", + "Computational Notebook", + "Conference Paper", + "Conference Proceeding", + "Data Paper", "Dataset", "Dissertation", "Event", "Image", - "InteractiveResource", + "Interactive Resource", "Journal", - "JournalArticle", + "Journal Article", "Model", - "OutputManagementPlan", - "PeerReview", - "PhysicalObject", + "Output Management Plan", + "Peer Review", + "Physical Object", "Preprint", "Report", "Service", diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index 4e5172493..2b91ad8dc 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -785,23 +785,23 @@ "enum": [ "Audiovisual", "Book", - "BookChapter", + "Book Chapter", "Collection", - "ComputationalNotebook", - "ConferencePaper", - "ConferenceProceeding", - "DataPaper", + "Computational Notebook", + "Conference Paper", + "Conference Proceeding", + "Data Paper", "Dataset", "Dissertation", "Event", "Image", - "InteractiveResource", + "Interactive Resource", "Journal", - "JournalArticle", + "Journal Article", "Model", - "OutputManagementPlan", - "PeerReview", - "PhysicalObject", + "Output Management Plan", + "Peer Review", + "Physical Object", "Preprint", "Report", "Service", @@ -1121,4 +1121,4 @@ } }, "additionalProperties": false -} +} \ No newline at end of file From 2a6a2aff601b146862ad5266aec57dfc561689c1 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 17 Feb 2022 21:54:01 +0200 Subject: [PATCH 281/336] multilevel patch objects /extraInfo/datasetIdentifiers/- needs dot notation for mongo to work e.g. 
extraInfo.datasetIdentifiers --- metadata_backend/helpers/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata_backend/helpers/parser.py b/metadata_backend/helpers/parser.py index c0fa09841..567e134a2 100644 --- a/metadata_backend/helpers/parser.py +++ b/metadata_backend/helpers/parser.py @@ -446,7 +446,7 @@ def jsonpatch_mongo(identifier: Dict, json_patch: List[Dict[str, Any]]) -> List: identifier, { "$addToSet": { - op["path"][1:-2]: { + op["path"][1:-2].replace("/", "."): { "$each": op["value"] if isinstance(op["value"], list) else [op["value"]] }, }, From c9a58b628389fa94378604f9464a3c41167ffc36 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 17 Feb 2022 21:55:09 +0200 Subject: [PATCH 282/336] refactor doi in objects to make it easier to test --- metadata_backend/api/handlers/object.py | 19 ++++++------------- tests/test_handlers.py | 6 +++--- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index 881aefed7..4e71e9a38 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -19,11 +19,6 @@ class ObjectAPIHandler(RESTAPIHandler): """API Handler for Objects.""" - def __init__(self) -> None: - """Init Object handler.""" - super().__init__() - self.doi = DOIHandler() - async def _draft_doi(self, schema_type: str) -> Dict: """Create draft DOI for study and dataset. @@ -33,16 +28,18 @@ async def _draft_doi(self, schema_type: str) -> Dict: :param schema_type: schema can be either study or dataset :returns: Dict with DOI of the study or dataset as well as the types. """ - _doi_data = await self.doi.create_draft(prefix=schema_type) + doi_ops = DOIHandler() + _doi_data = await doi_ops.create_draft(prefix=schema_type) LOG.debug(f"doi created with doi: {_doi_data['fullDOI']}") - data: Dict = {} - if schema_type == "study": - data["identifier"] = { + data = { + "identifier": { "identifierType": "DOI", "doi": _doi_data["fullDOI"], } + } + if schema_type == "study": data["types"] = { "bibtex": "misc", "citeproc": "collection", @@ -50,10 +47,6 @@ async def _draft_doi(self, schema_type: str) -> Dict: "resourceTypeGeneral": "Collection", } elif schema_type == "dataset": - data["identifier"] = { - "identifierType": "DOI", - "doi": _doi_data["fullDOI"], - } data["types"] = { "ris": "DATA", "bibtex": "misc", diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 4fbf112fe..f2e3b2868 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -374,7 +374,7 @@ async def setUpAsync(self): await super().setUpAsync() - self._mock_draf_doi = "metadata_backend.api.handlers.object.ObjectAPIHandler._draft_doi" + self._mock_draft_doi = "metadata_backend.api.handlers.object.ObjectAPIHandler._draft_doi" class_xmloperator = "metadata_backend.api.handlers.object.XMLOperator" self.patch_xmloperator = patch(class_xmloperator, **self.xmloperator_config, spec=True) @@ -410,7 +410,7 @@ async def test_submit_object_works(self): """Test that submission is handled, XMLOperator is called.""" files = [("study", "SRP000539.xml")] data = self.create_submission_data(files) - with patch(self._mock_draf_doi, return_value=self._draf_doi_data): + with patch(self._mock_draft_doi, return_value=self._draf_doi_data): response = await self.client.post("/objects/study", params={"folder": "some id"}, data=data) self.assertEqual(response.status, 201) self.assertIn(self.test_ega_string, await response.text()) @@ -427,7 +427,7 @@ async def 
test_submit_object_works_with_json(self): "studyAbstract": "abstract description for testing", }, } - with patch(self._mock_draf_doi, return_value=self._draf_doi_data): + with patch(self._mock_draft_doi, return_value=self._draf_doi_data): response = await self.client.post("/objects/study", params={"folder": "some id"}, json=json_req) self.assertEqual(response.status, 201) self.assertIn(self.test_ega_string, await response.text()) From 3e33fe79820af2c24d40fe18debee8dd25589a66 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 17 Feb 2022 21:56:43 +0200 Subject: [PATCH 283/336] mock doi API PUT request for publishing DOI --- tests/integration/mock_doi_api.py | 205 ++++++++++++++++++------------ 1 file changed, 123 insertions(+), 82 deletions(-) diff --git a/tests/integration/mock_doi_api.py b/tests/integration/mock_doi_api.py index a3e2f04f6..f0befb386 100644 --- a/tests/integration/mock_doi_api.py +++ b/tests/integration/mock_doi_api.py @@ -2,8 +2,9 @@ import json import logging -from datetime import datetime +from datetime import date, datetime from os import getenv +import collections.abc from aiohttp import web @@ -13,8 +14,89 @@ LOG = logging.getLogger("server") LOG.setLevel(getenv("LOG_LEVEL", "INFO")) +BASE_RESPONSE = { + "data": { + "id": "10.xxxx/yyyy", + "type": "dois", + "attributes": { + "doi": "10.xxxx/yyyy", + "prefix": "10.xxxx", + "suffix": "yyyy", + "identifiers": [{"identifier": "https://mock_doi.org/10.xxxx/yyyy", "identifierType": "DOI"}], + "creators": [], + "titles": [], + "publisher": None, + "container": {}, + "publicationYear": date.today().year, + "subjects": [], + "contributors": [], + "dates": [], + "language": None, + "types": {}, + "relatedIdentifiers": [], + "sizes": [], + "formats": [], + "version": None, + "rightsList": [], + "descriptions": [], + "geoLocations": [], + "fundingReferences": [], + "xml": None, + "url": None, + "contentUrl": None, + "metadataVersion": 1, + "schemaVersion": "https://schema.datacite.org/meta/kernel-4", + "source": None, + "isActive": None, + "state": "draft", + "reason": None, + "created": "", + "registered": None, + "updated": "", + }, + "relationships": { + "client": {"data": {"id": "datacite.datacite", "type": "clients"}}, + "media": {"data": []}, + }, + }, + "included": [ + { + "id": "mockcite.mockcite", + "type": "clients", + "attributes": { + "name": "MockCite", + "symbol": "MOCKCITE.MOCKCITE", + "year": date.today().year, + "contactName": "MockCite", + "contactEmail": "support@mock_cite.org", + "description": None, + "domains": "*", + "url": None, + "created": "2010-01-01 12:00:00.000", + "updated": str(datetime.utcnow()), + "isActive": True, + "hasPassword": True, + }, + "relationships": { + "provider": {"data": {"id": "mockcite", "type": "providers"}}, + "prefixes": {"data": [{"id": "10.xxxx", "type": "prefixes"}]}, + }, + } + ], +} -async def dois(req: web.Request) -> web.Response: + +def update_dict(d, u): + """Update values in a dictionary with values from another dictionary.""" + for k, v in u.items(): + if isinstance(v, collections.abc.Mapping): + d[k] = update_dict(d.get(k, {}), v) + else: + d[k] = v + return d + + +async def create(req: web.Request) -> web.Response: """DOI endpoint.""" try: content = await req.json() @@ -23,97 +105,56 @@ async def dois(req: web.Request) -> web.Response: LOG.info(reason) raise web.HTTPBadRequest(reason=reason) + data = BASE_RESPONSE try: - attributes = content["data"]["attributes"] - except KeyError: - reason = "Provided payload did not include required attributes." 
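+        # Mock behaviour: echo the DOI requested by the client back in the
+        # response, deriving the prefix and suffix from the "<prefix>/<suffix>" form.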
+ _doi = content["data"]["attributes"]["doi"] + data["data"]["id"] = content["data"]["attributes"]["doi"] + data["data"]["attributes"]["doi"] = _doi + data["data"]["attributes"]["prefix"] = _doi.split("/")[0] + data["data"]["attributes"]["suffix"] = _doi.split("/")[1] + data["data"]["attributes"]["identifiers"] = [ + {"identifier": f"https://mock_doi.org/{content['data']['attributes']['doi']}", "identifierType": "DOI"} + ] + except Exception as e: + reason = f"Provided payload did not include required attributes: {e}" LOG.info(reason) raise web.HTTPBadRequest(reason=reason) - data = { - "data": { - "id": "10.xxxx/yyyy", - "type": "dois", - "attributes": { - "doi": "10.xxxx/yyyy", - "prefix": "10.xxxx", - "suffix": "yyyy", - "identifiers": [{"identifier": "https://mock_doi.org/10.xxxx/yyyy", "identifierType": "DOI"}], - "creators": [], - "titles": [], - "publisher": None, - "container": {}, - "publicationYear": None, - "subjects": [], - "contributors": [], - "dates": [], - "language": None, - "types": {}, - "relatedIdentifiers": [], - "sizes": [], - "formats": [], - "version": None, - "rightsList": [], - "descriptions": [], - "geoLocations": [], - "fundingReferences": [], - "xml": None, - "url": None, - "contentUrl": None, - "metadataVersion": 1, - "schemaVersion": "http://datacite.org/schema/kernel-4", - "source": None, - "isActive": None, - "state": "draft", - "reason": None, - "created": str(datetime.utcnow()), - "registered": None, - "updated": str(datetime.utcnow()), - }, - "relationships": { - "client": {"data": {"id": "datacite.datacite", "type": "clients"}}, - "media": {"data": []}, - }, - }, - "included": [ - { - "id": "mockcite.mockcite", - "type": "clients", - "attributes": { - "name": "MockCite", - "symbol": "MOCKCITE.MOCKCITE", - "year": 2021, - "contactName": "MockCite", - "contactEmail": "support@mock_cite.org", - "description": None, - "domains": "*", - "url": None, - "created": "2010-01-01 12:00:00.000", - "updated": str(datetime.utcnow()), - "isActive": True, - "hasPassword": True, - }, - "relationships": { - "provider": {"data": {"id": "mockcite", "type": "providers"}}, - "prefixes": {"data": [{"id": "10.xxxx", "type": "prefixes"}]}, - }, - } - ], - } - - if "doi" in attributes or "prefix" in attributes: - LOG.info(data) - return web.json_response(data) - else: - reason = "Provided payload include faulty attributes." + data["data"]["attributes"]["created"] = str(datetime.utcnow()) + data["data"]["attributes"]["updated"] = str(datetime.utcnow()) + data["included"][0]["attributes"]["created"] = str(datetime.utcnow()) + data["included"][0]["attributes"]["updated"] = str(datetime.utcnow()) + + return web.json_response(data, status=201) + + +async def update(req: web.Request) -> web.Response: + """DOI endpoint.""" + try: + content = await req.json() + except json.decoder.JSONDecodeError as e: + reason = f"JSON is not correctly formatted. 
See: {e}" LOG.info(reason) raise web.HTTPBadRequest(reason=reason) + data = BASE_RESPONSE + data["data"]["attributes"]["updated"] = str(datetime.utcnow()) + data["included"][0]["attributes"]["updated"] = str(datetime.utcnow()) + try: + data = update_dict(data, content) + except Exception as e: + reason = f"Provided payload did not include required attributes: {e}" + LOG.info(reason) + raise web.HTTPBadRequest(reason=reason) + + return web.json_response(data, status=200) + def init() -> web.Application: """Start server.""" app = web.Application() - app.router.add_post("/dois", dois) + app.router.add_post("/dois", create) + app.router.add_put("/dois/{id:.*}", update) return app From 30fd3d52dc8e3aa60abac9afdfe251d8b735e714 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 17 Feb 2022 21:59:01 +0200 Subject: [PATCH 284/336] Publish Study and Dataset when publishing folders Prepare Study and Datasets DOI payload and relationships between them. For now url is omitted till Metax integration is done --- metadata_backend/api/handlers/folder.py | 132 +++++++++++++++++++++++- metadata_backend/helpers/doi.py | 31 +++++- tests/test_handlers.py | 31 +++++- 3 files changed, 182 insertions(+), 12 deletions(-) diff --git a/metadata_backend/api/handlers/folder.py b/metadata_backend/api/handlers/folder.py index 313916ca8..434d314ad 100644 --- a/metadata_backend/api/handlers/folder.py +++ b/metadata_backend/api/handlers/folder.py @@ -3,7 +3,7 @@ from datetime import date, datetime from distutils.util import strtobool from math import ceil -from typing import Any, Dict, Union +from typing import Any, Dict, List, Tuple, Union import ujson from aiohttp import web @@ -11,17 +11,134 @@ from multidict import CIMultiDict from ...conf.conf import publisher +from ...helpers.doi import DOIHandler from ...helpers.logger import LOG -from ...helpers.validator import JSONValidator from ...helpers.metax_api_handler import MetaxServiceHandler +from ...helpers.validator import JSONValidator from ..middlewares import get_session -from ..operators import FolderOperator, Operator, UserOperator, ProjectOperator +from ..operators import FolderOperator, Operator, ProjectOperator, UserOperator from .restapi import RESTAPIHandler class FolderAPIHandler(RESTAPIHandler): """API Handler for folders.""" + def _prepare_doi_update(self, folder: Dict) -> Tuple[Dict, List]: + """Prepare dictionary with values for the Datacite DOI update. + + We need to prepare data for Study and Datasets, publish doi for each, + and create links (relatedIdentifiers) between Study and Datasets. + All the required information should be in the folder ``doiInfo``, + as well as ``extraInfo`` which contains the draft DOIs created for the Study + and each Dataset. + + :param folder: Folder data + :returns: Tuple with the Study and list of Datasets. 
+ """ + + _general_info = { + "attributes": { + "publisher": publisher, + "publicationYear": date.today().year, + "event": "publish", + "schemaVersion": "https://schema.datacite.org/meta/kernel-4", + }, + } + + study = {} + datasets = [] + + # we need to re-format these for Datacite, as in the JSON schemas + # we split the words so that front-end will display them nicely + _info = folder["doiInfo"] + if "relatedIdentifiers" in _info: + for d in _info["relatedIdentifiers"]: + d.update((k, "".join(v.split())) for k, v in d.items() if k in {"resourceTypeGeneral", "relationType"}) + + if "contributors" in _info: + for d in _info["contributors"]: + d.update((k, "".join(v.split())) for k, v in d.items() if k == "contributorType") + + if "descriptions" in _info: + for d in _info["descriptions"]: + d.update((k, "".join(v.split())) for k, v in d.items() if k == "descriptionType") + + if "fundingReferences" in _info: + for d in _info["fundingReferences"]: + d.update((k, "".join(v.split())) for k, v in d.items() if k == "funderIdentifierType") + + try: + # keywords are only required for Metax integration + # thus we remove them + _info.pop("keywords", None) + _general_info["attributes"].update(_info) + + _study_doi = folder["extraInfo"]["studyIdentifier"]["identifier"]["doi"] + study = { + "attributes": { + "doi": _study_doi, + "prefix": _study_doi.split("/")[0], + "suffix": _study_doi.split("/")[1], + "types": folder["extraInfo"]["studyIdentifier"]["types"], + # "url": folder["extraInfo"]["studyIdentifier"]["url"], + "identifiers": [folder["extraInfo"]["studyIdentifier"]["identifier"]], + }, + "id": _study_doi, + "type": "dois", + } + + study.update(_general_info) + + for ds in folder["extraInfo"]["datasetIdentifiers"]: + _doi = ds["identifier"]["doi"] + _tmp = { + "attributes": { + "doi": _doi, + "prefix": _doi.split("/")[0], + "suffix": _doi.split("/")[1], + "types": ds["types"], + # "url": ds["url"], + "identifiers": [ds["identifier"]], + }, + "id": _doi, + "type": "dois", + } + _tmp.update(_general_info) + + # A Dataset is described by a Study + if "relatedIdentifiers" not in _tmp["attributes"]: + _tmp["attributes"]["relatedIdentifiers"] = [] + + _tmp["attributes"]["relatedIdentifiers"].append( + { + "relationType": "IsDescribedBy", + "relatedIdentifier": _study_doi, + "resourceTypeGeneral": "Collection", + "relatedIdentifierType": "DOI", + } + ) + + datasets.append(_tmp) + + # A Study describes a Dataset + if "relatedIdentifiers" not in study["attributes"]: + study["attributes"]["relatedIdentifiers"] = [] + + study["attributes"]["relatedIdentifiers"].append( + { + "relationType": "Describes", + "relatedIdentifier": _doi, + "resourceTypeGeneral": "Dataset", + "relatedIdentifierType": "DOI", + } + ) + except Exception as e: + reason = f"Could not construct DOI data, reason: {e}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + return (study, datasets) + def _check_patch_folder(self, patch_ops: Any) -> None: """Check patch operations in request are valid. 
@@ -271,6 +388,15 @@ async def publish_folder(self, req: Request) -> Response:
 
         folder = await operator.read_folder(folder_id)
 
+        # we first try to publish the DOI before actually publishing the folder
+        study, datasets = self._prepare_doi_update(folder)
+
+        doi_ops = DOIHandler()
+
+        await doi_ops.set_state(study)
+        for ds in datasets:
+            await doi_ops.set_state(ds)
+
         obj_ops = Operator(db_client)
 
         # Create draft DOI and delete draft objects from the folder
diff --git a/metadata_backend/helpers/doi.py b/metadata_backend/helpers/doi.py
index c3de8ac51..5b3da3f28 100644
--- a/metadata_backend/helpers/doi.py
+++ b/metadata_backend/helpers/doi.py
@@ -6,7 +6,7 @@
 from typing import Dict, Union
 from uuid import uuid4
 
-from aiohttp import web, ClientSession, BasicAuth
+from aiohttp import web, ClientSession, BasicAuth, ClientTimeout
 
 from ..helpers.logger import LOG
 from ..conf import conf
@@ -22,18 +22,20 @@ def __init__(self) -> None:
         self.doi_user = conf.doi_user
         self.doi_key = conf.doi_key
         self.doi_url = f"{conf.datacite_url.rstrip('/')}/{self.doi_prefix}"
+        self.timeout = ClientTimeout(total=2 * 60)  # 2 minutes timeout
+        self.headers = {"Content-Type": "application/vnd.api+json"}
 
     async def create_draft(self, prefix: Union[str, None] = None) -> Dict:
         """Generate random suffix and POST request a draft DOI to DataCite DOI API."""
         suffix = uuid4().hex[:10]
         doi_suffix = f"{prefix}.{suffix[:4]}-{suffix[4:]}" if prefix else f"{suffix[:4]}-{suffix[4:]}"
-        headers = {"Content-Type": "application/json"}
+        # this payload is sufficient to get a draft DOI
         doi_payload = {"data": {"type": "dois", "attributes": {"doi": f"{self.doi_prefix}/{doi_suffix}"}}}
         auth = BasicAuth(login=self.doi_user, password=self.doi_key)
-        async with ClientSession(headers=headers, auth=auth) as session:
+        async with ClientSession(headers=self.headers, auth=auth) as session:
             async with session.post(self.doi_api, json=doi_payload) as response:
-                if response.status == 201 or response.status == 200:  # This should only ever be 201
+                if response.status == 201:
                     draft_resp = await response.json()
                     full_doi = draft_resp["data"]["attributes"]["doi"]
                     returned_suffix = draft_resp["data"]["attributes"]["suffix"]
@@ -48,3 +50,24 @@ async def create_draft(self, prefix: Union[str, None] = None) -> Dict:
                     raise web.HTTPBadRequest(reason=reason)  # 400 might not be the correct error for this
 
         return doi_data
+
+    async def set_state(self, doi_payload: dict) -> None:
+        """Set DOI and associated metadata.
+
+        We only support the publish event type, and we expect the data to be
+        prepared for the update.
+        Partial updates are possible.
+
+        :param doi_payload: Prepared payload with the DOI id and attributes
+            to send to the Datacite API.
+ """ + auth = BasicAuth(login=self.doi_user, password=self.doi_key) + async with ClientSession(headers=self.headers, auth=auth) as session: + async with session.put(f"{self.doi_api}/{doi_payload['id']}", json=doi_payload) as response: + if response.status == 200: + draft_resp = await response.json() + LOG.debug(f"Datacite doi response: {draft_resp}") + else: + reason = f"DOI API set state request failed with code: {response.status}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) # 400 might not be the correct error for this diff --git a/tests/test_handlers.py b/tests/test_handlers.py index f2e3b2868..745fd4224 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -114,6 +114,11 @@ async def setUpAsync(self): "filter_user.side_effect": self.fake_useroperator_filter_user, } + self.doi_handler = { + "create_draft.side_effect": self.fake_doi_create_draft, + "set_state.side_effect": self.fake_doi_set_state, + } + RESTAPIHandler._handle_check_ownedby_user = make_mocked_coro(True) ObjectAPIHandler.create_or_update_metax_dataset = make_mocked_coro("111-222-333") ObjectAPIHandler._delete_metax_dataset = make_mocked_coro() @@ -154,6 +159,13 @@ def get_file_data(self, schema, filename): _reader = csv_file.read() return _reader + async def fake_doi_create_draft(self, prefix): + """.""" + return {"fullDOI": "10.xxxx/yyyyy", "dataset": "https://doi.org/10.xxxx/yyyyy"} + + async def fake_doi_set_state(self, data): + """.""" + async def fake_operator_read_metadata_object(self, schema_type, accession_id): """Fake read operation to return mocked JSON.""" return (self.metadata_json, "application/json") @@ -741,6 +753,12 @@ async def setUpAsync(self): await super().setUpAsync() + class_doihandler = "metadata_backend.api.handlers.folder.DOIHandler" + self.patch_doihandler = patch(class_doihandler, **self.doi_handler, spec=True) + self.MockedDoiHandler = self.patch_doihandler.start() + + self._mock_prepare_doi = "metadata_backend.api.handlers.folder.FolderAPIHandler._prepare_doi_update" + class_folderoperator = "metadata_backend.api.handlers.folder.FolderOperator" self.patch_folderoperator = patch(class_folderoperator, **self.folderoperator_config, spec=True) self.MockedFolderOperator = self.patch_folderoperator.start() @@ -760,6 +778,7 @@ async def setUpAsync(self): async def tearDownAsync(self): """Cleanup mocked stuff.""" await super().tearDownAsync() + self.patch_doihandler.stop() self.patch_folderoperator.stop() self.patch_useroperator.stop() self.patch_operator.stop() @@ -881,13 +900,15 @@ async def test_update_folder_passes(self): async def test_folder_is_published(self): """Test that folder would be published and DOI would be added.""" + self.MockedDoiHandler().set_state.return_value = None self.MockedFolderOperator().update_folder.return_value = self.folder_id self.MockedMetaxHandler().publish_dataset.return_value = None - response = await self.client.patch("/publish/FOL12345678") - self.MockedFolderOperator().update_folder.assert_called_once() - self.assertEqual(response.status, 200) - json_resp = await response.json() - self.assertEqual(json_resp["folderId"], self.folder_id) + with patch(self._mock_prepare_doi, return_value=({}, [{}])): + response = await self.client.patch("/publish/FOL12345678") + # self.MockedFolderOperator().update_folder.assert_called_once() + self.assertEqual(response.status, 200) + json_resp = await response.json() + self.assertEqual(json_resp["folderId"], self.folder_id) async def test_folder_deletion_is_called(self): """Test that folder would be 
deleted.""" From 09993937ae1ee29376a5b6345629e878b5ddd8bf Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 17 Feb 2022 21:59:23 +0200 Subject: [PATCH 285/336] integration tests for publishing folder adjusted --- tests/integration/run_tests.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index e72eec1f1..bcf7edf24 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -1066,6 +1066,15 @@ async def test_crud_folders_works(sess): ], "folder metadataObjects content mismatch" # Publish the folder + # add a study and dataset for publishing a folder + doi_data_raw = await create_request_json_data("doi", "test_doi.json") + doi_data = json.loads(doi_data_raw) + patch_add_doi = [{"op": "add", "path": "/doiInfo", "value": doi_data}] + folder_id = await patch_folder(sess, folder_id, patch_add_doi) + + await post_object_json(sess, "study", folder_id, "SRP000539.json") + await post_object(sess, "dataset", folder_id, "dataset.xml") + folder_id = await publish_folder(sess, folder_id) await get_draft(sess, "sample", draft_id, 404) # checking the draft was deleted after publication @@ -1078,13 +1087,7 @@ async def test_crud_folders_works(sess): assert "datePublished" in res.keys() assert "extraInfo" in res.keys() assert res["drafts"] == [], "there are drafts in folder, expected empty" - assert res["metadataObjects"] == [ - { - "accessionId": accession_id, - "schema": "sample", - "tags": {"submissionType": "Form", "displayTitle": "HapMap sample from Homo sapiens"}, - } - ], "folder metadataObjects content mismatch" + assert len(res["metadataObjects"]) == 3, "folder metadataObjects content mismatch" # Delete folder await delete_folder_publish(sess, folder_id) @@ -1437,6 +1440,16 @@ async def test_crud_users_works(sess, project_id): "projectId": project_id, } publish_folder_id = await post_folder(sess, folder_published) + + # add a study and dataset for publishing a folder + doi_data_raw = await create_request_json_data("doi", "test_doi.json") + doi_data = json.loads(doi_data_raw) + patch_add_doi = [{"op": "add", "path": "/doiInfo", "value": doi_data}] + await patch_folder(sess, publish_folder_id, patch_add_doi) + + await post_object_json(sess, "study", publish_folder_id, "SRP000539.json") + await post_object(sess, "dataset", publish_folder_id, "dataset.xml") + await publish_folder(sess, publish_folder_id) async with sess.get(f"{folders_url}/{publish_folder_id}?projectId={project_id}") as resp: LOG.debug(f"Checking that folder {publish_folder_id} was published") From c1fe22599f832657eb674085266ad069ed250615 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 17 Feb 2022 22:07:21 +0200 Subject: [PATCH 286/336] adjust message logs for integration tests --- tests/integration/run_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index bcf7edf24..19cba45e0 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -440,9 +440,9 @@ async def post_folder(sess, data): :param data: data used to update the folder """ async with sess.post(f"{folders_url}", data=json.dumps(data)) as resp: - LOG.debug("Adding new folder") ans = await resp.json() assert resp.status == 201, f"HTTP Status code error {resp.status} {ans}" + LOG.debug(f"Adding new folder {ans['folderId']}") return ans["folderId"] @@ -504,12 +504,12 @@ async def create_folder(data, user): :param 
user: User id to which data is assigned
    :returns: Folder id for the folder inserted to database
    """
-    LOG.info("Creating new folder")
    url = f"mongodb://{AUTHDB}:{AUTHDB}@{HOST}/{DATABASE}?authSource=admin"
    db_client = AsyncIOMotorClient(url, connectTimeoutMS=1000, serverSelectionTimeoutMS=1000)
    database = db_client[DATABASE]
 
    folder_id = uuid4().hex
+    LOG.info(f"Creating new folder {folder_id}")
    data["folderId"] = folder_id
    data["text_name"] = " ".join(re.split("[\\W_]", data["name"]))
    data["drafts"] = []

From e8e46d280b219722195535669938b8c64c98160c Mon Sep 17 00:00:00 2001
From: Stefan Negru
Date: Fri, 18 Feb 2022 19:20:33 +0200
Subject: [PATCH 287/336] Attempt to curb race condition with study POST

We need to use find_one_and_update, otherwise the bulk write will not be
able to catch the duplicate reliably.

It is far from the best solution and can be improved.
---
 metadata_backend/api/handlers/folder.py | 14 ++++++++------
 metadata_backend/api/handlers/object.py |  2 ++
 metadata_backend/api/operators.py       | 12 +++++++++---
 metadata_backend/database/db_service.py | 24 ++++++++++++++++++++++++
 tests/integration/run_tests.py          | 18 +++++++++---------
 5 files changed, 52 insertions(+), 18 deletions(-)

diff --git a/metadata_backend/api/handlers/folder.py b/metadata_backend/api/handlers/folder.py
index 434d314ad..60ea26daf 100644
--- a/metadata_backend/api/handlers/folder.py
+++ b/metadata_backend/api/handlers/folder.py
@@ -66,22 +66,23 @@ def _prepare_doi_update(self, folder: Dict) -> Tuple[Dict, List]:
         if "fundingReferences" in _info:
             for d in _info["fundingReferences"]:
                 d.update((k, "".join(v.split())) for k, v in d.items() if k == "funderIdentifierType")
-
+        # need to add titles and descriptions for datasets and study
         try:
             # keywords are only required for Metax integration
             # thus we remove them
             _info.pop("keywords", None)
             _general_info["attributes"].update(_info)
 
-            _study_doi = folder["extraInfo"]["studyIdentifier"]["identifier"]["doi"]
+            _study = folder["extraInfo"]["studyIdentifier"]
+            _study_doi = _study["identifier"]["doi"]
             study = {
                 "attributes": {
                     "doi": _study_doi,
                     "prefix": _study_doi.split("/")[0],
                     "suffix": _study_doi.split("/")[1],
-                    "types": folder["extraInfo"]["studyIdentifier"]["types"],
-                    # "url": folder["extraInfo"]["studyIdentifier"]["url"],
-                    "identifiers": [folder["extraInfo"]["studyIdentifier"]["identifier"]],
+                    "types": _study["types"],
+                    # "url": _study["url"],
+                    "identifiers": [_study["identifier"]],
                 },
                 "id": _study_doi,
                 "type": "dois",
@@ -89,7 +90,8 @@ def _prepare_doi_update(self, folder: Dict) -> Tuple[Dict, List]:
 
             study.update(_general_info)
 
-            for ds in folder["extraInfo"]["datasetIdentifiers"]:
+            _datasets = folder["extraInfo"]["datasetIdentifiers"]
+            for ds in _datasets:
                 _doi = ds["identifier"]["doi"]
                 _tmp = {
                     "attributes": {
diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py
index 4e71e9a38..d3b19b537 100644
--- a/metadata_backend/api/handlers/object.py
+++ b/metadata_backend/api/handlers/object.py
@@ -159,6 +159,8 @@ async def post_object(self, req: Request) -> Response:
 
         # we need to check if there is already a study in a folder
         # we only allow one study per folder
+        # this is not enough to catch duplicate entries if updates happen in parallel
+        # that is why we check in db_service.update_study
         if not req.path.startswith("/drafts") and schema_type == "study":
             _ids = await folder_op.get_collection_objects(folder_id, collection)
             if len(_ids) == 1:
diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py
index 11ef86204..a9b5ae6b6 100644
--- a/metadata_backend/api/operators.py
+++ b/metadata_backend/api/operators.py
@@ -839,7 +839,7 @@ async def read_folder(self, folder_id: str) -> Dict:
             raise web.HTTPBadRequest(reason=reason)
         return folder
 
-    async def update_folder(self, folder_id: str, patch: List) -> str:
+    async def update_folder(self, folder_id: str, patch: List, schema: str = "") -> str:
         """Update object folder from database.
 
         Utilizes JSON Patch operations specified at: http://jsonpatch.com/
@@ -850,14 +850,20 @@ async def update_folder(self, folder_id: str, patch: List) -> str:
         :returns: ID of the folder updated to database
         """
         try:
-            update_success = await self.db_service.patch("folder", folder_id, patch)
+            if schema == "study":
+                update_success = await self.db_service.update_study("folder", folder_id, patch)
+            else:
+                update_success = await self.db_service.patch("folder", folder_id, patch)
         except (ConnectionFailure, OperationFailure) as error:
             reason = f"Error happened while updating folder: {error}"
             LOG.error(reason)
             raise web.HTTPBadRequest(reason=reason)
 
         if not update_success:
-            reason = "Updating folder to database failed for some reason."
+            if schema == "study":
+                reason = "Either there was a request to add another study to a folder, or another error occurred."
+            else:
+                reason = "Updating folder to database failed for some reason."
             LOG.error(reason)
             raise web.HTTPBadRequest(reason=reason)
         else:
diff --git a/metadata_backend/database/db_service.py b/metadata_backend/database/db_service.py
index a961a5a0a..c9c952814 100644
--- a/metadata_backend/database/db_service.py
+++ b/metadata_backend/database/db_service.py
@@ -162,6 +162,30 @@ async def patch(self, collection: str, accession_id: str, patch_data: List[Dict]
             LOG.error(bwe.details)
             return False
 
+    @auto_reconnect
+    async def update_study(self, collection: str, accession_id: str, patch_data: Any) -> bool:
+        """Update and avoid duplicates for study object.
+
+        Currently we don't allow duplicate studies in the same folder,
+        thus we need to check before inserting. A regular bulk write cannot prevent the race condition.
+
+        :param collection: Collection where document should be searched from
+        :param accession_id: ID of the object/folder/user to be updated
+        :param patch_data: JSON Patch operations to be applied to the
+            document; matching fields are updated in place.
+        :returns: True if operation was successful
+        """
+        find_by_id = {f"{collection}Id": accession_id, "metadataObjects.schema": {"$ne": "study"}}
+        requests = jsonpatch_mongo(find_by_id, patch_data)
+        for req in requests:
+            result = await self.database[collection].find_one_and_update(
+                find_by_id, req._doc, projection={"_id": False}, return_document=ReturnDocument.AFTER
+            )
+            LOG.debug(f"DB doc in {collection} with data: {patch_data} modified for {accession_id}.")
+            if not result:
+                return False
+        return True
+
     @auto_reconnect
     async def update(self, collection: str, accession_id: str, data_to_be_updated: Dict) -> bool:
         """Update some elements of object by its accessionId.
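The filter above is what prevents the duplicate: it only matches a folder that does not yet contain a study, so when two study POSTs race, the second find_one_and_update matches nothing and returns None. A minimal standalone sketch of the same guard pattern (collection and field names follow the diff above; the helper name is made up for illustration):

    from pymongo import ReturnDocument

    async def add_study_once(folders, folder_id: str, study_ref: dict) -> bool:
        # Match the folder only while no metadataObjects entry has schema == "study",
        # so exactly one concurrent writer can succeed; $addToSet appends atomically.
        find_by_id = {"folderId": folder_id, "metadataObjects.schema": {"$ne": "study"}}
        update = {"$addToSet": {"metadataObjects": study_ref}}
        result = await folders.find_one_and_update(
            find_by_id, update, projection={"_id": False}, return_document=ReturnDocument.AFTER
        )
        return result is not None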
diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index 19cba45e0..9fbb3d703 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -837,36 +837,36 @@ async def test_getting_all_objects_from_schema_works(sess, folder_id): :param folder_id: id of the folder used to group submission objects """ # Add objects - files = await asyncio.gather(*[post_object(sess, "study", folder_id, "SRP000539.xml") for _ in range(13)]) + files = await asyncio.gather(*[post_object(sess, "sample", folder_id, "SRS001433.xml") for _ in range(13)]) # Test default values - async with sess.get(f"{objects_url}/study") as resp: + async with sess.get(f"{objects_url}/sample") as resp: assert resp.status == 200 ans = await resp.json() assert ans["page"]["page"] == 1 assert ans["page"]["size"] == 10 assert ans["page"]["totalPages"] == 2 - assert ans["page"]["totalObjects"] == 16 + assert ans["page"]["totalObjects"] == 18, ans["page"]["totalObjects"] assert len(ans["objects"]) == 10 # Test with custom pagination values - async with sess.get(f"{objects_url}/study?page=2&per_page=3") as resp: + async with sess.get(f"{objects_url}/sample?page=2&per_page=3") as resp: assert resp.status == 200 ans = await resp.json() assert ans["page"]["page"] == 2 assert ans["page"]["size"] == 3 - assert ans["page"]["totalPages"] == 6 - assert ans["page"]["totalObjects"] == 16 + assert ans["page"]["totalPages"] == 6, ans["page"]["totalPages"] + assert ans["page"]["totalObjects"] == 18, ans["page"]["totalObjects"] assert len(ans["objects"]) == 3 # Test with wrong pagination values - async with sess.get(f"{objects_url}/study?page=-1") as resp: + async with sess.get(f"{objects_url}/sample?page=-1") as resp: assert resp.status == 400 - async with sess.get(f"{objects_url}/study?per_page=0") as resp: + async with sess.get(f"{objects_url}/sample?per_page=0") as resp: assert resp.status == 400 # Delete objects - await asyncio.gather(*[delete_object(sess, "study", accession_id) for accession_id, _ in files]) + await asyncio.gather(*[delete_object(sess, "sample", accession_id) for accession_id, _ in files]) async def test_metax_crud(sess, folder_id): From 3197ea41d7b5d6599b69467df524b179f89dfcee Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 15 Feb 2022 15:55:22 +0200 Subject: [PATCH 288/336] not all functions need to be async --- metadata_backend/api/handlers/object.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index d3b19b537..4b6d880ff 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -225,6 +225,9 @@ async def post_object(self, req: Request) -> Response: if collection in {"study", "dataset"}: [await self._create_metax_dataset(req, collection, item) for item in objects] + patch = self._prepare_folder_patch_new_object(collection, ids, patch_params) + await folder_op.update_folder(folder_id, patch) + body = ujson.dumps(data, escape_forward_slashes=False) return web.Response( From efa190165ce709c5e74db98de7036d211aa26337 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 16 Feb 2022 16:35:37 +0200 Subject: [PATCH 289/336] create draft dois for study and datasets restrict one study per folder. 
deprecate creating draft when publishing folder --- metadata_backend/api/handlers/object.py | 6 ++++++ tests/test_handlers.py | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index 4b6d880ff..0e7577698 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -228,6 +228,12 @@ async def post_object(self, req: Request) -> Response: patch = self._prepare_folder_patch_new_object(collection, ids, patch_params) await folder_op.update_folder(folder_id, patch) + # we don't create DOIs for drafts and we restrict doi creation to + # study and datasets + if not req.path.startswith("/drafts") and schema_type in _allowed_doi: + doi_patch = await self._prepare_folder_patch_doi(schema_type, ids) + await folder_op.update_folder(folder_id, doi_patch) + body = ujson.dumps(data, escape_forward_slashes=False) return web.Response( diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 745fd4224..95d2608ba 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -778,7 +778,6 @@ async def setUpAsync(self): async def tearDownAsync(self): """Cleanup mocked stuff.""" await super().tearDownAsync() - self.patch_doihandler.stop() self.patch_folderoperator.stop() self.patch_useroperator.stop() self.patch_operator.stop() From 3935764aa78605275bddbaebb94514e40616a168 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 17 Feb 2022 21:59:01 +0200 Subject: [PATCH 290/336] publish Study and Dataset when publishing folders prepare Study and Datasets DOI payload and relationships between them for now url is omitted till Metax integration is done --- tests/test_handlers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 95d2608ba..745fd4224 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -778,6 +778,7 @@ async def setUpAsync(self): async def tearDownAsync(self): """Cleanup mocked stuff.""" await super().tearDownAsync() + self.patch_doihandler.stop() self.patch_folderoperator.stop() self.patch_useroperator.stop() self.patch_operator.stop() From 895dba170a6d7bdcaee9a6d70d40aaf756922d77 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Fri, 18 Feb 2022 19:20:33 +0200 Subject: [PATCH 291/336] attempt to curb race condition with study POST we need to use find_one_and_update otherwise the bulkwrite will not be able to accurately catch it. 
It is far from the best solution and can be improved.
---
 metadata_backend/api/handlers/object.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py
index 0e7577698..afe284bb1 100644
--- a/metadata_backend/api/handlers/object.py
+++ b/metadata_backend/api/handlers/object.py
@@ -226,7 +226,7 @@ async def post_object(self, req: Request) -> Response:
             [await self._create_metax_dataset(req, collection, item) for item in objects]
 
         patch = self._prepare_folder_patch_new_object(collection, ids, patch_params)
-        await folder_op.update_folder(folder_id, patch)
+        await folder_op.update_folder(folder_id, patch, schema_type)
 
From 2efa393b9c88439b5745016d463ab9f1221cba09 Mon Sep 17 00:00:00 2001
From: Stefan Negru
Date: Tue, 15 Feb 2022 13:37:25 +0200
Subject: [PATCH 292/336] check values in set instead of list

Membership checks on a set are faster than on a list.
---
 metadata_backend/api/handlers/folder.py   | 18 +++----
 metadata_backend/api/handlers/object.py   |  3 +-
 metadata_backend/api/handlers/restapi.py  |  2 +-
 metadata_backend/api/handlers/user.py     | 62 ++++++++++++++++++++++-
 metadata_backend/api/middlewares.py       |  4 +-
 metadata_backend/api/operators.py         |  4 +-
 metadata_backend/database/db_service.py   | 15 +++---
 metadata_backend/helpers/parser.py        | 14 ++---
 metadata_backend/helpers/schema_loader.py |  2 +-
 tests/integration/mongo_indexes.py        |  2 +-
 10 files changed, 92 insertions(+), 34 deletions(-)

diff --git a/metadata_backend/api/handlers/folder.py b/metadata_backend/api/handlers/folder.py
index 60ea26daf..6a69edaaf 100644
--- a/metadata_backend/api/handlers/folder.py
+++ b/metadata_backend/api/handlers/folder.py
@@ -152,29 +152,29 @@ def _check_patch_folder(self, patch_ops: Any) -> None:
         :raises: HTTPUnauthorized if request tries to do anything else than add or replace
         :returns: None
         """
-        _required_paths = ["/name", "/description"]
-        _required_values = ["schema", "accessionId"]
-        _arrays = ["/metadataObjects/-", "/drafts/-", "/doiInfo"]
+        _required_paths = {"/name", "/description"}
+        _required_values = {"schema", "accessionId"}
+        _arrays = {"/metadataObjects/-", "/drafts/-", "/doiInfo"}
         _tags = re.compile("^/(metadataObjects|drafts)/[0-9]*/(tags)$")
         for op in patch_ops:
             if _tags.match(op["path"]):
                 LOG.info(f"{op['op']} on tags in folder")
-                if "submissionType" in op["value"].keys() and op["value"]["submissionType"] not in [
+                if "submissionType" in op["value"].keys() and op["value"]["submissionType"] not in {
                     "XML",
                     "CSV",
                     "Form",
-                ]:
+                }:
                     reason = "submissionType is restricted to either 'CSV', 'XML' or 'Form' values."
                     LOG.error(reason)
                     raise web.HTTPBadRequest(reason=reason)
                 pass
             else:
-                if all(i not in op["path"] for i in _required_paths + _arrays):
+                if all(i not in op["path"] for i in set.union(_required_paths, _arrays)):
                     reason = f"Request contains '{op['path']}' key that cannot be updated to folders."
                     LOG.error(reason)
                     raise web.HTTPBadRequest(reason=reason)
-                if op["op"] in ["remove", "copy", "test", "move"]:
+                if op["op"] in {"remove", "copy", "test", "move"}:
                     reason = f"{op['op']} on {op['path']} is not allowed."
                    LOG.error(reason)
                    raise web.HTTPUnauthorized(reason=reason)
@@ -192,8 +192,8 @@ def _check_patch_folder(self, patch_ops: Any) -> None:
                         if (
                             "tags" in item
                             and "submissionType" in item["tags"]
-                            and item["tags"]["submissionType"] not in ["XML", "Form"]
+                            and item["tags"]["submissionType"] not in {"XML", "CSV", "Form"}
                         ):
-                            reason = "submissionType is restricted to either 'XML' or 'Form' values."
+                            reason = "submissionType is restricted to either 'XML', 'CSV' or 'Form' values."
                             LOG.error(reason)
@@ -223,7 +223,7 @@ async def get_folders(self, req: Request) -> Response:
         # Check if only published or draft folders are requested
         if "published" in req.query:
             pub_param = req.query.get("published", "").title()
-            if pub_param in ["True", "False"]:
+            if pub_param in {"True", "False"}:
                 folder_query["published"] = {"$eq": bool(strtobool(pub_param))}
             else:
                 reason = "'published' parameter must be either 'true' or 'false'"
diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py
index afe284bb1..193e5564d 100644
--- a/metadata_backend/api/handlers/object.py
+++ b/metadata_backend/api/handlers/object.py
@@ -139,8 +139,9 @@ async def post_object(self, req: Request) -> Response:
         :param req: POST request
         :returns: JSON response containing accessionId for submitted object
         """
-        _allowed_csv = ["sample"]
+        _allowed_csv = {"sample"}
         _allowed_doi = {"study", "dataset"}
+
         schema_type = req.match_info["schema"]
         folder_id = req.query.get("folder", "")
diff --git a/metadata_backend/api/handlers/restapi.py b/metadata_backend/api/handlers/restapi.py
index 81d575265..96c578467 100644
--- a/metadata_backend/api/handlers/restapi.py
+++ b/metadata_backend/api/handlers/restapi.py
@@ -25,7 +25,7 @@ def _check_schema_exists(self, schema_type: str) -> None:
         :param schema_type: schema type.
         :raises: HTTPNotFound if schema does not exist.
         """
-        if schema_type not in schema_types.keys():
+        if schema_type not in schema_types:
             reason = f"Specified schema {schema_type} was not found."
             LOG.error(reason)
             raise web.HTTPNotFound(reason=reason)
diff --git a/metadata_backend/api/handlers/user.py b/metadata_backend/api/handlers/user.py
index 1523b48fe..747dadfc1 100644
--- a/metadata_backend/api/handlers/user.py
+++ b/metadata_backend/api/handlers/user.py
@@ -1,7 +1,8 @@
 """Handle HTTP methods for server."""
+import re
 from math import ceil
-from typing import Dict, Tuple
+from typing import Any, Dict, Tuple
 
 import ujson
 from aiohttp import web
@@ -18,6 +19,63 @@
 class UserAPIHandler(RESTAPIHandler):
     """API Handler for users."""
 
+    def _check_patch_user(self, patch_ops: Any) -> None:
+        """Check patch operations in request are valid.
+
+        We check that ``folders`` have string values (one or a list)
+        and ``templates`` have ``_required_values``.
+        For tags we check that ``submissionType`` takes ``XML``, ``CSV`` or
+        ``Form`` as values.
+        :param patch_ops: JSON patch request
+        :raises: HTTPBadRequest if request does not fulfil one of the requirements
+        :raises: HTTPUnauthorized if request tries to do anything else than add or replace
+        :returns: None
+        """
+        _arrays = {"/templates/-", "/folders/-"}
+        _required_values = {"schema", "accessionId"}
+        _tags = re.compile("^/(templates)/[0-9]*/(tags)$")
+        for op in patch_ops:
+            if _tags.match(op["path"]):
+                LOG.info(f"{op['op']} on tags in user")
+                if "submissionType" in op["value"].keys() and op["value"]["submissionType"] not in {
+                    "XML",
+                    "CSV",
+                    "Form",
+                }:
+                    reason = "submissionType is restricted to either 'XML', 'CSV' or 'Form' values."
+                    LOG.error(reason)
+                    raise web.HTTPBadRequest(reason=reason)
+                pass
+            else:
+                if all(i not in op["path"] for i in _arrays):
+                    reason = f"Request contains '{op['path']}' key that cannot be updated to user object"
+                    LOG.error(reason)
+                    raise web.HTTPBadRequest(reason=reason)
+                if op["op"] in {"remove", "copy", "test", "move", "replace"}:
+                    reason = f"{op['op']} on {op['path']} is not allowed."
+                    LOG.error(reason)
+                    raise web.HTTPUnauthorized(reason=reason)
+                if op["path"] == "/folders/-":
+                    if not (isinstance(op["value"], str) or isinstance(op["value"], list)):
+                        reason = "We only accept string folder IDs."
+                        LOG.error(reason)
+                        raise web.HTTPBadRequest(reason=reason)
+                if op["path"] == "/templates/-":
+                    _ops = op["value"] if isinstance(op["value"], list) else [op["value"]]
+                    for item in _ops:
+                        if not all(key in item.keys() for key in _required_values):
+                            reason = "accessionId and schema are required fields."
+                            LOG.error(reason)
+                            raise web.HTTPBadRequest(reason=reason)
+                        if (
+                            "tags" in item
+                            and "submissionType" in item["tags"]
+                            and item["tags"]["submissionType"] not in {"XML", "CSV", "Form"}
+                        ):
+                            reason = "submissionType is restricted to either 'XML', 'CSV' or 'Form' values."
+                            LOG.error(reason)
+                            raise web.HTTPBadRequest(reason=reason)
+
     async def get_user(self, req: Request) -> Response:
         """Get one user by its user ID.
 
@@ -85,7 +143,7 @@ async def _get_user_items(self, req: Request, user: Dict, item_type: str) -> Tup
         :returns: Paginated list of user draft templates and link header
         """
         # Check item_type parameter is not faulty
-        if item_type not in ["templates", "folders"]:
+        if item_type not in {"templates", "folders"}:
             reason = f"{item_type} is a faulty item parameter. Should be either folders or templates"
             LOG.error(reason)
             raise web.HTTPBadRequest(reason=reason)
diff --git a/metadata_backend/api/middlewares.py b/metadata_backend/api/middlewares.py
index 69ce65773..b70f78336 100644
--- a/metadata_backend/api/middlewares.py
+++ b/metadata_backend/api/middlewares.py
@@ -18,7 +18,7 @@ def _check_error_page_requested(req: Request, error_code: int) -> web.Response:  # type:ignore
     """Return the correct error page with correct status code."""
     if "Accept" in req.headers and req.headers["Accept"]:
-        if req.headers["Accept"].split(",")[0] in ["text/html", "application/xhtml+xml"]:
+        if req.headers["Accept"].split(",")[0] in {"text/html", "application/xhtml+xml"}:
             raise web.HTTPSeeOther(
                 f"/error{str(error_code)}",
                 headers={
@@ -115,7 +115,7 @@ async def check_login(request: Request, handler: Callable) -> StreamResponse:
     if request.path.startswith(tuple(controlled_paths)) and "OIDC_URL" in os.environ and bool(os.getenv("OIDC_URL")):
         cookie = decrypt_cookie(request)
         session = request.app["Session"].setdefault(cookie["id"], {})
-        if not all(x in ["access_token", "user_info", "oidc_state"] for x in session):
+        if not all(x in {"access_token", "user_info", "oidc_state"} for x in session):
             LOG.debug("checked session parameter")
             response = web.HTTPSeeOther(f"{aai_config['domain']}/aai")
             response.headers["Location"] = "/aai"
diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py
index a9b5ae6b6..a2a43ad0d 100644
--- a/metadata_backend/api/operators.py
+++ b/metadata_backend/api/operators.py
@@ -469,7 +469,7 @@ async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accessio
         :param data: Metadata object
         :returns: Accession Id for object inserted to database
         """
-        forbidden_keys = ["accessionId",
"publishDate", "dateCreated"} # when replacing from xml file there are no (not supposed to be) metax data in content data # therefore we need to check if the object already exists in database and has metax id if schema_type in {"study", "dataset"}: @@ -499,7 +499,7 @@ async def _format_data_to_update_and_add_to_db(self, schema_type: str, accession :param data: Metadata object :returns: Accession Id for object inserted to database """ - forbidden_keys = ["accessionId", "publishDate", "dateCreated"] + forbidden_keys = {"accessionId", "publishDate", "dateCreated"} # check if object already has metax id or is it first time writing it if schema_type in {"study", "dataset"}: read_data = await self.db_service.read(schema_type, accession_id) diff --git a/metadata_backend/database/db_service.py b/metadata_backend/database/db_service.py index c9c952814..b50576c47 100644 --- a/metadata_backend/database/db_service.py +++ b/metadata_backend/database/db_service.py @@ -1,11 +1,10 @@ """Services that handle database connections. Implemented with MongoDB.""" from functools import wraps -from typing import Any, Callable, Dict, Union, List +from typing import Any, Callable, Dict, List, Union from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorCursor -from pymongo.errors import AutoReconnect, ConnectionFailure from pymongo import ReturnDocument -from pymongo.errors import BulkWriteError +from pymongo.errors import AutoReconnect, BulkWriteError, ConnectionFailure from ..conf.conf import serverTimeout from ..helpers.logger import LOG @@ -35,8 +34,8 @@ async def retry(*args: Any, **kwargs: Any) -> Any: message = f"Connection to database failed after {attempt} tries" raise ConnectionFailure(message=message) LOG.error( - "Connection not successful, trying to reconnect." - f"Reconnection attempt number {attempt}, waiting for {default_timeout} seconds." + "Connection not successful, trying to reconnect. " + + f"Reconnection attempt number {attempt}, waiting for {default_timeout} seconds." ) continue @@ -136,7 +135,7 @@ async def read(self, collection: str, accession_id: str) -> Dict: :param accession_id: ID of the object/folder/user to be searched :returns: First document matching the accession_id """ - id_key = f"{collection}Id" if (collection in ["folder", "user"]) else "accessionId" + id_key = f"{collection}Id" if (collection in {"folder", "user"}) else "accessionId" projection = {"_id": False, "eppn": False} if collection == "user" else {"_id": False} find_by_id = {id_key: accession_id} LOG.debug(f"DB doc in {collection} read for {accession_id}.") @@ -196,7 +195,7 @@ async def update(self, collection: str, accession_id: str, data_to_be_updated: D updated to object, can replace previous fields and add new ones. 
:returns: True if operation was successful """ - id_key = f"{collection}Id" if (collection in ["folder", "user"]) else "accessionId" + id_key = f"{collection}Id" if (collection in {"folder", "user"}) else "accessionId" find_by_id = {id_key: accession_id} update_op = {"$set": data_to_be_updated} result = await self.database[collection].update_one(find_by_id, update_op) @@ -277,7 +276,7 @@ async def delete(self, collection: str, accession_id: str) -> bool: :param accession_id: ID for object/folder/user to be deleted :returns: True if operation was successful """ - id_key = f"{collection}Id" if (collection in ["folder", "user"]) else "accessionId" + id_key = f"{collection}Id" if (collection in {"folder", "user"}) else "accessionId" find_by_id = {id_key: accession_id} result = await self.database[collection].delete_one(find_by_id) LOG.debug(f"DB doc in {collection} deleted for {accession_id}.") diff --git a/metadata_backend/helpers/parser.py b/metadata_backend/helpers/parser.py index 567e134a2..aa66980c4 100644 --- a/metadata_backend/helpers/parser.py +++ b/metadata_backend/helpers/parser.py @@ -52,7 +52,7 @@ def _flatten(self, data: Any) -> Union[Dict, List, str, None]: :param schema_type: XML data :returns: XML element flattened. """ - links = [ + links = { "studyLinks", "sampleLinks", "runLinks", @@ -64,7 +64,7 @@ def _flatten(self, data: Any) -> Union[Dict, List, str, None]: "datasetLinks", "assemblyLinks", "submissionLinks", - ] + } attrs = [ "studyAttributes", @@ -82,14 +82,14 @@ def _flatten(self, data: Any) -> Union[Dict, List, str, None]: "dataUses", ] - refs = ["analysisRef", "sampleRef", "runRef", "experimentRef"] + refs = {"analysisRef", "sampleRef", "runRef", "experimentRef"} children: Any = self.dict() for key, value, _ in self.map_content(data.content): key = self._to_camel(key.lower()) - if key in attrs and len(value) == 1: + if key in set(attrs) and len(value) == 1: attrs = list(value.values()) children[key] = attrs[0] if isinstance(attrs[0], list) else attrs continue @@ -106,7 +106,7 @@ def _flatten(self, data: Any) -> Union[Dict, List, str, None]: continue if "assembly" in key: - if next(iter(value)) in ["standard", "custom"]: + if next(iter(value)) in {"standard", "custom"}: children[key] = next(iter(value.values())) if "accessionId" in children[key]: children[key]["accession"] = children[key].pop("accessionId") @@ -377,7 +377,7 @@ def parse(self, schema_type: str, content: str) -> List: """ csv_reader = csv.DictReader(StringIO(content), delimiter=",", quoting=csv.QUOTE_NONE) - _sample_list = [ + _sample_list = { "title", "alias", "description", @@ -389,7 +389,7 @@ def parse(self, schema_type: str, content: str) -> List: "cellLine", "region", "phenotype", - ] + } if ( csv_reader.fieldnames diff --git a/metadata_backend/helpers/schema_loader.py b/metadata_backend/helpers/schema_loader.py index 42f9492a7..714372ce1 100644 --- a/metadata_backend/helpers/schema_loader.py +++ b/metadata_backend/helpers/schema_loader.py @@ -39,7 +39,7 @@ def _identify_file(self, schema_type: str) -> Path: """ schema_type = schema_type.lower() schema_file = None - for file in [x for x in self.path.iterdir()]: + for file in set([x for x in self.path.iterdir()]): if schema_type in file.name and file.name.endswith(self.loader_type): schema_file = file break diff --git a/tests/integration/mongo_indexes.py b/tests/integration/mongo_indexes.py index 982478ffe..bcde73f72 100755 --- a/tests/integration/mongo_indexes.py +++ b/tests/integration/mongo_indexes.py @@ -36,7 +36,7 @@ async def 
create_indexes(url: str) -> None: db = client[DATABASE] LOG.debug(f"Current database: {db}") LOG.debug("=== Create collections ===") - for col in ["folder", "user"]: + for col in {"folder", "user"}: try: await db.create_collection(col) except pymongo.errors.CollectionInvalid as e: From 1bc4c40a234fb32cf1bb68adcf3b938db43c830e Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 28 Feb 2022 18:22:46 +0200 Subject: [PATCH 293/336] add to set instead of extending list --- metadata_backend/api/operators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index a2a43ad0d..bf46f7939 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -474,7 +474,7 @@ async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accessio # therefore we need to check if the object already exists in database and has metax id if schema_type in {"study", "dataset"}: read_data = await self.db_service.read(schema_type, accession_id) - forbidden_keys.extend(["metaxIdentifier"]) + forbidden_keys.add("metaxIdentifier") if any(i in data for i in forbidden_keys): reason = f"Some items (e.g: {', '.join(forbidden_keys)}) cannot be changed." LOG.error(reason) From 4ff0e0e8ed003ceaf62587bfc0685f88951b6c0f Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 2 Mar 2022 16:59:46 +0200 Subject: [PATCH 294/336] integrating doi with metax --- metadata_backend/api/handlers/folder.py | 19 +++- metadata_backend/api/handlers/object.py | 90 ++++++------------- metadata_backend/helpers/schemas/folders.json | 10 --- tests/integration/run_tests.py | 66 ++++++++++---- 4 files changed, 92 insertions(+), 93 deletions(-) diff --git a/metadata_backend/api/handlers/folder.py b/metadata_backend/api/handlers/folder.py index 6a69edaaf..05cc07f90 100644 --- a/metadata_backend/api/handlers/folder.py +++ b/metadata_backend/api/handlers/folder.py @@ -80,8 +80,13 @@ def _prepare_doi_update(self, folder: Dict) -> Tuple[Dict, List]: "doi": _study_doi, "prefix": _study_doi.split("/")[0], "suffix": _study_doi.split("/")[1], - "types": _study["types"], - # "url": _study["url"], + "types": { + "bibtex": "misc", + "citeproc": "collection", + "schemaOrg": "Collection", + "resourceTypeGeneral": "Collection", + }, + "url": _study["url"], "identifiers": [_study["identifier"]], }, "id": _study_doi, @@ -98,8 +103,14 @@ def _prepare_doi_update(self, folder: Dict) -> Tuple[Dict, List]: "doi": _doi, "prefix": _doi.split("/")[0], "suffix": _doi.split("/")[1], - "types": ds["types"], - # "url": ds["url"], + "types": { + "ris": "DATA", + "bibtex": "misc", + "citeproc": "dataset", + "schemaOrg": "Dataset", + "resourceTypeGeneral": "Dataset", + }, + "url": ds["url"], "identifiers": [ds["identifier"]], }, "id": _doi, diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index 193e5564d..c6eddd4ed 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -19,44 +19,6 @@ class ObjectAPIHandler(RESTAPIHandler): """API Handler for Objects.""" - async def _draft_doi(self, schema_type: str) -> Dict: - """Create draft DOI for study and dataset. - - The Draft DOI will be created only on POST and the data added to the - folder. Any update of this should not be possible. - - :param schema_type: schema can be either study or dataset - :returns: Dict with DOI of the study or dataset as well as the types. 
- """ - doi_ops = DOIHandler() - _doi_data = await doi_ops.create_draft(prefix=schema_type) - - LOG.debug(f"doi created with doi: {_doi_data['fullDOI']}") - - data = { - "identifier": { - "identifierType": "DOI", - "doi": _doi_data["fullDOI"], - } - } - if schema_type == "study": - data["types"] = { - "bibtex": "misc", - "citeproc": "collection", - "schemaOrg": "Collection", - "resourceTypeGeneral": "Collection", - } - elif schema_type == "dataset": - data["types"] = { - "ris": "DATA", - "bibtex": "misc", - "citeproc": "dataset", - "schemaOrg": "Dataset", - "resourceTypeGeneral": "Dataset", - } - - return data - async def _handle_query(self, req: Request) -> Response: """Handle query results. @@ -229,12 +191,6 @@ async def post_object(self, req: Request) -> Response: patch = self._prepare_folder_patch_new_object(collection, ids, patch_params) await folder_op.update_folder(folder_id, patch, schema_type) - # we don't create DOIs for drafts and we restrict doi creation to - # study and datasets - if not req.path.startswith("/drafts") and schema_type in _allowed_doi: - doi_patch = await self._prepare_folder_patch_doi(schema_type, ids) - await folder_op.update_folder(folder_id, doi_patch) - body = ujson.dumps(data, escape_forward_slashes=False) return web.Response( @@ -538,6 +494,7 @@ async def _update_metax_dataset(self, req: Request, collection: str, accession_i """ metax_service = MetaxServiceHandler(req) operator = Operator(req.app["db_client"]) + object_data, _ = await operator.read_metadata_object(collection, accession_id) # MYPY related if statement, Operator (when not XMLOperator) always returns object_data as dict if isinstance(object_data, Dict): @@ -545,6 +502,7 @@ async def _update_metax_dataset(self, req: Request, collection: str, accession_i metax_id = await metax_service.update_draft_dataset(collection, object_data) else: raise ValueError("Object's data must be dictionary") + return metax_id async def _delete_metax_dataset(self, req: Request, metax_id: str) -> None: @@ -557,18 +515,23 @@ async def _delete_metax_dataset(self, req: Request, metax_id: str) -> None: metax_service = MetaxServiceHandler(req) await metax_service.delete_draft_dataset(metax_id) - # TODO: to be replaced with real doi fetching - async def create_doi(self) -> str: - """Temporary function for random DOI creation. + async def _draft_doi(self, schema_type: str) -> str: + """Create draft DOI for study and dataset. + + The Draft DOI will be created only on POST and the data added to the + folder. Any update of this should not be possible. - :returns: Temporary DOI string + :param schema_type: schema can be either study or dataset + :returns: Dict with DOI of the study or dataset as well as the types. """ - from uuid import uuid4 + doi_ops = DOIHandler() + _doi_data = await doi_ops.create_draft(prefix=schema_type) - rand = str(uuid4()).split("-")[1:3] - return f"10.{rand[0]}/{rand[1]}" + LOG.debug(f"doi created with doi: {_doi_data['fullDOI']}") - async def _prepare_folder_patch_doi(self, schema: str, ids: List) -> List: + return _doi_data["fullDOI"] + + def _prepare_folder_patch_doi(self, schema: str, doi: str, url: str) -> List: """Prepare patch operation for updating object's doi information in a folder. 
:param schema: schema of object to be updated @@ -576,14 +539,19 @@ async def _prepare_folder_patch_doi(self, schema: str, ids: List) -> List: :returns: dict with patch operation """ patch = [] - for id in ids: - _data = await self._draft_doi(schema) - _data["accessionId"] = id["accessionId"] - if schema == "study": - patch_op = {"op": "add", "path": "/extraInfo/studyIdentifier", "value": _data} - patch.append(patch_op) - elif schema == "dataset": - patch_op = {"op": "add", "path": "/extraInfo/datasetIdentifiers/-", "value": _data} - patch.append(patch_op) + + data = { + "identifier": { + "identifierType": "DOI", + "doi": doi, + }, + "url": url, + } + if schema == "study": + patch_op = {"op": "add", "path": "/extraInfo/studyIdentifier", "value": data} + patch.append(patch_op) + elif schema == "dataset": + patch_op = {"op": "add", "path": "/extraInfo/datasetIdentifiers/-", "value": data} + patch.append(patch_op) return patch diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index 2b91ad8dc..968e6aaba 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -946,7 +946,6 @@ "studyIdentifier": { "type": "object", "required": [ - "accessionId", "identifier", "url" ], @@ -969,10 +968,6 @@ } } }, - "accessionId": { - "type": "string", - "title": "Internal accessionId for the study" - }, "url": { "type": "string", "title": "URL of the digital location of the object" @@ -988,7 +983,6 @@ "items": { "type": "object", "required": [ - "accessionId", "identifier", "url" ], @@ -1011,10 +1005,6 @@ } } }, - "accessionId": { - "type": "string", - "title": "Internal accessionid for the dataset" - }, "url": { "type": "string", "title": "URL of the digital location of the object" diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index 9fbb3d703..f05ac3bfd 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -869,13 +869,14 @@ async def test_getting_all_objects_from_schema_works(sess, folder_id): await asyncio.gather(*[delete_object(sess, "sample", accession_id) for accession_id, _ in files]) -async def test_metax_crud(sess, folder_id): +async def test_metax_crud(sess, metax_folder): """Test Metax service with study and dataset POST, PATCH, PUBLISH and DELETE reqs. 
:param sess: HTTP session in which request call is made :param folder_id: id of the folder where objects reside """ # POST to object endpoint creates draft dataset in Metax for Study and Dataset + folder_id = await post_folder(sess, metax_folder) ids = [] xml_files = set() for schema, filename, update_filename in { @@ -886,15 +887,6 @@ async def test_metax_crud(sess, folder_id): xml_files.add((schema, accession_id, update_filename)) ids.append([schema, accession_id]) - json_files = set() - for schema, filename, update_filename in { - ("study", "SRP000539.json", "patch.json"), - ("dataset", "dataset.json", "dataset_patch.json"), - }: - accession_id = await post_object_json(sess, schema, folder_id, filename) - json_files.add((schema, accession_id, filename, update_filename)) - ids.append([schema, accession_id]) - for object in ids: schema, accession_id = object async with sess.get(f"{objects_url}/{schema}/{accession_id}") as resp: @@ -916,10 +908,6 @@ async def test_metax_crud(sess, folder_id): # PUT and PATCH to object endpoint updates draft dataset in Metax for Study and Dataset for schema, accession_id, filename in xml_files: await put_object_xml(sess, schema, accession_id, filename) - for schema, accession_id, filename, _ in json_files: - await put_object_json(sess, schema, accession_id, filename) - for schema, accession_id, _, filename in json_files: - await patch_object_json(sess, schema, accession_id, filename) for _, _, metax_id in ids: async with sess.get(f"{metax_url}/{metax_id}") as metax_resp: @@ -932,11 +920,45 @@ async def test_metax_crud(sess, folder_id): # DELETE object from Metax for schema, accession_id, _ in xml_files: await delete_object(sess, schema, accession_id) - for schema, accession_id, _, _ in json_files: - await delete_object(sess, schema, accession_id) + for _, _, metax_id in ids: async with sess.get(f"{metax_url}/{metax_id}") as metax_resp: assert metax_resp.status == 404, f"HTTP Status code error - expected 404 Not Found, got {resp.status}" + ids2 = [] + json_files = set() + for schema, filename, update_filename in { + ("study", "SRP000539.json", "patch.json"), + ("dataset", "dataset.json", "dataset_patch.json"), + }: + accession_id = await post_object_json(sess, schema, folder_id, filename) + json_files.add((schema, accession_id, filename, update_filename)) + ids2.append([schema, accession_id]) + + for object in ids2: + schema, accession_id = object + async with sess.get(f"{objects_url}/{schema}/{accession_id}") as resp: + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" + res = await resp.json() + try: + metax_id = res["metaxIdentifier"]["identifier"] + except KeyError: + assert False, "Metax ID was not in response data" + object.append(metax_id) + async with sess.get(f"{metax_url}/{metax_id}") as metax_resp: + assert metax_resp.status == 200, f"HTTP Status code error, got {resp.status}" + metax_res = await metax_resp.json() + assert ( + res.get("doi", None) == metax_res["research_dataset"]["preferred_identifier"] + ), "Object's DOI was not in Metax response data preferred_identifier" + assert metax_res.get("date_modified", None) is None + + for schema, accession_id, filename, _ in json_files: + await put_object_json(sess, schema, accession_id, filename) + for schema, accession_id, _, filename in json_files: + await patch_object_json(sess, schema, accession_id, filename) + + for schema, accession_id, _, _ in json_files: + await delete_object(sess, schema, accession_id) async def test_metax_id_not_updated_on_patch(sess, folder_id): @@ 
-979,6 +1001,13 @@ async def test_metax_publish_dataset(sess, folder_id): res = await resp.json() object.append(res["metaxIdentifier"]["identifier"]) + # Publish the folder + # add a study and dataset for publishing a folder + doi_data_raw = await create_request_json_data("doi", "test_doi.json") + doi_data = json.loads(doi_data_raw) + patch_add_doi = [{"op": "add", "path": "/doiInfo", "value": doi_data}] + folder_id = await patch_folder(sess, folder_id, patch_add_doi) + await publish_folder(sess, folder_id) # TODO: This must be updated as Metax identifier will be moved to folder from object after publishing @@ -1769,10 +1798,11 @@ async def main(): "name": "basic test pagination", "description": "basic test pagination folder", } + await test_metax_crud(sess, metax_folder) metax_folder_id = await post_folder(sess, metax_folder) - await test_metax_crud(sess, metax_folder_id) await test_metax_id_not_updated_on_patch(sess, metax_folder_id) - await test_metax_publish_dataset(sess, metax_folder_id) + metax_folder_id2 = await post_folder(sess, metax_folder) + await test_metax_publish_dataset(sess, metax_folder_id2) # Test add, modify, validate and release action with submissions LOG.debug("=== Testing actions within submissions ===") From 7c09c824e5b6d1236d9fb75c2e5435030046d8ee Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Mon, 7 Mar 2022 09:20:06 +0200 Subject: [PATCH 295/336] rebase with metax integrations --- metadata_backend/api/handlers/object.py | 19 +++++++++++-------- metadata_backend/api/operators.py | 2 +- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index c6eddd4ed..f9c25336b 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -185,11 +185,8 @@ async def post_object(self, req: Request) -> Response: await folder_op.update_folder(folder_id, patch) # Create draft dataset to Metax catalog - if collection in {"study", "dataset"}: - [await self._create_metax_dataset(req, collection, item) for item in objects] - - patch = self._prepare_folder_patch_new_object(collection, ids, patch_params) - await folder_op.update_folder(folder_id, patch, schema_type) + if collection in _allowed_doi: + [await self._create_metax_dataset(req, collection, item, folder_id) for item in objects] body = ujson.dumps(data, escape_forward_slashes=False) @@ -272,6 +269,8 @@ async def put_object(self, req: Request) -> Response: :raises: HTTPUnsupportedMediaType if JSON replace is attempted :returns: JSON response containing accessionId for submitted object """ + _allowed_doi = {"study", "dataset"} + schema_type = req.match_info["schema"] accession_id = req.match_info["accessionId"] self._check_schema_exists(schema_type) @@ -310,7 +309,7 @@ async def put_object(self, req: Request) -> Response: await folder_op.update_folder(folder_id, patch) # Update draft dataset to Metax catalog - if collection in {"study", "dataset"}: + if collection in _allowed_doi: await self._update_metax_dataset(req, collection, accession_id) body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) @@ -452,7 +451,7 @@ def _prepare_folder_patch_update_object(self, schema: str, data: Dict, filename: return [patch_op] # TODO: update doi related code - async def _create_metax_dataset(self, req: Request, collection: str, object: Dict) -> str: + async def _create_metax_dataset(self, req: Request, collection: str, object: Dict, folder_id: str) -> str: """Handle connection to Metax 
api handler.
 
         Sends Dataset or Study object's data to Metax api handler.
         If creating new dataset, object is updated with returned metax ID to database.
         Object's data has to be fetched first from db in case of XML data in request.
         Has temporary DOI fetching, will be chaged with real data.
 
@@ -470,10 +469,14 @@ async def _create_metax_dataset(self, req: Request, collection: str, object: Dic
         # MYPY related if statement, Operator (when not XMLOperator) always returns object_data as dict
         if isinstance(object, Dict):
             LOG.info("Creating draft dataset to Metax.")
-            object["doi"] = await self.create_doi()
+            object["doi"] = await self._draft_doi(collection)
             metax_id = await metax_service.post_dataset_as_draft(collection, object)
             new_info = {"doi": object["doi"], "metaxIdentifier": {"identifier": metax_id, "status": "draft"}}
             await operator.update_metadata_object(collection, object["accessionId"], new_info)
+            if folder_id:
+                folder_op = FolderOperator(req.app["db_client"])
+                doi_patch = self._prepare_folder_patch_doi(collection, object["doi"], metax_id)
+                await folder_op.update_folder(folder_id, doi_patch)
         else:
             raise ValueError("Object's data must be dictionary")
         return metax_id
diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py
index bf46f7939..db5c204ba 100644
--- a/metadata_backend/api/operators.py
+++ b/metadata_backend/api/operators.py
@@ -505,7 +505,7 @@ async def _format_data_to_update_and_add_to_db(self, schema_type: str, accession
         read_data = await self.db_service.read(schema_type, accession_id)
         # on first write the db does not yet have a metaxIdentifier
         if read_data.get("metaxIdentifier", None):
-            forbidden_keys.extend(["metaxIdentifier"])
+            forbidden_keys.add("metaxIdentifier")
         if any(i in data for i in forbidden_keys):
             reason = f"Some items (e.g: {', '.join(forbidden_keys)}) cannot be changed."
             LOG.error(reason)

From e3874df38e88035de784ce5677271a29ac5790d0 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Mon, 7 Mar 2022 12:15:30 +0000
Subject: [PATCH 296/336] Small refactoring

Remove unnecessary return of title and the metax service function
call from handlers tests.
Separate metax tests for XML and JSON files into their own functions.
---
 metadata_backend/api/handlers/object.py | 30 +++++++++----------
 tests/integration/run_tests.py          | 29 +++++++++++++++---------
 tests/test_handlers.py                  |  9 ++++----
 3 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py
index f9c25336b..23a896334 100644
--- a/metadata_backend/api/handlers/object.py
+++ b/metadata_backend/api/handlers/object.py
@@ -7,13 +7,13 @@
 from aiohttp.web import Request, Response
 from multidict import CIMultiDict
 
+from ...helpers.doi import DOIHandler
 from ...helpers.logger import LOG
 from ...helpers.metax_api_handler import MetaxServiceHandler
 from ...helpers.validator import JSONValidator
 from ..operators import FolderOperator, Operator, XMLOperator
 from .common import multipart_content
 from .restapi import RESTAPIHandler
-from ...helpers.doi import DOIHandler
 
 
 class ObjectAPIHandler(RESTAPIHandler):
@@ -176,11 +176,9 @@ async def post_object(self, req: Request) -> Response:
             LOG.info(
                 f"POST object with accession ID {json_data['accessionId']} in schema {collection} was successful." 
)
+            objects = [json_data]
 
         # Gathering data for object to be added to folder
-        if not isinstance(data, List):
-            objects = [json_data]
-
         folder_op = FolderOperator(db_client)
         patch = self._prepare_folder_patch_new_object(collection, objects, patch_params)
         await folder_op.update_folder(folder_id, patch)
@@ -450,18 +448,17 @@ def _prepare_folder_patch_update_object(self, schema: str, data: Dict, filename:
         )
         return [patch_op]
 
-    # TODO: update doi related code
     async def _create_metax_dataset(self, req: Request, collection: str, object: Dict, folder_id: str) -> str:
-        """Handle connection to Metax api handler.
+        """Handle connection to Metax api handler for dataset creation.
 
-        Sends Dataset or Study object's data to Metax api handler.
-        If creating new dataset, object is updated with returned metax ID to database.
-        Object's data has to be fetched first from db in case of XML data in request.
-        Has temporary DOI fetching, will be chaged with real data.
+        Dataset or Study object is assigned a DOI
+        and its data is sent to the Metax api handler.
+        The object's database entry is updated with the metax ID returned by the Metax service.
 
         :param req: HTTP request
         :param collection: object's schema
         :param object: metadata object
+        :param folder_id: folder ID where metadata object belongs to
        :returns: Metax ID
         """
         metax_service = MetaxServiceHandler(req)
@@ -473,22 +470,18 @@ async def _create_metax_dataset(self, req: Request, collection: str, object: Dic
             metax_id = await metax_service.post_dataset_as_draft(collection, object)
             new_info = {"doi": object["doi"], "metaxIdentifier": {"identifier": metax_id, "status": "draft"}}
             await operator.update_metadata_object(collection, object["accessionId"], new_info)
-            if folder_id:
-                folder_op = FolderOperator(req.app["db_client"])
-                doi_patch = self._prepare_folder_patch_doi(collection, object["doi"], metax_id)
-                await folder_op.update_folder(folder_id, doi_patch)
+            folder_op = FolderOperator(req.app["db_client"])
+            doi_patch = self._prepare_folder_patch_doi(collection, object["doi"], metax_id)
+            await folder_op.update_folder(folder_id, doi_patch)
         else:
             raise ValueError("Object's data must be dictionary")
         return metax_id
 
-    # TODO: update doi related code
     async def _update_metax_dataset(self, req: Request, collection: str, accession_id: str) -> str:
-        """Handle connection to Metax api handler.
+        """Handle connection to Metax api handler for dataset update.
 
         Sends Dataset or Study object's data to Metax api handler.
-        If creating new dataset, object is updated with returned metax ID to database.
         Object's data has to be fetched first from db in case of XML data in request.
-        Has temporary DOI fetching, will be chaged with real data. 
:param req: HTTP request :param collection: object's schema @@ -497,7 +490,6 @@ async def _update_metax_dataset(self, req: Request, collection: str, accession_i """ metax_service = MetaxServiceHandler(req) operator = Operator(req.app["db_client"]) - object_data, _ = await operator.read_metadata_object(collection, accession_id) # MYPY related if statement, Operator (when not XMLOperator) always returns object_data as dict if isinstance(object_data, Dict): diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index f05ac3bfd..ff7672612 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -869,14 +869,13 @@ async def test_getting_all_objects_from_schema_works(sess, folder_id): await asyncio.gather(*[delete_object(sess, "sample", accession_id) for accession_id, _ in files]) -async def test_metax_crud(sess, metax_folder): - """Test Metax service with study and dataset POST, PATCH, PUBLISH and DELETE reqs. +async def test_metax_crud_with_xml(sess, folder_id): + """Test Metax service with study and dataset xml files POST, PATCH, PUBLISH and DELETE reqs. :param sess: HTTP session in which request call is made :param folder_id: id of the folder where objects reside """ # POST to object endpoint creates draft dataset in Metax for Study and Dataset - folder_id = await post_folder(sess, metax_folder) ids = [] xml_files = set() for schema, filename, update_filename in { @@ -924,7 +923,15 @@ async def test_metax_crud(sess, metax_folder): for _, _, metax_id in ids: async with sess.get(f"{metax_url}/{metax_id}") as metax_resp: assert metax_resp.status == 404, f"HTTP Status code error - expected 404 Not Found, got {resp.status}" - ids2 = [] + + +async def test_metax_crud_with_json(sess, folder_id): + """Test Metax service with study and dataset json data POST, PATCH, PUBLISH and DELETE reqs. + + :param sess: HTTP session in which request call is made + :param folder_id: id of the folder where objects reside + """ + ids = [] json_files = set() for schema, filename, update_filename in { ("study", "SRP000539.json", "patch.json"), @@ -932,9 +939,9 @@ async def test_metax_crud(sess, metax_folder): }: accession_id = await post_object_json(sess, schema, folder_id, filename) json_files.add((schema, accession_id, filename, update_filename)) - ids2.append([schema, accession_id]) + ids.append([schema, accession_id]) - for object in ids2: + for object in ids: schema, accession_id = object async with sess.get(f"{objects_url}/{schema}/{accession_id}") as resp: assert resp.status == 200, f"HTTP Status code error, got {resp.status}" @@ -975,9 +982,11 @@ async def test_metax_id_not_updated_on_patch(sess, folder_id): async with sess.patch( f"{objects_url}/{schema}/{accession_id}", data={"metaxIdentifier": {"identifier": "12345"}} ) as resp: - LOG.debug(f"Try to patch object in {schema}") + LOG.debug(f"Trying to patch object in {schema}") assert resp.status == 400 + await delete_object(sess, schema, accession_id) + async def test_metax_publish_dataset(sess, folder_id): """Test publishing dataset to Metax service after folder(submission) is published. 
@@ -1798,11 +1807,11 @@ async def main():
             "name": "basic test pagination",
             "description": "basic test pagination folder",
         }
-        await test_metax_crud(sess, metax_folder)
         metax_folder_id = await post_folder(sess, metax_folder)
+        await test_metax_crud_with_xml(sess, metax_folder_id)
+        await test_metax_crud_with_json(sess, metax_folder_id)
         await test_metax_id_not_updated_on_patch(sess, metax_folder_id)
-        metax_folder_id2 = await post_folder(sess, metax_folder)
-        await test_metax_publish_dataset(sess, metax_folder_id2)
+        await test_metax_publish_dataset(sess, metax_folder_id)
 
         # Test add, modify, validate and release action with submissions
         LOG.debug("=== Testing actions within submissions ===")
diff --git a/tests/test_handlers.py b/tests/test_handlers.py
index 745fd4224..e9eb8601f 100644
--- a/tests/test_handlers.py
+++ b/tests/test_handlers.py
@@ -120,7 +120,6 @@ async def setUpAsync(self):
         }
 
         RESTAPIHandler._handle_check_ownedby_user = make_mocked_coro(True)
-        ObjectAPIHandler.create_or_update_metax_dataset = make_mocked_coro("111-222-333")
         ObjectAPIHandler._delete_metax_dataset = make_mocked_coro()
 
     async def tearDownAsync(self):
@@ -180,15 +179,15 @@ async def fake_xmloperator_read_metadata_object(self, schema_type, accession_id)
 
     async def fake_xmloperator_create_metadata_object(self, schema_type, content):
         """Fake create operation to return mocked accessionId."""
-        return {"accessionId": self.test_ega_string, "title": "title"}
+        return {"accessionId": self.test_ega_string}
 
     async def fake_xmloperator_replace_metadata_object(self, schema_type, accession_id, content):
         """Fake replace operation to return mocked accessionId."""
-        return {"accessionId": self.test_ega_string, "title": "title"}
+        return {"accessionId": self.test_ega_string}
 
     async def fake_operator_create_metadata_object(self, schema_type, content):
         """Fake create operation to return mocked accessionId."""
-        return {"accessionId": self.test_ega_string, "title": "title"}
+        return {"accessionId": self.test_ega_string}
 
     async def fake_operator_update_metadata_object(self, schema_type, accession_id, content):
         """Fake update operation to return mocked accessionId."""
@@ -196,7 +195,7 @@ async def fake_operator_update_metadata_object(self, schema_type, accession_id,
 
     async def fake_operator_replace_metadata_object(self, schema_type, accession_id, content):
         """Fake replace operation to return mocked accessionId."""
-        return {"accessionId": self.test_ega_string, "title": "title"}
+        return {"accessionId": self.test_ega_string}
 
     async def fake_operator_delete_metadata_object(self, schema_type, accession_id):
         """Fake delete operation to await successful operation indicator."""

From ba49229a811475822f81f4d921dfb2d4d7317041 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Mon, 7 Mar 2022 13:57:20 +0000
Subject: [PATCH 297/336] Remove reading DB on object replace to find if metax
 id exists

Make metaxIdentifier always a forbidden key on replace.
The db service now takes care of populating the existing metax
identifier into the object.
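
For illustration, a minimal sketch of the replace behaviour this patch moves
into db_service.py (simplified from the actual method; error handling and the
XML-content special case are omitted):

    # On replace, fields generated at create time are carried over from the
    # old document, so a PUT can no longer overwrite them.
    async def replace(self, collection: str, accession_id: str, new_data: Dict) -> bool:
        find_by_id = {"accessionId": accession_id}
        old_data = await self.database[collection].find_one(find_by_id)
        if collection in {"study", "dataset"}:
            # metax id and DOI are created on POST and must survive a replace
            new_data["metaxIdentifier"] = old_data["metaxIdentifier"]
            new_data["doi"] = old_data["doi"]
        result = await self.database[collection].replace_one(find_by_id, new_data)
        return result.acknowledged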
---
 metadata_backend/api/operators.py       | 16 +++-------------
 metadata_backend/database/db_service.py |  3 +++
 tests/test_operators.py                 |  7 +------
 3 files changed, 7 insertions(+), 19 deletions(-)

diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py
index db5c204ba..5424c9978 100644
--- a/metadata_backend/api/operators.py
+++ b/metadata_backend/api/operators.py
@@ -461,7 +461,8 @@ async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accessio
 
         We will not replace accessionId, publishDate or dateCreated,
         as these are generated when created.
-
+        Will not replace metaxIdentifier for study and dataset
+        as it is generated when created.
         We will also keep publishDate and dateCreated from the old object.
 
         :param schema_type: Schema type of the object to replace.
@@ -469,24 +470,13 @@ async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accessio
         :param data: Metadata object
         :returns: Accession Id for object inserted to database
         """
-        forbidden_keys = {"accessionId", "publishDate", "dateCreated"}
-        # when replacing from xml file there are no (not supposed to be) metax data in content data
-        # therefore we need to check if the object already exists in database and has metax id
-        if schema_type in {"study", "dataset"}:
-            read_data = await self.db_service.read(schema_type, accession_id)
-            forbidden_keys.add("metaxIdentifier")
+        forbidden_keys = {"accessionId", "publishDate", "dateCreated", "metaxIdentifier"}
         if any(i in data for i in forbidden_keys):
             reason = f"Some items (e.g: {', '.join(forbidden_keys)}) cannot be changed."
             LOG.error(reason)
             raise web.HTTPBadRequest(reason=reason)
         data["accessionId"] = accession_id
         data["dateModified"] = datetime.utcnow()
-        if schema_type in {"study", "dataset"}:
-            try:
-                data["metaxIdentifier"] = read_data["metaxIdentifier"]
-                data["doi"] = read_data["doi"]
-            except KeyError:
-                pass
         LOG.debug(f"Operator formatted data for {schema_type} to add to DB")
         await self._replace_object_from_db(schema_type, accession_id, data)
         return data
diff --git a/metadata_backend/database/db_service.py b/metadata_backend/database/db_service.py
index b50576c47..9a119cca1 100644
--- a/metadata_backend/database/db_service.py
+++ b/metadata_backend/database/db_service.py
@@ -262,6 +262,9 @@ async def replace(self, collection: str, accession_id: str, new_data: Dict) -> b
         old_data = await self.database[collection].find_one(find_by_id)
         if not (len(new_data) == 2 and new_data["content"].startswith("<")):
             new_data["dateCreated"] = old_data["dateCreated"]
+            if collection in {"study", "dataset"}:
+                new_data["metaxIdentifier"] = old_data["metaxIdentifier"]
+                new_data["doi"] = old_data["doi"]
             if "publishDate" in old_data:
                 new_data["publishDate"] = old_data["publishDate"]
         result = await self.database[collection].replace_one(find_by_id, new_data)
diff --git a/tests/test_operators.py b/tests/test_operators.py
index 86ec388bf..7178365ea 100644
--- a/tests/test_operators.py
+++ b/tests/test_operators.py
@@ -341,17 +341,12 @@ async def test_correct_data_is_set_to_json_when_replacing(self):
             self.MockedDbService().read.return_value = {
                 "accessionId": self.accession_id,
                 "dateModified": datetime.datetime(2020, 4, 14),
-                "metaxIdentifier": {"identifier": 12345},
             }
             acc = await (operator._format_data_to_replace_and_add_to_db("study", self.accession_id, {}))
             mocked_insert.assert_called_once_with(
                 "study",
                 self.accession_id,
-                {
-                    "accessionId": self.accession_id,
-                    "dateModified": datetime.datetime(2020, 4, 14),
-                    "metaxIdentifier": {"identifier": 12345},
-                },
+                {"accessionId": self.accession_id, "dateModified": datetime.datetime(2020, 4, 14)},
             )
             self.assertEqual(acc["accessionId"], self.accession_id)

From 92a58096ef86c627db7361e0caa78b2f75dede02 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Tue, 8 Mar 2022 19:36:12 +0000
Subject: [PATCH 298/336] Update how DB is read on object patch

Reading the object to extract data for the Metax service is necessary
only while patching an object, as only partial data is coming in and
the Metax service always uses replacing for updating.
Make metaxIdentifier always a forbidden key while updating data
in operators.
Created a function in operators dedicated to adding metax info to the
object's db entry.
---
 metadata_backend/api/handlers/object.py | 44 +++++++++++--------------
 metadata_backend/api/operators.py       | 33 +++++++++++++++----
 tests/test_handlers.py                  |  6 +++-
 3 files changed, 51 insertions(+), 32 deletions(-)

diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py
index 23a896334..f2c573114 100644
--- a/metadata_backend/api/handlers/object.py
+++ b/metadata_backend/api/handlers/object.py
@@ -308,7 +308,7 @@ async def put_object(self, req: Request) -> Response:
 
         # Update draft dataset to Metax catalog
         if collection in _allowed_doi:
-            await self._update_metax_dataset(req, collection, accession_id)
+            await self._update_metax_dataset(req, collection, data)
 
         body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False)
         LOG.info(f"PUT object with accession ID {accession_id} in schema {collection} was successful.")
@@ -361,7 +361,12 @@ async def patch_object(self, req: Request) -> Response:
 
         # Update draft dataset to Metax catalog
         if collection in {"study", "dataset"}:
-            await self._update_metax_dataset(req, collection, accession_id)
+            object_data, _ = await operator.read_metadata_object(collection, accession_id)
+            # MYPY related if statement, Operator (when not XMLOperator) always returns object_data as dict
+            if isinstance(object_data, Dict):
+                await self._update_metax_dataset(req, collection, object_data)
+            else:
+                raise ValueError("Object's data must be dictionary")
 
         body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False)
         LOG.info(f"PATCH object with accession ID {accession_id} in schema {collection} was successful.")
@@ -463,21 +468,19 @@ async def _create_metax_dataset(self, req: Request, collection: str, object: Dic
         """
         metax_service = MetaxServiceHandler(req)
         operator = Operator(req.app["db_client"])
-        # MYPY related if statement, Operator (when not XMLOperator) always returns object_data as dict
-        if isinstance(object, Dict):
-            LOG.info("Creating draft dataset to Metax.")
-            object["doi"] = await self._draft_doi(collection)
-            metax_id = await metax_service.post_dataset_as_draft(collection, object)
-            new_info = {"doi": object["doi"], "metaxIdentifier": {"identifier": metax_id, "status": "draft"}}
-            await operator.update_metadata_object(collection, object["accessionId"], new_info)
-            folder_op = FolderOperator(req.app["db_client"])
-            doi_patch = self._prepare_folder_patch_doi(collection, object["doi"], metax_id)
-            await folder_op.update_folder(folder_id, doi_patch)
-        else:
-            raise ValueError("Object's data must be dictionary")
+        LOG.info("Creating draft dataset to Metax.")
+        object["doi"] = await self._draft_doi(collection)
+        metax_id = await metax_service.post_dataset_as_draft(collection, object)
+
+        new_info = {"doi": object["doi"], "metaxIdentifier": {"identifier": metax_id, "status": "draft"}}
+        await operator.create_metax_info(collection, object["accessionId"], new_info)
+
+        folder_op = FolderOperator(req.app["db_client"])
+        doi_patch = self._prepare_folder_patch_doi(collection, object["doi"], metax_id)
+        await folder_op.update_folder(folder_id, doi_patch)
         return metax_id
 
-    async def _update_metax_dataset(self, req: Request, collection: str, accession_id: str) -> str:
+    async def _update_metax_dataset(self, req: Request, collection: str, data: Dict) -> str:
         """Handle connection to Metax api handler for dataset update.
 
         Sends Dataset or Study object's data to Metax api handler.
         Object's data has to be fetched first from db in case of XML data in request.
 
         :param req: HTTP request
         :param collection: object's schema
         :param accession_id: object's accession ID
         :returns: Metax ID
         """
         metax_service = MetaxServiceHandler(req)
-        operator = Operator(req.app["db_client"])
-        object_data, _ = await operator.read_metadata_object(collection, accession_id)
-        # MYPY related if statement, Operator (when not XMLOperator) always returns object_data as dict
-        if isinstance(object_data, Dict):
-            LOG.info("Updating draft dataset to Metax.")
-            metax_id = await metax_service.update_draft_dataset(collection, object_data)
-        else:
-            raise ValueError("Object's data must be dictionary")
+        LOG.info("Updating draft dataset to Metax.")
+        metax_id = await metax_service.update_draft_dataset(collection, data)
         return metax_id

diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py
index 5424c9978..757cc4711 100644
--- a/metadata_backend/api/operators.py
+++ b/metadata_backend/api/operators.py
@@ -431,6 +431,31 @@ async def query_metadata_database(
         )
         return data, page_num, page_size, total_objects[0]["total"]
 
+    async def create_metax_info(self, schema_type: str, accession_id: str, data: Dict) -> bool:
+        """Update study or dataset object with metax info.
+
+        :param schema_type: Schema type of the object to update.
+        :param accession_id: Identifier of object to update.
+        :param data: Metadata object
+        :returns: True on successful database update
+        """
+        if schema_type not in {"study", "dataset"}:
+            LOG.error("Object schema type must be either study or dataset")
+            return False
+        try:
+            create_success = await self.db_service.update(schema_type, accession_id, data)
+        except (ConnectionFailure, OperationFailure) as error:
+            reason = f"Error happened while updating object: {error}"
+            LOG.error(reason)
+            raise web.HTTPBadRequest(reason=reason)
+        if not create_success:
+            reason = "Updating object to database failed for some reason."
+            LOG.error(reason)
+            raise web.HTTPBadRequest(reason=reason)
+        else:
+            LOG.info(f"Object {schema_type} with id {accession_id} updated with metax info.")
+            return True
+
     async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Dict) -> Dict:
         """Format JSON metadata object and add it to db. 
@@ -489,13 +514,7 @@ async def _format_data_to_update_and_add_to_db(self, schema_type: str, accession :param data: Metadata object :returns: Accession Id for object inserted to database """ - forbidden_keys = {"accessionId", "publishDate", "dateCreated"} - # check if object already has metax id or is it first time writing it - if schema_type in {"study", "dataset"}: - read_data = await self.db_service.read(schema_type, accession_id) - # on firs write db doesnt have yet metaxIdentifier - if read_data.get("metaxIdentifier", None): - forbidden_keys.add("metaxIdentifier") + forbidden_keys = {"accessionId", "publishDate", "dateCreated", "metaxIdentifier"} if any(i in data for i in forbidden_keys): reason = f"Some items (e.g: {', '.join(forbidden_keys)}) cannot be changed." LOG.error(reason) diff --git a/tests/test_handlers.py b/tests/test_handlers.py index e9eb8601f..5b822e37b 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -96,6 +96,7 @@ async def setUpAsync(self): "delete_metadata_object.side_effect": self.fake_operator_delete_metadata_object, "update_metadata_object.side_effect": self.fake_operator_update_metadata_object, "replace_metadata_object.side_effect": self.fake_operator_replace_metadata_object, + "create_metax_info.side_effect": self.fake_operator_create_metax_info, } self.xmloperator_config = { "read_metadata_object.side_effect": self.fake_xmloperator_read_metadata_object, @@ -201,6 +202,10 @@ async def fake_operator_delete_metadata_object(self, schema_type, accession_id): """Fake delete operation to await successful operation indicator.""" return True + async def fake_operator_create_metax_info(self, schema_type, accession_id, data): + """Fake update operation to await successful operation indicator.""" + return True + async def fake_folderoperator_create_folder(self, content): """Fake create operation to return mocked folderId.""" return self.folder_id @@ -904,7 +909,6 @@ async def test_folder_is_published(self): self.MockedMetaxHandler().publish_dataset.return_value = None with patch(self._mock_prepare_doi, return_value=({}, [{}])): response = await self.client.patch("/publish/FOL12345678") - # self.MockedFolderOperator().update_folder.assert_called_once() self.assertEqual(response.status, 200) json_resp = await response.json() self.assertEqual(json_resp["folderId"], self.folder_id) From 35938078679534a4ac409c684f0ae80b2fbbe5fc Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 9 Mar 2022 07:33:48 +0000 Subject: [PATCH 299/336] Remove status from metax info in object Object's metax id will be moved to folder after folder publishing so no metax status is needed. 
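
The stored shape of the metax info changes roughly as follows (illustrative
values only, not real identifiers):

    # before: metax info kept both the identifier and a status
    doc = {"metaxIdentifier": {"identifier": "abc123", "status": "draft"}}
    # after: a plain metax id string is enough until the folder is published
    doc = {"metaxIdentifier": "abc123"}
    metax_id = doc["metaxIdentifier"]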
--- metadata_backend/api/handlers/object.py | 4 ++-- metadata_backend/helpers/metax_api_handler.py | 6 +++--- tests/integration/run_tests.py | 10 ++++------ 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index f2c573114..2718ac2ed 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -244,7 +244,7 @@ async def delete_object(self, req: Request) -> Response: object_data, _ = await operator.read_metadata_object(collection, accession_id) # MYPY related if statement, Operator (when not XMLOperator) always returns object_data as dict if isinstance(object_data, dict): - metax_id = object_data["metaxIdentifier"]["identifier"] + metax_id = object_data["metaxIdentifier"] except KeyError: LOG.warning(f"MetadataObject {collection} {accession_id} was never added to Metax service.") @@ -472,7 +472,7 @@ async def _create_metax_dataset(self, req: Request, collection: str, object: Dic object["doi"] = await self._draft_doi(collection) metax_id = await metax_service.post_dataset_as_draft(collection, object) - new_info = {"doi": object["doi"], "metaxIdentifier": {"identifier": metax_id, "status": "draft"}} + new_info = {"doi": object["doi"], "metaxIdentifier": metax_id} await operator.create_metax_info(collection, object["accessionId"], new_info) folder_op = FolderOperator(req.app["db_client"]) diff --git a/metadata_backend/helpers/metax_api_handler.py b/metadata_backend/helpers/metax_api_handler.py index 4af8d13b1..c5cf06929 100644 --- a/metadata_backend/helpers/metax_api_handler.py +++ b/metadata_backend/helpers/metax_api_handler.py @@ -136,7 +136,7 @@ async def update_draft_dataset(self, collection: str, data: Dict) -> str: async with aiohttp.ClientSession() as sess: resp = await sess.put( - f'{self.metax_url}{self.rest_route}/{data["metaxIdentifier"]["identifier"]}', + f'{self.metax_url}{self.rest_route}/{data["metaxIdentifier"]}', params="draft", json=metax_dataset, auth=aiohttp.BasicAuth(self.username, self.password), @@ -144,7 +144,7 @@ async def update_draft_dataset(self, collection: str, data: Dict) -> str: status = resp.status if status == 200: metax_data = await resp.json() - LOG.info(f"Updated Metax draft dataset with ID {metax_data['identifier']} with data: {metax_data}") + LOG.info(f"Updated Metax draft dataset with ID {metax_data['identifier']} with data: {metax_dataset}") return metax_data["identifier"] else: # TODO: how front end should react on this?? 
@@ -183,7 +183,7 @@ async def publish_dataset(self, folder_id: str) -> None:
             if object["schema"] in {"study", "dataset"}:
                 data, _ = await operator.read_metadata_object(object["schema"], object["accessionId"])
                 if isinstance(data, dict):
-                    metax_id = data["metaxIdentifier"]["identifier"]
+                    metax_id = data["metaxIdentifier"]
                     doi = data["doi"]
                 async with aiohttp.ClientSession() as sess:
                     resp = await sess.post(
diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py
index ff7672612..dbe4db853 100644
--- a/tests/integration/run_tests.py
+++ b/tests/integration/run_tests.py
@@ -892,7 +892,7 @@ async def test_metax_crud_with_xml(sess, folder_id):
         assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
         res = await resp.json()
         try:
-            metax_id = res["metaxIdentifier"]["identifier"]
+            metax_id = res["metaxIdentifier"]
         except KeyError:
             assert False, "Metax ID was not in response data"
         object.append(metax_id)
@@ -947,7 +947,7 @@ async def test_metax_crud_with_json(sess, folder_id):
         assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
         res = await resp.json()
         try:
-            metax_id = res["metaxIdentifier"]["identifier"]
+            metax_id = res["metaxIdentifier"]
         except KeyError:
             assert False, "Metax ID was not in response data"
         object.append(metax_id)
@@ -979,9 +979,7 @@ async def test_metax_id_not_updated_on_patch(sess, folder_id):
         ("dataset", "dataset.json"),
     }:
         accession_id = await post_object_json(sess, schema, folder_id, filename)
-        async with sess.patch(
-            f"{objects_url}/{schema}/{accession_id}", data={"metaxIdentifier": {"identifier": "12345"}}
-        ) as resp:
+        async with sess.patch(f"{objects_url}/{schema}/{accession_id}", data={"metaxIdentifier": "12345"}) as resp:
             LOG.debug(f"Trying to patch object in {schema}")
             assert resp.status == 400
 
@@ -1008,7 +1006,7 @@ async def test_metax_publish_dataset(sess, folder_id):
     async with sess.get(f"{objects_url}/{schema}/{object_id}") as resp:
         assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
         res = await resp.json()
-        object.append(res["metaxIdentifier"]["identifier"])
+        object.append(res["metaxIdentifier"])

From 908cd7871fb17eeeb6befac6f0d77cf371db8878 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Wed, 9 Mar 2022 07:59:16 +0000
Subject: [PATCH 300/336] Remove unnecessary metax service calling functions

Connection to the metax service handler for update and delete is
simple enough to be handled directly from the object PUT, PATCH and
DELETE endpoints.
---
 metadata_backend/api/handlers/object.py       | 35 ++++---------------
 metadata_backend/helpers/metax_api_handler.py |  5 ---
 2 files changed, 6 insertions(+), 34 deletions(-)

diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py
index 2718ac2ed..a7645ca52 100644
--- a/metadata_backend/api/handlers/object.py
+++ b/metadata_backend/api/handlers/object.py
@@ -252,7 +252,8 @@ async def delete_object(self, req: Request) -> Response:
 
         # Delete draft dataset from Metax catalog
         if collection in {"study", "dataset"}:
-            await self._delete_metax_dataset(req, metax_id)
+            metax_service = MetaxServiceHandler(req)
+            await metax_service.delete_draft_dataset(metax_id)
 
         LOG.info(f"DELETE object with accession ID {accession_id} in schema {collection} was successful.")
         return web.Response(status=204)
@@ -308,7 +309,8 @@ async def put_object(self, req: Request) -> Response:
 
         # Update draft dataset to Metax catalog
         if collection in _allowed_doi:
-            await self._update_metax_dataset(req, collection, data)
+            metax_service = MetaxServiceHandler(req)
+            await metax_service.update_draft_dataset(collection, data)
 
         body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False)
         LOG.info(f"PUT object with accession ID {accession_id} in schema {collection} was successful.")
@@ -364,7 +366,8 @@ async def patch_object(self, req: Request) -> Response:
             object_data, _ = await operator.read_metadata_object(collection, accession_id)
             # MYPY related if statement, Operator (when not XMLOperator) always returns object_data as dict
             if isinstance(object_data, Dict):
-                await self._update_metax_dataset(req, collection, object_data)
+                metax_service = MetaxServiceHandler(req)
+                await metax_service.update_draft_dataset(collection, object_data)
             else:
                 raise ValueError("Object's data must be dictionary")
 
@@ -480,32 +483,6 @@ async def _create_metax_dataset(self, req: Request, collection: str, object: Dic
         await folder_op.update_folder(folder_id, doi_patch)
         return metax_id
 
-    async def _update_metax_dataset(self, req: Request, collection: str, data: Dict) -> str:
-        """Handle connection to Metax api handler for dataset update.
-
-        Sends Dataset or Study object's data to Metax api handler.
-        Object's data has to be fetched first from db in case of XML data in request.
-
-        :param req: HTTP request
-        :param collection: object's schema
-        :param accession_id: object's accession ID
-        :returns: Metax ID
-        """
-        metax_service = MetaxServiceHandler(req)
-        LOG.info("Updating draft dataset to Metax.")
-        metax_id = await metax_service.update_draft_dataset(collection, data)
-        return metax_id
-
-    async def _delete_metax_dataset(self, req: Request, metax_id: str) -> None:
-        """Handle deletion of Study or Dataset object from Metax service.
-
-        :param req: HTTP request
-        :param metax_id: object's Metax ID
-        :returns: True if request succeded, else raises error
-        """
-        metax_service = MetaxServiceHandler(req)
-        await metax_service.delete_draft_dataset(metax_id)
-
diff --git a/metadata_backend/helpers/metax_api_handler.py b/metadata_backend/helpers/metax_api_handler.py
index c5cf06929..67c2326cd 100644
--- a/metadata_backend/helpers/metax_api_handler.py
+++ b/metadata_backend/helpers/metax_api_handler.py
@@ -55,11 +55,6 @@ def __init__(self, req: Request) -> None:
             },
         }
 
-    # TODO
-    def authenticate(self) -> None:
-        """Handle authentication to Metax."""
-        pass
-
     async def get_metadata_provider_user(self) -> str:
         """Get current user's external id. 
From 529c309484f41341c2a5e50a647025300f5da910 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 9 Mar 2022 08:14:20 +0000 Subject: [PATCH 301/336] Update changelog --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e9dc5103..636075a41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,12 +8,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added -- Integration with Metax service +- Integration with Metax service #356 - Adds new local container for testing against mocked Metax API - Introduces new env vars: METAX_USER, METAX_PASS, METAX_URL - - Adds new key metaxIdentifier to Study and Dataset collections with dict {identifier: , status: "draft"|"published"} + - Adds new key metaxIdentifier to Study and Dataset collections containing metax id returned from Metax API - Adds new handler MetaxServiceHandler to take care of mapping Submitter metadata to Metax metadata and to connect to Metax API -- Add patching of folders after object save and update operations +- Add patching of folders after object save and update operations #354 - Object is added or updated to folder(submission) where it belongs with it's accession ID, schema, submission type, title and filename in the case of CSV and XML upload - Adds configuration for mypy linting to VScode devcontainer setup - Templates API #256 From e7f71b81209b4dc2e674802f6f6a093c341905f5 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 10 Mar 2022 08:53:42 +0200 Subject: [PATCH 302/336] add option to delete doi on object delete --- metadata_backend/api/handlers/object.py | 10 ++++-- metadata_backend/helpers/doi.py | 42 ++++++++++++++++++++----- tests/integration/mock_doi_api.py | 11 +++++-- tests/test_handlers.py | 13 ++++++-- 4 files changed, 61 insertions(+), 15 deletions(-) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index a7645ca52..595a80c47 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -213,6 +213,8 @@ async def delete_object(self, req: Request) -> Response: :raises: HTTPUnprocessableEntity if object does not belong to current user :returns: HTTPNoContent response """ + _allowed_doi = {"study", "dataset"} + schema_type = req.match_info["schema"] self._check_schema_exists(schema_type) collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type @@ -239,21 +241,25 @@ async def delete_object(self, req: Request) -> Response: raise web.HTTPUnprocessableEntity(reason=reason) metax_id: str = "" - if collection in {"study", "dataset"}: + doi_id: str = "" + if collection in _allowed_doi: try: object_data, _ = await operator.read_metadata_object(collection, accession_id) # MYPY related if statement, Operator (when not XMLOperator) always returns object_data as dict if isinstance(object_data, dict): metax_id = object_data["metaxIdentifier"] + doi_id = object_data["doi"] except KeyError: LOG.warning(f"MetadataObject {collection} {accession_id} was never added to Metax service.") accession_id = await operator.delete_metadata_object(collection, accession_id) # Delete draft dataset from Metax catalog - if collection in {"study", "dataset"}: + if collection in _allowed_doi: metax_service = MetaxServiceHandler(req) await metax_service.delete_draft_dataset(metax_id) + doi_service = DOIHandler() + await doi_service.delete(doi_id) LOG.info(f"DELETE object with accession ID {accession_id} in 
schema {collection} was successful.")
         return web.Response(status=204)
 
diff --git a/metadata_backend/helpers/doi.py b/metadata_backend/helpers/doi.py
index 5b3da3f28..99173ace3 100644
--- a/metadata_backend/helpers/doi.py
+++ b/metadata_backend/helpers/doi.py
@@ -26,7 +26,12 @@ def __init__(self) -> None:
         self.headers = {"Content-Type": "application/vnd.api+json"}
 
     async def create_draft(self, prefix: Union[str, None] = None) -> Dict:
-        """Generate random suffix and POST request a draft DOI to DataCite DOI API."""
+        """Generate random suffix and POST request a draft DOI to DataCite DOI API.
+
+        :param prefix: Custom prefix to add to the DOI e.g. study/dataset
+        :raises: HTTPInternalServerError if the Datacite DOI draft registration fails
+        :returns: Dictionary with DOI and URL
+        """
         suffix = uuid4().hex[:10]
         doi_suffix = f"{prefix}.{suffix[:4]}-{suffix[4:]}" if prefix else f"{suffix[:4]}-{suffix[4:]}"
         # this payload is sufficient to get a draft DOI
@@ -47,27 +52,48 @@ async def create_draft(self, prefix: Union[str, None] = None) -> Dict:
         else:
             reason = f"DOI API draft creation request failed with code: {response.status}"
             LOG.error(reason)
-            raise web.HTTPBadRequest(reason=reason)  # 400 might not be the correct error for this
+            raise web.HTTPInternalServerError(reason=reason)
 
         return doi_data
 
-    async def set_state(self, doi_payload: dict) -> None:
+    async def set_state(self, doi_payload: Dict) -> None:
         """Set DOI and associated metadata.
 
         We will only support publish event type, and we expect the data to be
         prepared for the update.
         Partial updates are possible.
 
-        :param doi_suffix: DOI to do operations on.
-        :param state: can be publish, register or hide.
+        :param doi_payload: Dictionary with payload to send to Datacite
+        :raises: HTTPInternalServerError if the Datacite DOI update fails
+        :returns: None
         """
         auth = BasicAuth(login=self.doi_user, password=self.doi_key)
         async with ClientSession(headers=self.headers, auth=auth) as session:
             async with session.put(f"{self.doi_api}/{doi_payload['id']}", json=doi_payload) as response:
                 if response.status == 200:
-                    draft_resp = await response.json()
-                    LOG.debug(f"Datacite doi response: {draft_resp}")
+                    _resp = await response.json()
+                    LOG.info(f"Datacite doi {doi_payload['id']} updated")
+                    LOG.debug(f"Datacite doi {doi_payload['id']} updated, response: {_resp}")
                 else:
                     reason = f"DOI API set state request failed with code: {response.status}"
                     LOG.error(reason)
-                    raise web.HTTPBadRequest(reason=reason)  # 400 might not be the correct error for this
+                    raise web.HTTPInternalServerError(reason=reason)
+
+    async def delete(self, doi: str) -> None:
+        """Delete DOI and associated metadata.
+
+        Datacite only supports deleting draft DOIs. 
+
+        :param doi: identifier to be utilized for deleting draft DOI
+        :raises: HTTPInternalServerError if the Datacite draft DOI delete fails
+        :returns: None
+        """
+        auth = BasicAuth(login=self.doi_user, password=self.doi_key)
+        async with ClientSession(headers=self.headers, auth=auth) as session:
+            async with session.delete(f"{self.doi_api}/{doi}") as response:
+                if response.status == 204:
+                    LOG.info(f"Datacite doi {doi} deleted.")
+                else:
+                    reason = f"DOI API delete request failed with code: {response.status}"
+                    LOG.error(reason)
+                    raise web.HTTPInternalServerError(reason=reason)
diff --git a/tests/integration/mock_doi_api.py b/tests/integration/mock_doi_api.py
index f0befb386..4d8e045b9 100644
--- a/tests/integration/mock_doi_api.py
+++ b/tests/integration/mock_doi_api.py
@@ -97,7 +97,7 @@ def update_dict(d, u):
 
 
 async def create(req: web.Request) -> web.Response:
-    """DOI endpoint."""
+    """DOI draft creation endpoint."""
     try:
         content = await req.json()
     except json.decoder.JSONDecodeError as e:
@@ -129,7 +129,7 @@ async def create(req: web.Request) -> web.Response:
 
 
 async def update(req: web.Request) -> web.Response:
-    """DOI endpoint."""
+    """DOI update endpoint."""
     try:
         content = await req.json()
     except json.decoder.JSONDecodeError as e:
@@ -150,11 +150,18 @@ async def update(req: web.Request) -> web.Response:
     return web.json_response(data, status=200)
 
 
+async def delete(req: web.Request) -> web.Response:
+    """DOI delete endpoint."""
+
+    return web.json_response(status=204)
+
+
 def init() -> web.Application:
     """Start server."""
     app = web.Application()
     app.router.add_post("/dois", create)
     app.router.add_put("/dois/{id:.*}", update)
+    app.router.add_delete("/dois/{id:.*}", delete)
 
     return app
 
diff --git a/tests/test_handlers.py b/tests/test_handlers.py
index 5b822e37b..fce7ebf93 100644
--- a/tests/test_handlers.py
+++ b/tests/test_handlers.py
@@ -118,6 +118,7 @@ async def setUpAsync(self):
         self.doi_handler = {
             "create_draft.side_effect": self.fake_doi_create_draft,
             "set_state.side_effect": self.fake_doi_set_state,
+            "delete.side_effect": self.fake_doi_delete,
         }
 
         RESTAPIHandler._handle_check_ownedby_user = make_mocked_coro(True)
@@ -165,6 +166,11 @@ async def fake_doi_create_draft(self, prefix):
 
     async def fake_doi_set_state(self, data):
         """."""
+        return {"fullDOI": "10.xxxx/yyyyy", "dataset": "https://doi.org/10.xxxx/yyyyy"}
+
+    async def fake_doi_delete(self, doi):
+        """."""
+        return None
 
     async def fake_operator_read_metadata_object(self, schema_type, accession_id):
         """Fake read operation to return mocked JSON."""
@@ -660,9 +666,10 @@ async def test_query_is_called_and_returns_json_in_correct_format(self):
     async def test_delete_is_called(self):
         """Test query method calls operator and returns status correctly."""
         url = "/objects/study/EGA123456"
-        response = await self.client.delete(url)
-        self.assertEqual(response.status, 204)
-        self.MockedOperator().delete_metadata_object.assert_called_once()
+        with patch("metadata_backend.api.handlers.object.DOIHandler.delete", return_value=None):
+            response = await self.client.delete(url)
+            self.assertEqual(response.status, 204)
+            self.MockedOperator().delete_metadata_object.assert_called_once()
 
     async def test_query_fails_with_xml_format(self):
         """Test query method calls operator and returns status correctly."""

From d833057405f946c28b0e7f2dd4bae5818aee8d7c Mon Sep 17 00:00:00 2001
From: Stefan Negru
Date: Thu, 10 Mar 2022 10:10:39 +0200
Subject: [PATCH 303/336] update changelog with changes from #332

---
 .github/config/.wordlist.txt |  3 ++-
 CHANGELOG.md 
From d833057405f946c28b0e7f2dd4bae5818aee8d7c Mon Sep 17 00:00:00 2001
From: Stefan Negru
Date: Thu, 10 Mar 2022 10:10:39 +0200
Subject: [PATCH 303/336] update changelog with changes from #332
---
 .github/config/.wordlist.txt | 3 ++-
 CHANGELOG.md | 13 +++++++++++--
 2 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/.github/config/.wordlist.txt b/.github/config/.wordlist.txt
index f207ff7d5..4a0bef904 100644
--- a/.github/config/.wordlist.txt
+++ b/.github/config/.wordlist.txt
@@ -165,6 +165,7 @@ dockerfile
 docstrings
 doi
 doiinfo
+dois
 dt
 dzongkha
 ean
@@ -343,8 +344,8 @@ metagenomics
 metatranscriptome
 metatranscriptomic
 metax
-metaxservicehandler
 metaxidentifier
+metaxservicehandler
 methylation
 methylcytidine
 mf
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 636075a41..a47b71b82 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,10 +18,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Adds configuration for mypy linting to VScode devcontainer setup
- Templates API #256
 - use `ujson` as default json library
-- Creating draft Datacite DOI for folders #257
+- Creating draft Datacite DOI for folders #257 #332
 - created a mock web app, which would act similarly to DataCite REST API
 - altered `publish_folder` endpoint so that `extraInfo` containing the DOI data is added upon publishing
 - added `datePublished` key to folders which takes in the date/time, when folder is published
+- DOI Publishing and deletion to Datacite #332 #369
+ - create draft DOIs for both Study and Datasets and add them to the folder `extraInfo` when published
+ - delete draft DOIs on object delete
+ - update DOI info at Datacite when folder is published
- VScode Dev environment #287
 - Add VS Code development container
 - Update docker for development
@@ -50,6 +54,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - new endpoint `GET /templates` to replace `GET /users/current` `{"templates":[...]}`
 - new JSON keys `index` and `tags` to `PATCH /templates/schema/templateId`, same values as were previously used in `PATCH /user` which is now removed
 - WARNING: breaking change that requires fresh database, because "project" is new information that did not exist before, and it can't be migrated to existing user-owned hierarchy
+- Support for multilevel JSON patch add operations, e.g. `/extraInfo/datasetIdentifiers/-`, which needs dot notation (`extraInfo.datasetIdentifiers`) for MongoDB to work #332
### Changed
@@ -63,7 +68,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- README updated with tox command, development build instructions, and prettify Dockerfile.
- Update ENA XML and JSON schemas #299 - Github actions changed the use of https://git.io/misspell to rojopolis/spellcheck-github-actions #316 -- Separated most of the handlers to own files inside the handlers folder #319 +- Separated most of the handlers to own files inside the handlers folder #319 +- allow inserting only one study in folder #332 +- JSON schemas #332 + - introduce `keywords` required for Metax in `doiInfo` + - dataset `description` and study `studyAbstract` are now mandatory ### Fixed From c818abf2960cb1d21f8a88744be80b308044a532 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 10 Mar 2022 22:00:43 +0200 Subject: [PATCH 304/336] streamline doi config into a dict --- metadata_backend/conf/conf.py | 15 +++++++++------ metadata_backend/helpers/doi.py | 12 ++++++------ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/metadata_backend/conf/conf.py b/metadata_backend/conf/conf.py index edcd87ac5..ce058bd55 100644 --- a/metadata_backend/conf/conf.py +++ b/metadata_backend/conf/conf.py @@ -153,12 +153,15 @@ def create_db_client() -> AsyncIOMotorClient: # 6) Set the DataCite REST API values -doi_api = os.getenv("DOI_API", "") -doi_prefix = os.getenv("DOI_PREFIX", "") -doi_user = os.getenv("DOI_USER", "") -doi_key = os.getenv("DOI_KEY", "") -datacite_url = os.getenv("DATACITE_URL", "https://doi.org") -publisher = "CSC - IT Center for Science" +doi_config = { + "api": os.getenv("DOI_API", ""), + "prefix": os.getenv("DOI_PREFIX", ""), + "user": os.getenv("DOI_USER", ""), + "key": os.getenv("DOI_KEY", ""), + "url": os.getenv("DATACITE_URL", "https://doi.org"), + "publisher": "CSC - IT Center for Science", + "discovery_url": "https://etsin.fairdata.fi/dataset/", +} metax_config = { "username": os.getenv("METAX_USER", "sd"), diff --git a/metadata_backend/helpers/doi.py b/metadata_backend/helpers/doi.py index 99173ace3..ca6c3389c 100644 --- a/metadata_backend/helpers/doi.py +++ b/metadata_backend/helpers/doi.py @@ -9,7 +9,7 @@ from aiohttp import web, ClientSession, BasicAuth, ClientTimeout from ..helpers.logger import LOG -from ..conf import conf +from ..conf.conf import doi_config class DOIHandler: @@ -17,11 +17,11 @@ class DOIHandler: def __init__(self) -> None: """Get DOI credentials from config.""" - self.doi_api = conf.doi_api - self.doi_prefix = conf.doi_prefix - self.doi_user = conf.doi_user - self.doi_key = conf.doi_key - self.doi_url = f"{conf.datacite_url.rstrip('/')}/{self.doi_prefix}" + self.doi_api = doi_config["api"] + self.doi_prefix = doi_config["prefix"] + self.doi_user = doi_config["user"] + self.doi_key = doi_config["key"] + self.doi_url = f"{doi_config['url'].rstrip('/')}/{self.doi_prefix}" self.timeout = ClientTimeout(total=2 * 60) # 2 minutes timeout self.headers = {"Content-Type": "application/vnd.api+json"} From d882f2ab017672d3e94709f20a93af10a40cb316 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 10 Mar 2022 22:01:12 +0200 Subject: [PATCH 305/336] specify which schema the object was not found in --- metadata_backend/api/operators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 757cc4711..d7a2fe438 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -100,7 +100,7 @@ async def read_metadata_object(self, schema_type: str, accession_id: str) -> Tup try: data_raw = await self.db_service.read(schema_type, accession_id) if not data_raw: - LOG.error(f"Object with {accession_id} not found.") + 
LOG.error(f"Object with {accession_id} not found in schema: {schema_type}.")
 raise web.HTTPNotFound()
 data = await self._format_read_data(schema_type, data_raw)
 except (ConnectionFailure, OperationFailure) as error:
From 96092932867ea4f253fc4ae6ac55dca647f3d80f Mon Sep 17 00:00:00 2001
From: Stefan Negru
Date: Thu, 10 Mar 2022 22:03:17 +0200
Subject: [PATCH 306/336] publish metax ids without extra iterations

We already iterate over the objects when we prepare the DOI
information, so there is no need to iterate multiple times.
---
 metadata_backend/helpers/metax_api_handler.py | 76 +++++++------------
 1 file changed, 29 insertions(+), 47 deletions(-)
diff --git a/metadata_backend/helpers/metax_api_handler.py b/metadata_backend/helpers/metax_api_handler.py
index 67c2326cd..a1e7a5ead 100644
--- a/metadata_backend/helpers/metax_api_handler.py
+++ b/metadata_backend/helpers/metax_api_handler.py
@@ -1,11 +1,11 @@
 """Class for handling calls to METAX API."""
-from typing import Any, Dict
+from typing import Any, Dict, List
 import aiohttp
 from aiohttp.web import HTTPBadRequest, HTTPError, HTTPForbidden, HTTPNotFound, Request
 from ..api.middlewares import get_session
-from ..api.operators import FolderOperator, Operator, UserOperator
+from ..api.operators import UserOperator
 from ..conf.conf import metax_config
 from .logger import LOG
@@ -164,56 +164,38 @@ async def delete_draft_dataset(self, metax_id: str) -> None:
 reason = await resp.text()
 raise self.process_error(status, reason)
- async def publish_dataset(self, folder_id: str) -> None:
+ async def publish_dataset(self, _metax_ids: List[Dict]) -> None:
 """Publish draft dataset to Metax service.
- Fetch metadataObjects for published folder. Publish each object within Metax service and
- update object's Metax status to db.
+ Iterate over the metax ids that need to be published.
- :param folder_id: Folder ID where metadata objects to publish resides
+ :param _metax_ids: List of metax IDs that include study and datasets
 """
- folder = await FolderOperator(self.db_client).read_folder(folder_id)
- operator = Operator(self.db_client)
- for object in folder["metadataObjects"]:
- if object["schema"] in {"study", "dataset"}:
- data, _ = await operator.read_metadata_object(object["schema"], object["accessionId"])
- if isinstance(data, dict):
- metax_id = data["metaxIdentifier"]
- doi = data["doi"]
- async with aiohttp.ClientSession() as sess:
- resp = await sess.post(
- f"{self.metax_url}{self.publish_route}",
- params={"identifier": metax_id},
- auth=aiohttp.BasicAuth(self.username, self.password),
- )
- status = resp.status
- if status == 200:
- preferred_id = await resp.json()
- if doi != preferred_id["preferred_identifier"]:
- LOG.warning(
- f"Metax Preferred Identifier {preferred_id['preferred_identifier']} "
- f"does not match object's DOI {doi}"
- )
- LOG.debug(
- f"Object {object['schema']} with accession ID {object['accessionId']} is "
- "published to Metax service."
+ for object in _metax_ids: + metax_id = object["metaxIdentifier"] + doi = object["doi"] + async with aiohttp.ClientSession() as sess: + resp = await sess.post( + f"{self.metax_url}{self.publish_route}", + params={"identifier": metax_id}, + auth=aiohttp.BasicAuth(self.username, self.password), + ) + status = resp.status + if status == 200: + preferred_id = await resp.json() + if doi != preferred_id["preferred_identifier"]: + LOG.warning( + f"Metax Preferred Identifier {preferred_id['preferred_identifier']} " + f"does not match object's DOI {doi}" ) - # This must be updated as Metax identifier will be moved to folder from object after publishing - # await operator.update_metadata_object( - # object["schema"], - # object["accessionId"], - # { - # "metaxIdentifier": { - # "identifier": metax_id, - # "status": "published", - # } - # }, - # ) - else: - # TODO: how front end should react on this?? - reason = await resp.text() - raise self.process_error(status, reason) - LOG.info(f"Folder's {folder_id} metadata objects are published to Metax service.") + LOG.debug( + f"Object with metax ID {object['metaxIdentifier']} and DOI {object['doi']} is " + "published to Metax service." + ) + else: + reason = await resp.text() + raise self.process_error(status, reason) + LOG.info(f"Metax ID {object['metaxIdentifier']} was published to Metax service.") async def create_metax_dataset_data_from_study(self, data: Dict) -> Dict: """Construct Metax dataset's research dataset dictionary from Submitters Study. From ea1f911a044e088c5b0445afc0b77c00fabebfcd Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 10 Mar 2022 22:03:52 +0200 Subject: [PATCH 307/336] don't update the folder when identifiers created --- metadata_backend/api/handlers/object.py | 32 ++----------------------- 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index 595a80c47..230fb1d69 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -184,7 +184,7 @@ async def post_object(self, req: Request) -> Response: # Create draft dataset to Metax catalog if collection in _allowed_doi: - [await self._create_metax_dataset(req, collection, item, folder_id) for item in objects] + [await self._create_metax_dataset(req, collection, item) for item in objects] body = ujson.dumps(data, escape_forward_slashes=False) @@ -462,7 +462,7 @@ def _prepare_folder_patch_update_object(self, schema: str, data: Dict, filename: ) return [patch_op] - async def _create_metax_dataset(self, req: Request, collection: str, object: Dict, folder_id: str) -> str: + async def _create_metax_dataset(self, req: Request, collection: str, object: Dict) -> str: """Handle connection to Metax api handler for dataset creation. 
Dataset or Study object is assigned with DOI @@ -484,9 +484,6 @@ async def _create_metax_dataset(self, req: Request, collection: str, object: Dic new_info = {"doi": object["doi"], "metaxIdentifier": metax_id} await operator.create_metax_info(collection, object["accessionId"], new_info) - folder_op = FolderOperator(req.app["db_client"]) - doi_patch = self._prepare_folder_patch_doi(collection, object["doi"], metax_id) - await folder_op.update_folder(folder_id, doi_patch) return metax_id async def _draft_doi(self, schema_type: str) -> str: @@ -504,28 +501,3 @@ async def _draft_doi(self, schema_type: str) -> str: LOG.debug(f"doi created with doi: {_doi_data['fullDOI']}") return _doi_data["fullDOI"] - - def _prepare_folder_patch_doi(self, schema: str, doi: str, url: str) -> List: - """Prepare patch operation for updating object's doi information in a folder. - - :param schema: schema of object to be updated - :param ids: object IDs - :returns: dict with patch operation - """ - patch = [] - - data = { - "identifier": { - "identifierType": "DOI", - "doi": doi, - }, - "url": url, - } - if schema == "study": - patch_op = {"op": "add", "path": "/extraInfo/studyIdentifier", "value": data} - patch.append(patch_op) - elif schema == "dataset": - patch_op = {"op": "add", "path": "/extraInfo/datasetIdentifiers/-", "value": data} - patch.append(patch_op) - - return patch From 6c821856a6707849910f67869d8d74ff38c6718f Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 10 Mar 2022 23:06:09 +0200 Subject: [PATCH 308/336] prepare DOI data for publishing and move to folder Move metax publishing before patching the folder split preparing DOI data for study and dataset to separate functions --- metadata_backend/api/handlers/folder.py | 319 +++++++++++++++++------- tests/test_handlers.py | 12 +- 2 files changed, 240 insertions(+), 91 deletions(-) diff --git a/metadata_backend/api/handlers/folder.py b/metadata_backend/api/handlers/folder.py index 05cc07f90..8d04b8073 100644 --- a/metadata_backend/api/handlers/folder.py +++ b/metadata_backend/api/handlers/folder.py @@ -10,7 +10,7 @@ from aiohttp.web import Request, Response from multidict import CIMultiDict -from ...conf.conf import publisher +from ...conf.conf import doi_config from ...helpers.doi import DOIHandler from ...helpers.logger import LOG from ...helpers.metax_api_handler import MetaxServiceHandler @@ -23,30 +23,151 @@ class FolderAPIHandler(RESTAPIHandler): """API Handler for folders.""" - def _prepare_doi_update(self, folder: Dict) -> Tuple[Dict, List]: - """Prepare dictionary with values for the Datacite DOI update. + def _prepare_published_study(self, study_data: Dict, general_info: Dict) -> Dict: + """Prepare Study object for publishing. - We need to prepare data for Study and Datasets, publish doi for each, - and create links (relatedIdentifiers) between Study and Datasets. - All the required information should be in the folder ``doiInfo``, - as well as ``extraInfo`` which contains the draft DOIs created for the Study - and each Dataset. + :param study_data: Study Object read from the database + :param general_info: General information that is captured in front-end and set in ``doiInfo`` key + :returns: Study Object ready to publish to Datacite + """ - :param folder: Folder data - :returns: Tuple with the Study and list of Datasets. 
+ study = { + "attributes": { + "publisher": doi_config["publisher"], + "publicationYear": date.today().year, + "event": "publish", + "schemaVersion": "https://schema.datacite.org/meta/kernel-4", + "doi": study_data["doi"], + "prefix": study_data["doi"].split("/")[0], + "suffix": study_data["doi"].split("/")[1], + "types": { + "bibtex": "misc", + "citeproc": "collection", + "schemaOrg": "Collection", + "resourceTypeGeneral": "Collection", + }, + "url": f"{doi_config['discovery_url']}{study_data['metaxIdentifier']}", + "identifiers": [ + { + "identifierType": "DOI", + "doi": study_data["doi"], + } + ], + "descriptions": [], + "titles": [], + }, + "id": study_data["doi"], + "type": "dois", + } + + study["attributes"]["titles"].append( + {"lang": None, "title": study_data["descriptor"]["studyTitle"], "titleType": None}, + ) + + study["attributes"]["descriptions"].append( + { + "lang": None, + "description": study_data["descriptor"]["studyAbstract"], + "descriptionType": "Abstract", + } + ) + + if "studyDescription" in study_data: + study["attributes"]["descriptions"].append( + {"lang": None, "description": study_data["studyDescription"], "descriptionType": "Other"} + ) + + study["attributes"].update(general_info) + LOG.debug(f"prepared study info: {study}") + + return study + + def _prepare_published_dataset(self, study_doi: str, dataset_data: Dict, general_info: Dict) -> Dict: + """Prepare Dataset object for publishing. + + :param study_doi: Study DOI to link dataset to study at Datacite + :param dataset_data: Dataset Object read from the database + :param general_info: General information that is captured in front-end and set in `doiInfo` key + :returns: Dataset Object ready to publish to Datacite """ - _general_info = { + dataset = { "attributes": { - "publisher": publisher, + "publisher": doi_config["publisher"], "publicationYear": date.today().year, "event": "publish", "schemaVersion": "https://schema.datacite.org/meta/kernel-4", + "doi": dataset_data["doi"], + "prefix": dataset_data["doi"].split("/")[0], + "suffix": dataset_data["doi"].split("/")[1], + "types": { + "ris": "DATA", + "bibtex": "misc", + "citeproc": "dataset", + "schemaOrg": "Dataset", + "resourceTypeGeneral": "Dataset", + }, + "url": f"{doi_config['discovery_url']}{dataset_data['metaxIdentifier']}", + "identifiers": [ + { + "identifierType": "DOI", + "doi": dataset_data["doi"], + } + ], + "descriptions": [], + "titles": [], }, + "id": dataset_data["doi"], + "type": "dois", } + dataset["attributes"]["titles"].append( + {"lang": None, "title": dataset_data["title"], "titleType": None}, + ) + + dataset["attributes"]["descriptions"].append( + { + "lang": None, + "description": dataset_data["description"], + "descriptionType": "Other", + } + ) + + # A Dataset is described by a Study + if "relatedIdentifiers" not in dataset["attributes"]: + dataset["attributes"]["relatedIdentifiers"] = [] + + dataset["attributes"]["relatedIdentifiers"].append( + { + "relationType": "IsDescribedBy", + "relatedIdentifier": study_doi, + "resourceTypeGeneral": "Collection", + "relatedIdentifierType": "DOI", + } + ) + + dataset["attributes"].update(general_info) + LOG.debug(f"prepared dataset info: {dataset}") + + return dataset + + async def _prepare_doi_update(self, obj_op: Operator, folder: Dict) -> Tuple[Dict, List, List]: + """Prepare dictionary with values for the Datacite DOI update. + + We need to prepare data for Study and Datasets, publish doi for each, + and create links (relatedIdentifiers) between Study and Datasets. 
+ All the required information should be in the folder ``doiInfo``, + as well as ``extraInfo`` which contains the draft DOIs created for the Study + and each Dataset. + + :param obj_op: Operator for reading objects from database. + :param folder: Folder data + :returns: Tuple with the Study and list of Datasets and list of identifiers for publishing to Metax + """ + + metax_ids = [] study = {} - datasets = [] + datasets: List = [] # we need to re-format these for Datacite, as in the JSON schemas # we split the words so that front-end will display them nicely @@ -66,91 +187,80 @@ def _prepare_doi_update(self, folder: Dict) -> Tuple[Dict, List]: if "fundingReferences" in _info: for d in _info["fundingReferences"]: d.update((k, "".join(v.split())) for k, v in d.items() if k == "funderIdentifierType") - # need to add titles and descriptions for datasets and study + try: # keywords are only required for Metax integration # thus we remove them _info.pop("keywords", None) - _general_info["attributes"].update(_info) - - _study = folder["extraInfo"]["studyIdentifier"] - _study_doi = _study["identifier"]["doi"] - study = { - "attributes": { - "doi": _study_doi, - "prefix": _study_doi.split("/")[0], - "suffix": _study_doi.split("/")[1], - "types": { - "bibtex": "misc", - "citeproc": "collection", - "schemaOrg": "Collection", - "resourceTypeGeneral": "Collection", - }, - "url": _study["url"], - "identifiers": [_study["identifier"]], - }, - "id": _study_doi, - "type": "dois", - } - study.update(_general_info) - - _datasets = folder["extraInfo"]["datasetIdentifiers"] - for ds in _datasets: - _doi = ds["identifier"]["doi"] - _tmp = { - "attributes": { - "doi": _doi, - "prefix": _doi.split("/")[0], - "suffix": _doi.split("/")[1], - "types": { - "ris": "DATA", - "bibtex": "misc", - "citeproc": "dataset", - "schemaOrg": "Dataset", - "resourceTypeGeneral": "Dataset", - }, - "url": ds["url"], - "identifiers": [ds["identifier"]], - }, - "id": _doi, - "type": "dois", - } - _tmp.update(_general_info) + _study_doi = "" - # A Dataset is described by a Study - if "relatedIdentifiers" not in _tmp["attributes"]: - _tmp["attributes"]["relatedIdentifiers"] = [] + for _obj in folder["metadataObjects"]: - _tmp["attributes"]["relatedIdentifiers"].append( - { - "relationType": "IsDescribedBy", - "relatedIdentifier": _study_doi, - "resourceTypeGeneral": "Collection", - "relatedIdentifierType": "DOI", - } - ) + if _obj["schema"] == "study": - datasets.append(_tmp) + # we need the study for the title, abstract and description + study_data, _ = await obj_op.read_metadata_object("study", _obj["accessionId"]) - # A Study describes a Dataset - if "relatedIdentifiers" not in study["attributes"]: - study["attributes"]["relatedIdentifiers"] = [] + if isinstance(study_data, dict): - study["attributes"]["relatedIdentifiers"].append( - { - "relationType": "Describes", - "relatedIdentifier": _doi, - "resourceTypeGeneral": "Dataset", - "relatedIdentifierType": "DOI", - } - ) + study = self._prepare_published_study(study_data, _info) + + _study_doi = study_data["doi"] + + metax_ids.append({"doi": study_data["doi"], "metaxIdentifier": study_data["metaxIdentifier"]}) + + # there are cases where datasets are added first + if len(datasets) > 0: + LOG.info(datasets) + for ds in datasets: + if "relatedIdentifiers" not in study["attributes"]: + study["attributes"]["relatedIdentifiers"] = [] + + study["attributes"]["relatedIdentifiers"].append( + { + "relationType": "Describes", + "relatedIdentifier": ds["attributes"]["doi"], + 
"resourceTypeGeneral": "Dataset", + "relatedIdentifierType": "DOI", + } + ) + + elif _obj["schema"] == "dataset": + + # we need the dataset title and description + ds_data, _ = await obj_op.read_metadata_object("dataset", _obj["accessionId"]) + + if isinstance(ds_data, dict): + dataset = self._prepare_published_dataset(_study_doi, ds_data, _info) + + datasets.append(dataset) + metax_ids.append({"doi": ds_data["doi"], "metaxIdentifier": ds_data["metaxIdentifier"]}) + + # A Study describes a Dataset + # there are cases where datasets are added first + if "attributes" in study: + if "relatedIdentifiers" not in study["attributes"]: + study["attributes"]["relatedIdentifiers"] = [] + + study["attributes"]["relatedIdentifiers"].append( + { + "relationType": "Describes", + "relatedIdentifier": ds_data["doi"], + "resourceTypeGeneral": "Dataset", + "relatedIdentifierType": "DOI", + } + ) + else: + pass + # we catch all errors, if we missed even a key, that means some information is not + # properly recorded except Exception as e: reason = f"Could not construct DOI data, reason: {e}" LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) + raise web.HTTPInternalServerError(reason=reason) - return (study, datasets) + return (study, datasets, metax_ids) def _check_patch_folder(self, patch_ops: Any) -> None: """Check patch operations in request are valid. @@ -158,6 +268,7 @@ def _check_patch_folder(self, patch_ops: Any) -> None: We check that ``metadataObjects`` and ``drafts`` have ``_required_values``. For tags we check that the ``submissionType`` takes either ``CSV``, ``XML`` or ``Form`` as values. + :param patch_ops: JSON patch request :raises: HTTPBadRequest if request does not fullfil one of requirements :raises: HTTPUnauthorized if request tries to do anything else than add or replace @@ -402,33 +513,61 @@ async def publish_folder(self, req: Request) -> Response: folder = await operator.read_folder(folder_id) # we first try to publish the DOI before actually publishing the folder - study, datasets = self._prepare_doi_update(folder) + obj_ops = Operator(db_client) + study, datasets, metax_ids = await self._prepare_doi_update(obj_ops, folder) doi_ops = DOIHandler() + datasets_patch = [] + await doi_ops.set_state(study) + for ds in datasets: await doi_ops.set_state(ds) - - obj_ops = Operator(db_client) + patch_ds = { + "op": "add", + "path": "/extraInfo/datasetIdentifiers/-", + "value": { + "identifier": { + "identifierType": "DOI", + "doi": ds["id"], + }, + "url": ds["attributes"]["url"], + "types": ds["attributes"]["types"], + }, + } + datasets_patch.append(patch_ds) # Create draft DOI and delete draft objects from the folder for obj in folder["drafts"]: await obj_ops.delete_metadata_object(obj["schema"], obj["accessionId"]) + await MetaxServiceHandler(req).publish_dataset(metax_ids) + # Patch the folder into a published state patch = [ {"op": "replace", "path": "/published", "value": True}, {"op": "replace", "path": "/drafts", "value": []}, {"op": "add", "path": "/datePublished", "value": int(datetime.now().timestamp())}, - {"op": "add", "path": "/extraInfo/publisher", "value": publisher}, + {"op": "add", "path": "/extraInfo/publisher", "value": doi_config["publisher"]}, {"op": "add", "path": "/extraInfo/publicationYear", "value": date.today().year}, + { + "op": "add", + "path": "/extraInfo/studyIdentifier", + "value": { + "identifier": { + "identifierType": "DOI", + "doi": study["id"], + }, + "url": study["attributes"]["url"], + "types": study["attributes"]["types"], + }, + }, ] + 
patch.extend(datasets_patch) new_folder = await operator.update_folder(folder_id, patch) - await MetaxServiceHandler(req).publish_dataset(new_folder) - body = ujson.dumps({"folderId": new_folder}, escape_forward_slashes=False) LOG.info(f"Patching folder with ID {new_folder} was successful.") return web.Response(body=body, status=200, content_type="application/json") diff --git a/tests/test_handlers.py b/tests/test_handlers.py index fce7ebf93..7bd2573fc 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -914,7 +914,17 @@ async def test_folder_is_published(self): self.MockedDoiHandler().set_state.return_value = None self.MockedFolderOperator().update_folder.return_value = self.folder_id self.MockedMetaxHandler().publish_dataset.return_value = None - with patch(self._mock_prepare_doi, return_value=({}, [{}])): + with patch( + self._mock_prepare_doi, + return_value=( + {"id": "prefix/suffix-study", "attributes": {"url": "http://metax_id", "types": {}}}, + [{"id": "prefix/suffix-dataset", "attributes": {"url": "http://metax_id", "types": {}}}], + [ + {"doi": "prefix/suffix-study", "metaxIdentifier": "metax_id"}, + {"doi": "prefix/suffix-dataset", "metaxIdentifier": "metax_id"}, + ], + ), + ): response = await self.client.patch("/publish/FOL12345678") self.assertEqual(response.status, 200) json_resp = await response.json() From 9bd46729b928ab4cec785d10f683bb3bd5fb4475 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 10 Mar 2022 23:06:15 +0200 Subject: [PATCH 309/336] some metax functions don't need to be async --- metadata_backend/helpers/metax_api_handler.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/metadata_backend/helpers/metax_api_handler.py b/metadata_backend/helpers/metax_api_handler.py index a1e7a5ead..4c70b55c3 100644 --- a/metadata_backend/helpers/metax_api_handler.py +++ b/metadata_backend/helpers/metax_api_handler.py @@ -80,9 +80,9 @@ async def post_dataset_as_draft(self, collection: str, data: Dict) -> str: metax_dataset = self.minimal_dataset_template metax_dataset["metadata_provider_user"] = await self.get_metadata_provider_user() if collection == "dataset": - dataset_data = await self.create_metax_dataset_data_from_dataset(data) + dataset_data = self.create_metax_dataset_data_from_dataset(data) else: - dataset_data = await self.create_metax_dataset_data_from_study(data) + dataset_data = self.create_metax_dataset_data_from_study(data) metax_dataset["research_dataset"] = dataset_data LOG.debug( f"Creating draft dataset to Metax service from Submitter {collection} with accession ID " @@ -123,9 +123,9 @@ async def update_draft_dataset(self, collection: str, data: Dict) -> str: # TODO: should this be changed if person updating data is different from data creator? 
metax_dataset["metadata_provider_user"] = await self.get_metadata_provider_user()
 if collection == "dataset":
- dataset_data = await self.create_metax_dataset_data_from_dataset(data)
+ dataset_data = self.create_metax_dataset_data_from_dataset(data)
 else:
- dataset_data = await self.create_metax_dataset_data_from_study(data)
+ dataset_data = self.create_metax_dataset_data_from_study(data)
 metax_dataset["research_dataset"] = dataset_data
 LOG.info(f"Sending updated {collection} object data to Metax service.")
@@ -197,7 +197,7 @@ async def publish_dataset(self, _metax_ids: List[Dict]) -> None:
 raise self.process_error(status, reason)
 LOG.info(f"Metax ID {object['metaxIdentifier']} was published to Metax service.")
- async def create_metax_dataset_data_from_study(self, data: Dict) -> Dict:
+ def create_metax_dataset_data_from_study(self, data: Dict) -> Dict:
 """Construct Metax dataset's research dataset dictionary from Submitters Study.
 :param data: Study data
@@ -211,7 +211,7 @@ async def create_metax_dataset_data_from_study(self, data: Dict) -> Dict:
 LOG.debug(f"Created Metax dataset from Study with data: {research_dataset}")
 return research_dataset
- async def create_metax_dataset_data_from_dataset(self, data: Dict) -> Dict:
+ def create_metax_dataset_data_from_dataset(self, data: Dict) -> Dict:
 """Construct Metax dataset's research dataset dictionary from Submitters Dataset.
 :param data: Dataset data
From 3d27fda9c9735fa7f813049ff8e0b5fa10847a2f Mon Sep 17 00:00:00 2001
From: Stefan Negru
Date: Fri, 11 Mar 2022 11:36:36 +0200
Subject: [PATCH 310/336] fix typo for set state doi raises docs
---
 metadata_backend/helpers/doi.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/metadata_backend/helpers/doi.py b/metadata_backend/helpers/doi.py
index ca6c3389c..e99c6972a 100644
--- a/metadata_backend/helpers/doi.py
+++ b/metadata_backend/helpers/doi.py
@@ -64,7 +64,7 @@ async def set_state(self, doi_payload: Dict) -> None:
 Partial updates are possible.
 :param doi_payload: Dictionary with payload to send to Datacite
- :raises: HTTPInternalServerError if we the Datacite DOI update fails
+ :raises: HTTPInternalServerError if the Datacite DOI update fails
 :returns: None
 """
 auth = BasicAuth(login=self.doi_user, password=self.doi_key)
From d8a7bf1a570b9827ce51878baabe2f25093e9856 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Wed, 16 Mar 2022 07:04:37 +0000
Subject: [PATCH 311/336] Add bulk patching to Metax mock api

Adds bulk patching of datasets to the Metax mock api; it is used to
update dataset metadata for publishing. Also adds validation of
datasets on publishing.
---
 tests/integration/mock_metax_api.py | 111 +++++++++++++++++++++++-----
 1 file changed, 92 insertions(+), 19 deletions(-)
diff --git a/tests/integration/mock_metax_api.py b/tests/integration/mock_metax_api.py
index bb0624eae..b1c75f567 100644
--- a/tests/integration/mock_metax_api.py
+++ b/tests/integration/mock_metax_api.py
@@ -4,6 +4,7 @@
 import logging
 import os
 from datetime import datetime
+from typing import Dict
 from uuid import uuid4
 import ujson
@@ -74,7 +75,19 @@ async def post_dataset(req: web.Request) -> web.Response:
 :return: HTTP response with mocked Metax dataset data
 """
 LOG.info("Creating Metax dataset")
- content = await validate_payload(req)
+ try:
+ content = await req.json()
+ except json.decoder.JSONDecodeError as e:
+ reason = f"JSON is not correctly formatted.
See: {e}" + LOG.error(f"Error while validating payload: {reason}") + raise web.HTTPBadRequest( + reason={ + "detail": reason, + "error_identifier": datetime.now(), + } + ) + validate_data(content) + metax_id = str(uuid4()) metax_additions = { "identifier": metax_id, @@ -116,7 +129,18 @@ async def update_dataset(req: web.Request) -> web.Response: LOG.error(f"No dataset found with identifier {metax_id}") raise web.HTTPNotFound(reason={"detail": "Not found."}) - content = await validate_payload(req) + try: + content = await req.json() + except json.decoder.JSONDecodeError as e: + reason = f"JSON is not correctly formatted. See: {e}" + LOG.error(f"Error while validating payload: {reason}") + raise web.HTTPBadRequest( + reason={ + "detail": reason, + "error_identifier": datetime.now(), + } + ) + validate_data(content) for key, value in content.items(): drafts[metax_id][key] = value @@ -131,6 +155,66 @@ async def update_dataset(req: web.Request) -> web.Response: ) +async def patch_datasets(req: web.Request) -> web.Response: + """Mock endpoint for patching bulk Metax datasets. + + :params req: HTTP request with data for Metax datasets + :return: HTTP response with IDs of patched Metax datasets and possible errors + """ + LOG.info("Patching Metax datasets") + + success = [] + failed = [] + try: + content = await req.json() + except json.decoder.JSONDecodeError as e: + reason = f"JSON is not correctly formatted. See: {e}" + LOG.error(f"Error while validating payload: {reason}") + raise web.HTTPBadRequest( + reason={ + "detail": reason, + "error_identifier": datetime.now(), + } + ) + for dataset in content: + try: + metax_id = dataset["identifier"] + _ = dataset["research_dataset"]["preferred_identifier"] + except KeyError: + raise web.HTTPBadRequest( + reason={ + "detail": "Dataset is missing required identifiers", + "error_identifier": datetime.now(), + } + ) + if metax_id not in drafts.keys(): + reason = f"No dataset found with identifier {metax_id}" + LOG.error(reason) + failed.append( + { + "object": { + "detail": reason, + "error_identifier": datetime.now(), + } + } + ) + continue + + for key, value in dataset.items(): + drafts[metax_id][key] = value + + drafts[metax_id]["date_modified"] = str(datetime.now()) + success.append({"object": drafts[metax_id]}) + + LOG.info("Metax datasets patched") + body = {"success": success, "failed": failed} + return web.Response( + body=ujson.dumps(body, escape_forward_slashes=False), + status=200, + content_type="application/json", + ) + + async def publish_dataset(req: web.Request) -> web.Response: """Mock endpoint for publishing Metax dataset. @@ -156,6 +240,7 @@ async def publish_dataset(req: web.Request) -> web.Response: raise web.HTTPNotFound(reason={"detail": "Not found."}) data = drafts[metax_id] + validate_data(data, draft=True) published[metax_id] = data del drafts[metax_id] published[metax_id]["state"] = "published" @@ -193,42 +278,29 @@ async def delete_dataset(req: web.Request) -> web.Response: return web.HTTPNoContent() -async def validate_payload(req: web.Request, draft=True) -> dict: +def validate_data(data: Dict, draft=True) -> None: """Check for required fields in dataset. - :param req: HTTP Request with data for dataset creation + :param data: Metax data to be validated :param draft: Indicator if dataset needs to be validated as draft or not; default true """ LOG.info("Validating payload") - try: - content = await req.json() - except json.decoder.JSONDecodeError as e: - reason = f"JSON is not correctly formatted. 
See: {e}"
- LOG.error(f"Error while validating payload: {reason}")
- raise web.HTTPBadRequest(
- reason={
- "detail": reason,
- "error_identifier": datetime.now(),
- }
- )
 required = ["data_catalog", "metadata_provider_org", "metadata_provider_user", "research_dataset"]
 rd_required = ["title", "description", "preferred_identifier", "access_rights", "publisher"]
 if not draft:
 rd_required = rd_required + ["creator"]
-
- if not all(key in content.keys() for key in required):
+ if not all(key in data.keys() for key in required):
 reason = {"detail": [f"Dataset did not include all required fields: {', '.join(required)}."]}
 reason = json.dumps(reason)
 LOG.error(f"Error while validating payload: {reason}")
 raise web.HTTPBadRequest(reason=reason, content_type="application/json")
- if not all(key in content["research_dataset"].keys() for key in rd_required):
+ if not all(key in data["research_dataset"].keys() for key in rd_required):
 reason = {"detail": [f"Research dataset did not include all required fields: {', '.join(rd_required)}."]}
 reason = json.dumps(reason)
 LOG.error(f"Error while validating payload: {reason}")
 raise web.HTTPBadRequest(reason=reason, content_type="application/json")
- return content

 def init() -> web.Application:
 """Start server."""
@@ -240,6 +312,7 @@ def init() -> web.Application:
 web.delete("/rest/v2/datasets/{metax_id}", delete_dataset),
 web.post("/rpc/v2/datasets/publish_dataset", publish_dataset),
 web.get("/rest/v2/datasets/{metax_id}", get_dataset),
+ web.patch("/rest/v2/datasets", patch_datasets),
 ]
 app.router.add_routes(api_routes)
 LOG.info("Metax mock API started")
From 11f702611d4fc56bff2ef4d782fbde69a7b72c34 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Wed, 16 Mar 2022 07:08:14 +0000
Subject: [PATCH 312/336] Add required fields from doi info

There is required and additional information coming from doi info which
needs to be added to the object's Metax metadata before it is published.
Creator is the only required field still missing from the Metax dataset
data, and it is now mapped. More field mapping is going to be
implemented in a separate PR.
---
 metadata_backend/api/handlers/folder.py | 5 +-
 metadata_backend/helpers/metax_api_handler.py | 79 +++++++++++++++++++
 tests/test_handlers.py | 2 +
 3 files changed, 85 insertions(+), 1 deletion(-)
diff --git a/metadata_backend/api/handlers/folder.py b/metadata_backend/api/handlers/folder.py
index 8d04b8073..5dd2838bc 100644
--- a/metadata_backend/api/handlers/folder.py
+++ b/metadata_backend/api/handlers/folder.py
@@ -543,7 +543,10 @@ async def publish_folder(self, req: Request) -> Response:
 for obj in folder["drafts"]:
 await obj_ops.delete_metadata_object(obj["schema"], obj["accessionId"])
- await MetaxServiceHandler(req).publish_dataset(metax_ids)
+ # update study to metax with data coming from doi info
+ metax_handler = MetaxServiceHandler(req)
+ await metax_handler.update_dataset_with_doi_info(folder["doiInfo"], metax_ids)
+ await metax_handler.publish_dataset(metax_ids)

 # Patch the folder into a published state
 patch = [
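For reference, a sketch of the two argument shapes this new call assumes, matching the metax_ids entries collected during publishing and the doiInfo test fixture added in this commit (the identifier values are illustrative):

    doi_info = {"creators": [{"name": "Creator, Test"}]}
    # one entry per Study/Dataset object in the folder
    metax_ids = [
        {"doi": "10.xxxx/yyyyy-study", "metaxIdentifier": "metax-id-study"},
        {"doi": "10.xxxx/yyyyy-dataset", "metaxIdentifier": "metax-id-dataset"},
    ]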
diff --git a/metadata_backend/helpers/metax_api_handler.py b/metadata_backend/helpers/metax_api_handler.py
index 4c70b55c3..6769f8f65 100644
--- a/metadata_backend/helpers/metax_api_handler.py
+++ b/metadata_backend/helpers/metax_api_handler.py
@@ -164,6 +164,49 @@ async def delete_draft_dataset(self, metax_id: str) -> None:
 reason = await resp.text()
 raise self.process_error(status, reason)
+ async def update_dataset_with_doi_info(self, doi_info: Dict, metax_ids: List) -> None:
+ """Update dataset for publishing.
+
+ :param doi_info: Dict containing info to complete metax dataset metadata
+ :param metax_ids: List of Metax IDs of the datasets to be updated
+ """
+ LOG.info("Updating object metax metadata with doi info")
+ bulk_data = []
+ for id in metax_ids:
+ async with aiohttp.ClientSession() as sess:
+ resp = await sess.get(
+ f"{self.metax_url}{self.rest_route}/{id['metaxIdentifier']}",
+ auth=aiohttp.BasicAuth(self.username, self.password),
+ )
+ status = resp.status
+ if status == 200:
+ metax_data = await resp.json()
+ else:
+ reason = await resp.text()
+ raise self.process_error(status, reason)
+
+ # Map fields from doi info to Metax schema
+
+ # creator is required field
+ metax_data["research_dataset"]["creator"] = self.map_creators(doi_info["creators"])
+ bulk_data.append(
+ {"identifier": id["metaxIdentifier"], "research_dataset": metax_data["research_dataset"]}
+ )
+
+ # for id in metax_ids:
+ async with aiohttp.ClientSession() as sess:
+ resp = await sess.patch(
+ f"{self.metax_url}{self.rest_route}",
+ json=bulk_data,
+ auth=aiohttp.BasicAuth(self.username, self.password),
+ )
+ if resp.status == 200:
+ LOG.info("Objects metadata are updated to Metax for publishing")
+ return await resp.json()
+ else:
+ reason = await resp.text()
+ raise self.process_error(resp.status, reason)
+
 async def publish_dataset(self, _metax_ids: List[Dict]) -> None:
 """Publish draft dataset to Metax service.
@@ -224,6 +267,42 @@ def create_metax_dataset_data_from_dataset(self, data: Dict) -> Dict:
 LOG.debug(f"Created Metax dataset from Dataset with data: {research_dataset}")
 return research_dataset
+ def map_creators(self, creators: List) -> List:
+ """Map creators.
+
+ :param creators: Creator data coming from the metadata submitter
+ :returns: Constructed creator data for Metax
+ """
+
+ metax_creators = []
+ for creator in creators:
+ metax_creator: Dict[str, Any] = {
+ "name": "",
+ "@type": "Person",
+ "member_of": {"name": {"en": ""}, "@type": "Organization"},
+ "identifier": "",
+ }
+ metax_creator["name"] = creator["name"]
+ metax_creator["@type"] = "Person"
+ # Metax schema accepts only one affiliation per creator
+ # so we take first one
+ if creator.get("affiliation", None):
+ affiliation = creator["affiliation"][0]
+ metax_creator["member_of"]["name"]["en"] = affiliation["name"]
+ metax_creator["member_of"]["@type"] = "Organization"
+ if affiliation.get("affiliationIdentifier"):
+ metax_creator["member_of"]["identifier"] = affiliation["affiliationIdentifier"]
+ else:
+ metax_creator.pop("member_of")
+ # Metax schema accepts only one identifier per creator
+ # so we take first one
+ if creator.get("nameIdentifiers", None) and creator["nameIdentifiers"][0].get("nameIdentifier", None):
+ metax_creator["identifier"] = creator["nameIdentifiers"][0]["nameIdentifier"]
+ else:
+ metax_creator.pop("identifier")
+ metax_creators.append(metax_creator)
+ return metax_creators
+
 # we don't know exactly what is coming from Metax so we try it all
 def process_error(self, status: int, resp_json: str) -> HTTPError:
 """Construct Metax dataset's research dataset dictionary from Submitters Dataset.
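To make the creator mapping concrete, this is what map_creators would return for one fully populated creator; without affiliation or nameIdentifiers the member_of and identifier keys are dropped (the ROR and ORCID values are illustrative):

    creators = [
        {
            "name": "Creator, Test",
            "affiliation": [{"name": "CSC", "affiliationIdentifier": "https://ror.org/04m8m1253"}],
            "nameIdentifiers": [{"nameIdentifier": "https://orcid.org/0000-0002-1825-0097"}],
        }
    ]
    # map_creators(creators) yields:
    [
        {
            "name": "Creator, Test",
            "@type": "Person",
            "member_of": {
                "name": {"en": "CSC"},
                "@type": "Organization",
                "identifier": "https://ror.org/04m8m1253",
            },
            "identifier": "https://orcid.org/0000-0002-1825-0097",
        }
    ]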
diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 7bd2573fc..077794d65 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -69,6 +69,7 @@ async def setUpAsync(self): {"accessionId": "EGA123456", "schema": "sample"}, ], "drafts": [], + "doiInfo": {"creators": [{"name": "Creator, Test"}]}, } self.user_id = "USR12345678" self.test_user = { @@ -913,6 +914,7 @@ async def test_folder_is_published(self): """Test that folder would be published and DOI would be added.""" self.MockedDoiHandler().set_state.return_value = None self.MockedFolderOperator().update_folder.return_value = self.folder_id + self.MockedMetaxHandler().update_dataset_with_doi_info.return_value = None self.MockedMetaxHandler().publish_dataset.return_value = None with patch( self._mock_prepare_doi, From 4005a50cfa4d8ac9e691d6b2adf599f126282dc2 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 16 Mar 2022 07:45:44 +0000 Subject: [PATCH 313/336] Update integration tests --- tests/integration/run_tests.py | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index dbe4db853..c19e0d782 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -897,7 +897,7 @@ async def test_metax_crud_with_xml(sess, folder_id): assert False, "Metax ID was not in response data" object.append(metax_id) async with sess.get(f"{metax_url}/{metax_id}") as metax_resp: - assert metax_resp.status == 200, f"HTTP Status code error, got {resp.status}" + assert metax_resp.status == 200, f"HTTP Status code error, got {metax_resp.status}" metax_res = await metax_resp.json() assert ( res.get("doi", None) == metax_res["research_dataset"]["preferred_identifier"] @@ -910,7 +910,7 @@ async def test_metax_crud_with_xml(sess, folder_id): for _, _, metax_id in ids: async with sess.get(f"{metax_url}/{metax_id}") as metax_resp: - assert metax_resp.status == 200, f"HTTP Status code error, got {resp.status}" + assert metax_resp.status == 200, f"HTTP Status code error, got {metax_resp.status}" metax_res = await metax_resp.json() assert ( metax_res.get("date_modified", None) is not None @@ -922,7 +922,7 @@ async def test_metax_crud_with_xml(sess, folder_id): for _, _, metax_id in ids: async with sess.get(f"{metax_url}/{metax_id}") as metax_resp: - assert metax_resp.status == 404, f"HTTP Status code error - expected 404 Not Found, got {resp.status}" + assert metax_resp.status == 404, f"HTTP Status code error - expected 404 Not Found, got {metax_resp.status}" async def test_metax_crud_with_json(sess, folder_id): @@ -952,7 +952,7 @@ async def test_metax_crud_with_json(sess, folder_id): assert False, "Metax ID was not in response data" object.append(metax_id) async with sess.get(f"{metax_url}/{metax_id}") as metax_resp: - assert metax_resp.status == 200, f"HTTP Status code error, got {resp.status}" + assert metax_resp.status == 200, f"HTTP Status code error, got {metax_resp.status}" metax_res = await metax_resp.json() assert ( res.get("doi", None) == metax_res["research_dataset"]["preferred_identifier"] @@ -1017,19 +1017,16 @@ async def test_metax_publish_dataset(sess, folder_id): await publish_folder(sess, folder_id) - # TODO: This must be updated as Metax identifier will be moved to folder from object after publishing - # for schema, object_id, metax_id in objects: - # async with sess.get(f"{objects_url}/{schema}/{object_id}") as resp: - # assert resp.status == 200, f"HTTP Status code error, got 
{resp.status}" - # res = await resp.json() - # actual = res["metaxIdentifier"] - # expected = {"identifier": metax_id, "status": "published"} - # assert expected == actual - - # async with sess.get(f"{metax_url}/{metax_id}") as metax_resp: - # assert metax_resp.status == 200, f"HTTP Status code error, got {resp.status}" - # metax_res = await metax_resp.json() - # assert metax_res["state"] == "published" + for schema, object_id, metax_id in objects: + async with sess.get(f"{objects_url}/{schema}/{object_id}") as resp: + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" + res = await resp.json() + assert res["metaxIdentifier"] == metax_id + + async with sess.get(f"{metax_url}/{metax_id}") as metax_resp: + assert metax_resp.status == 200, f"HTTP Status code error, got {metax_resp.status}" + metax_res = await metax_resp.json() + assert metax_res["state"] == "published" async def test_crud_folders_works(sess): From 51a21998ca4b563c22531809b2877a8eb5c4f1b8 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Mon, 14 Mar 2022 08:14:13 +0000 Subject: [PATCH 314/336] Add metax patching on object creation Creating metax dataset as draft forces preferred identifier overwrite with metax generated temporary id. We want to update this with submitter created DOI. --- metadata_backend/helpers/metax_api_handler.py | 23 ++++++++- tests/integration/mock_metax_api.py | 47 ++++++++++++++++++- 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/metadata_backend/helpers/metax_api_handler.py b/metadata_backend/helpers/metax_api_handler.py index 6769f8f65..2d35f7b92 100644 --- a/metadata_backend/helpers/metax_api_handler.py +++ b/metadata_backend/helpers/metax_api_handler.py @@ -102,12 +102,33 @@ async def post_dataset_as_draft(self, collection: str, data: Dict) -> str: f"Created Metax draft dataset {metax_data['identifier']} from Submitter {collection} " f"{data['accessionId']} with data: {metax_dataset}." ) - return metax_data["identifier"] + metax_id = metax_data["identifier"] else: # TODO: how front end should react on this?? reason = await resp.text() raise self.process_error(status, reason) + # Metax service overwrites preferred id (DOI) with temporary id for draft datasets + # Patching dataset with full research_dataset data updates preferred id to the real one + async with aiohttp.ClientSession() as sess: + resp = await sess.patch( + f"{self.metax_url}{self.rest_route}/{metax_id}", + json={"research_dataset": dataset_data}, + auth=aiohttp.BasicAuth(self.username, self.password), + ) + status = resp.status + if status == 200: + metax_data = await resp.json() + LOG.debug( + f"Updated Metax draft dataset {metax_data['identifier']} with permanent preferred " + "identifier." + ) + return metax_id + else: + # TODO: how front end should react on this?? + reason = await resp.text() + raise self.process_error(status, reason) + async def update_draft_dataset(self, collection: str, data: Dict) -> str: """Update draft dataset to Metax. diff --git a/tests/integration/mock_metax_api.py b/tests/integration/mock_metax_api.py index b1c75f567..c37cd8295 100644 --- a/tests/integration/mock_metax_api.py +++ b/tests/integration/mock_metax_api.py @@ -69,7 +69,7 @@ async def get_dataset(req: web.Request) -> web.Response: async def post_dataset(req: web.Request) -> web.Response: - """Mock endpoint for creating Metax dataset. + """Mock endpoint for creating draft Metax dataset. 
:params req: HTTP request with data for Metax dataset :return: HTTP response with mocked Metax dataset data @@ -88,6 +88,7 @@ async def post_dataset(req: web.Request) -> web.Response: ) validate_data(content) + content["research_dataset"]["preferred_identifier"] = f"draft:{str(uuid4())}" metax_id = str(uuid4()) metax_additions = { "identifier": metax_id, @@ -215,6 +216,49 @@ async def patch_datasets(req: web.Request) -> web.Response: ) +async def patch_dataset(req: web.Request) -> web.Response: + """Mock endpoint for patching Metax dataset. + + :params req: HTTP request with data for Metax dataset + :return: HTTP response with mocked Metax dataset data + """ + LOG.info("Patching Metax dataset") + metax_id = req.match_info["metax_id"] + if not metax_id: + raise web.HTTPBadRequest( + reason={ + "detail": ["Query params missing Metax ID."], + "error_identifier": datetime.now(), + } + ) + if metax_id not in drafts.keys(): + LOG.error(f"No dataset found with identifier {metax_id}") + raise web.HTTPNotFound(reason={"detail": "Not found."}) + + try: + content = await req.json() + except json.decoder.JSONDecodeError as e: + reason = f"JSON is not correctly formatted. See: {e}" + LOG.error(f"Error while validating payload: {reason}") + raise web.HTTPBadRequest( + reason={ + "detail": reason, + "error_identifier": datetime.now(), + } + ) + for key, value in content.items(): + drafts[metax_id][key] = value + + drafts[metax_id]["date_modified"] = str(datetime.now()) + + LOG.info(f'Updated Metax dataset with identifier {drafts[metax_id]["identifier"]}') + return web.Response( + body=ujson.dumps(drafts[metax_id], escape_forward_slashes=False), + status=200, + content_type="application/json", + ) + + async def publish_dataset(req: web.Request) -> web.Response: """Mock endpoint for publishing Metax dataset. @@ -313,6 +357,7 @@ def init() -> web.Application: web.post("/rpc/v2/datasets/publish_dataset", publish_dataset), web.get("/rest/v2/datasets/{metax_id}", get_dataset), web.patch("/rest/v2/datasets", patch_datasets), + web.patch("/rest/v2/datasets/{metax_id}", patch_dataset), ] app.router.add_routes(api_routes) LOG.info("Metax mock API started") From f8a74bdbd9c9270992cd9e705e5dd93c94cd2a9e Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 16 Mar 2022 10:19:13 +0000 Subject: [PATCH 315/336] Fix integration tests Because metax datasets are updated after creation there is also modified field in metax dataset now. Also adds authentication to integration tests so they can be run against metax test environment. 
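Together with the bulk patch endpoint added a few commits earlier, the mock now covers the whole two-step draft flow; a minimal sketch of exercising it directly, assuming the mock from tests/integration runs on localhost:8002 (the mock itself ignores the credentials, which a real Metax requires):

    from aiohttp import BasicAuth, ClientSession

    async def create_draft_with_doi(dataset: dict, doi: str) -> str:
        auth = BasicAuth("sd", "test")
        async with ClientSession() as sess:
            # 1) POST creates the draft; the mock overwrites preferred_identifier
            #    with a temporary draft:<uuid> value, like the real Metax does.
            resp = await sess.post(
                "http://localhost:8002/rest/v2/datasets", params="draft", json=dataset, auth=auth
            )
            metax_id = (await resp.json())["identifier"]
            # 2) PATCH the research_dataset back so the submitter's DOI becomes
            #    the preferred identifier again.
            dataset["research_dataset"]["preferred_identifier"] = doi
            await sess.patch(
                f"http://localhost:8002/rest/v2/datasets/{metax_id}",
                json={"research_dataset": dataset["research_dataset"]},
                auth=auth,
            )
            return metax_id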
--- tests/integration/run_tests.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index c19e0d782..560813d99 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -62,6 +62,7 @@ submit_url = f"{base_url}/submit" publish_url = f"{base_url}/publish" metax_url = f"{os.getenv('METAX_URL', 'http://localhost:8002')}/rest/v2/datasets" +auth = aiohttp.BasicAuth(os.getenv("METAX_USER", "sd"), os.getenv("METAX_PASS", "test")) # to form direct contact to db with create_folder() DATABASE = os.getenv("MONGO_DATABASE", "default") AUTHDB = os.getenv("MONGO_AUTHDB", "admin") @@ -896,20 +897,19 @@ async def test_metax_crud_with_xml(sess, folder_id): except KeyError: assert False, "Metax ID was not in response data" object.append(metax_id) - async with sess.get(f"{metax_url}/{metax_id}") as metax_resp: + async with sess.get(f"{metax_url}/{metax_id}", auth=auth) as metax_resp: assert metax_resp.status == 200, f"HTTP Status code error, got {metax_resp.status}" metax_res = await metax_resp.json() assert ( res.get("doi", None) == metax_res["research_dataset"]["preferred_identifier"] ), "Object's DOI was not in Metax response data preferred_identifier" - assert metax_res.get("date_modified", None) is None # PUT and PATCH to object endpoint updates draft dataset in Metax for Study and Dataset for schema, accession_id, filename in xml_files: await put_object_xml(sess, schema, accession_id, filename) for _, _, metax_id in ids: - async with sess.get(f"{metax_url}/{metax_id}") as metax_resp: + async with sess.get(f"{metax_url}/{metax_id}", auth=auth) as metax_resp: assert metax_resp.status == 200, f"HTTP Status code error, got {metax_resp.status}" metax_res = await metax_resp.json() assert ( @@ -921,7 +921,7 @@ async def test_metax_crud_with_xml(sess, folder_id): await delete_object(sess, schema, accession_id) for _, _, metax_id in ids: - async with sess.get(f"{metax_url}/{metax_id}") as metax_resp: + async with sess.get(f"{metax_url}/{metax_id}", auth=auth) as metax_resp: assert metax_resp.status == 404, f"HTTP Status code error - expected 404 Not Found, got {metax_resp.status}" @@ -951,13 +951,12 @@ async def test_metax_crud_with_json(sess, folder_id): except KeyError: assert False, "Metax ID was not in response data" object.append(metax_id) - async with sess.get(f"{metax_url}/{metax_id}") as metax_resp: + async with sess.get(f"{metax_url}/{metax_id}", auth=auth) as metax_resp: assert metax_resp.status == 200, f"HTTP Status code error, got {metax_resp.status}" metax_res = await metax_resp.json() assert ( res.get("doi", None) == metax_res["research_dataset"]["preferred_identifier"] ), "Object's DOI was not in Metax response data preferred_identifier" - assert metax_res.get("date_modified", None) is None for schema, accession_id, filename, _ in json_files: await put_object_json(sess, schema, accession_id, filename) From ad33e8148d18c41b637896928799b18d783eb7de Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 16 Mar 2022 11:47:33 +0000 Subject: [PATCH 316/336] Simplify aiohttp calls in metax_api_handler --- metadata_backend/helpers/metax_api_handler.py | 34 +++++++++---------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/metadata_backend/helpers/metax_api_handler.py b/metadata_backend/helpers/metax_api_handler.py index 2d35f7b92..283ae70e5 100644 --- a/metadata_backend/helpers/metax_api_handler.py +++ b/metadata_backend/helpers/metax_api_handler.py @@ -1,7 +1,7 @@ 
"""Class for handling calls to METAX API.""" from typing import Any, Dict, List -import aiohttp +from aiohttp import BasicAuth, ClientSession from aiohttp.web import HTTPBadRequest, HTTPError, HTTPForbidden, HTTPNotFound, Request from ..api.middlewares import get_session @@ -23,9 +23,7 @@ def __init__(self, req: Request) -> None: """ self.req = req self.db_client = self.req.app["db_client"] - - self.username = metax_config["username"] - self.password = metax_config["password"] + self.auth = BasicAuth(metax_config["username"], metax_config["password"]) self.metax_url = metax_config["url"] self.rest_route = metax_config["rest_route"] self.publish_route = metax_config["publish_route"] @@ -88,12 +86,12 @@ async def post_dataset_as_draft(self, collection: str, data: Dict) -> str: f"Creating draft dataset to Metax service from Submitter {collection} with accession ID " f"{data['accessionId']}" ) - async with aiohttp.ClientSession() as sess: + async with ClientSession() as sess: resp = await sess.post( f"{self.metax_url}{self.rest_route}", params="draft", json=metax_dataset, - auth=aiohttp.BasicAuth(self.username, self.password), + auth=self.auth, ) status = resp.status if status == 201: @@ -110,11 +108,11 @@ async def post_dataset_as_draft(self, collection: str, data: Dict) -> str: # Metax service overwrites preferred id (DOI) with temporary id for draft datasets # Patching dataset with full research_dataset data updates preferred id to the real one - async with aiohttp.ClientSession() as sess: + async with ClientSession() as sess: resp = await sess.patch( f"{self.metax_url}{self.rest_route}/{metax_id}", json={"research_dataset": dataset_data}, - auth=aiohttp.BasicAuth(self.username, self.password), + auth=self.auth, ) status = resp.status if status == 200: @@ -150,12 +148,12 @@ async def update_draft_dataset(self, collection: str, data: Dict) -> str: metax_dataset["research_dataset"] = dataset_data LOG.info(f"Sending updated {collection} object data to Metax service.") - async with aiohttp.ClientSession() as sess: + async with ClientSession() as sess: resp = await sess.put( f'{self.metax_url}{self.rest_route}/{data["metaxIdentifier"]}', params="draft", json=metax_dataset, - auth=aiohttp.BasicAuth(self.username, self.password), + auth=self.auth, ) status = resp.status if status == 200: @@ -172,10 +170,10 @@ async def delete_draft_dataset(self, metax_id: str) -> None: :param metax_id: Identification string pointing to Metax dataset to be deleted """ - async with aiohttp.ClientSession() as sess: + async with ClientSession() as sess: resp = await sess.delete( f"{self.metax_url}{self.rest_route}/{metax_id}", - auth=aiohttp.BasicAuth(self.username, self.password), + auth=self.auth, ) status = resp.status if status == 204: @@ -194,10 +192,10 @@ async def update_dataset_with_doi_info(self, doi_info: Dict, metax_ids: List) -> LOG.info("Updating object metax metadata with doi info") bulk_data = [] for id in metax_ids: - async with aiohttp.ClientSession() as sess: + async with ClientSession() as sess: resp = await sess.get( f"{self.metax_url}{self.rest_route}/{id['metaxIdentifier']}", - auth=aiohttp.BasicAuth(self.username, self.password), + auth=self.auth, ) status = resp.status if status == 200: @@ -215,11 +213,11 @@ async def update_dataset_with_doi_info(self, doi_info: Dict, metax_ids: List) -> ) # for id in metax_ids: - async with aiohttp.ClientSession() as sess: + async with ClientSession() as sess: resp = await sess.patch( f"{self.metax_url}{self.rest_route}", json=bulk_data, - 
auth=aiohttp.BasicAuth(self.username, self.password),
+                    auth=self.auth,
                 )
                 if resp.status == 200:
                     LOG.info("Objects metadata are updated to Metax for publishing")
                     return await resp.json()
                 else:
                     reason = await resp.text()
                     raise self.process_error(status, reason)
@@ -238,11 +236,11 @@ async def publish_dataset(self, _metax_ids: List[Dict]) -> None:
         for object in _metax_ids:
             metax_id = object["metaxIdentifier"]
             doi = object["doi"]
-            async with aiohttp.ClientSession() as sess:
+            async with ClientSession() as sess:
                 resp = await sess.post(
                     f"{self.metax_url}{self.publish_route}",
                     params={"identifier": metax_id},
-                    auth=aiohttp.BasicAuth(self.username, self.password),
+                    auth=self.auth,
                 )
                 status = resp.status
                 if status == 200:

From 05cab09c2042de5b2b70bd5315d0d2798f696dfe Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Thu, 10 Mar 2022 13:31:36 +0000
Subject: [PATCH 317/336] Fix bug with multipart content filename extraction

It is possible to submit several metadata objects, each with its own file,
e.g. via the submit endpoint. Until now only the last file's filename was
extracted. That is now fixed.
---
 metadata_backend/api/handlers/common.py     | 16 ++++-----
 metadata_backend/api/handlers/object.py     | 37 +++++++++++----------
 metadata_backend/api/handlers/submission.py |  6 ++--
 3 files changed, 30 insertions(+), 29 deletions(-)

diff --git a/metadata_backend/api/handlers/common.py b/metadata_backend/api/handlers/common.py
index c575c3d58..00aa94df7 100644
--- a/metadata_backend/api/handlers/common.py
+++ b/metadata_backend/api/handlers/common.py
@@ -15,7 +15,7 @@

 async def multipart_content(
     req: Request, extract_one: bool = False, expect_xml: bool = False
-) -> Tuple[List[Tuple[Any, str]], str, str]:
+) -> Tuple[List[Tuple[Any, str, str]], str]:
     """Get content(s) and schema type(s) of a multipart request (from either csv or xml format).

     Note: for multiple files support check: https://docs.aiohttp.org/en/stable/multipart.html#hacking-multipart

     :param req: POST request containing "multipart/form-data" content
     :raises: HTTPBadRequest for multiple different reasons
     :returns: content and schema type for each uploaded file and file type of the upload
     """
-    xml_files: List[Tuple[str, str]] = []
-    csv_files: List[Tuple[Dict, str]] = []
+    xml_files: List[Tuple[str, str, str]] = []
+    csv_files: List[Tuple[Dict, str, str]] = []
     try:
         reader = await req.multipart()
     except AssertionError:
@@ -59,20 +59,20 @@ async def multipart_content(
         if expect_xml or part.headers[hdrs.CONTENT_TYPE] == "text/xml":
             content, schema_type = await _extract_upload(part)
             _check_xml(content)
-            xml_files.append((content, schema_type))
+            xml_files.append((content, schema_type, filename))
         elif part.headers[hdrs.CONTENT_TYPE] == "text/csv":
            content, schema_type = await _extract_upload(part)
            _check_csv(content)
            csv_content = CSVToJSONParser().parse(schema_type, content)
            for row in csv_content:
-                csv_files.append((row, schema_type))
+                csv_files.append((row, schema_type, filename))
         else:
             reason = "Submitted file was not proper XML nor CSV."
LOG.error(reason) raise web.HTTPBadRequest(reason=reason) # Return extracted content - return _get_content_with_type(xml_files, csv_files) + (filename,) + return _get_content_with_type(xml_files, csv_files) async def _extract_upload(part: BodyPartReader) -> Tuple[str, str]: @@ -137,8 +137,8 @@ def _check_xml(content: str) -> bool: def _get_content_with_type( - xml_files: List[Tuple[str, str]], csv_files: List[Tuple[Dict, str]] -) -> Tuple[List[Tuple[Any, str]], str]: + xml_files: List[Tuple[str, str, str]], csv_files: List[Tuple[Dict, str, str]] +) -> Tuple[List[Tuple[Any, str, str]], str]: """Return either list of XML or CSV files with the file type info. :param xml_files: List of xml contents with schema types diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index 230fb1d69..680419a27 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -103,8 +103,9 @@ async def post_object(self, req: Request) -> Response: """ _allowed_csv = {"sample"} _allowed_doi = {"study", "dataset"} - schema_type = req.match_info["schema"] + filename = "" + cont_type = "" folder_id = req.query.get("folder", "") if not folder_id: @@ -130,22 +131,21 @@ async def post_object(self, req: Request) -> Response: reason = "Only one study is allowed per submission." raise web.HTTPBadRequest(reason=reason) - content: Union[Dict[str, Any], str, List[Tuple[Any, str]]] + content: Union[Dict[str, Any], str, List[Tuple[Any, str, str]]] operator: Union[Operator, XMLOperator] if req.content_type == "multipart/form-data": _only_xml = False if schema_type in _allowed_csv else True - files, cont_type, filename = await multipart_content(req, extract_one=True, expect_xml=_only_xml) + files, cont_type = await multipart_content(req, extract_one=True, expect_xml=_only_xml) if cont_type == "xml": # from this tuple we only care about the content # files should be of form (content, schema) - content, _ = files[0] + content, _, filename = files[0] else: # for CSV files we need to treat this as a list of tuples (content, schema) content = files # If multipart request contains XML, XML operator is used. # Else the multipart request is expected to contain CSV file(s) which are converted into JSON. operator = XMLOperator(db_client) if cont_type == "xml" else Operator(db_client) - patch_params = {"cont_type": cont_type, "filename": filename} else: content = await self._get_data(req) if not req.path.startswith("/drafts"): @@ -157,16 +157,17 @@ async def post_object(self, req: Request) -> Response: data: Union[List[Dict[str, str]], Dict[str, str]] if isinstance(content, List): LOG.debug(f"Inserting multiple objects for {schema_type}.") - objects: List[Dict[str, Any]] = [] + objects: List[Tuple[Dict[str, Any], str]] = [] for item in content: json_data = await operator.create_metadata_object(collection, item[0]) - objects.append(json_data) + filename = item[2] + objects.append((json_data, filename)) LOG.info( f"POST object with accesssion ID {json_data['accessionId']} in schema {collection} was successful." 
) # we format like this to make it consistent with the response from /submit endpoint - data = [dict({"accessionId": item["accessionId"]}, **{"schema": schema_type}) for item in objects] + data = [dict({"accessionId": item["accessionId"]}, **{"schema": schema_type}) for item, _ in objects] # we take the first result if we get multiple location_headers = CIMultiDict(Location=f"{url}/{data[0]['accessionId']}") else: @@ -176,15 +177,15 @@ async def post_object(self, req: Request) -> Response: LOG.info( f"POST object with accesssion ID {json_data['accessionId']} in schema {collection} was successful." ) - objects = [json_data] + objects = [(json_data, filename)] # Gathering data for object to be added to folder - patch = self._prepare_folder_patch_new_object(collection, objects, patch_params) + patch = self._prepare_folder_patch_new_object(collection, objects, cont_type) await folder_op.update_folder(folder_id, patch) # Create draft dataset to Metax catalog if collection in _allowed_doi: - [await self._create_metax_dataset(req, collection, item) for item in objects] + [await self._create_metax_dataset(req, collection, item) for item, _ in objects] body = ujson.dumps(data, escape_forward_slashes=False) @@ -286,8 +287,8 @@ async def put_object(self, req: Request) -> Response: operator: Union[Operator, XMLOperator] filename = "" if req.content_type == "multipart/form-data": - files, _, filename = await multipart_content(req, extract_one=True, expect_xml=True) - content, _ = files[0] + files, _ = await multipart_content(req, extract_one=True, expect_xml=True) + content, _, _ = files[0] operator = XMLOperator(db_client) else: content = await self._get_data(req) @@ -381,7 +382,7 @@ async def patch_object(self, req: Request) -> Response: LOG.info(f"PATCH object with accession ID {accession_id} in schema {collection} was successful.") return web.Response(body=body, status=200, content_type="application/json") - def _prepare_folder_patch_new_object(self, schema: str, objects: List, params: Dict[str, str]) -> List: + def _prepare_folder_patch_new_object(self, schema: str, objects: List, cont_type: str) -> List: """Prepare patch operations list for adding an object or objects to a folder. 
:param schema: schema of objects to be added to the folder @@ -389,10 +390,10 @@ def _prepare_folder_patch_new_object(self, schema: str, objects: List, params: D :param params: addidtional data required for db entry :returns: list of patch operations """ - if not params.get("cont_type", None): + if not cont_type: submission_type = "Form" else: - submission_type = params["cont_type"].upper() + submission_type = cont_type.upper() if schema.startswith("draft"): path = "/drafts/-" @@ -401,7 +402,7 @@ def _prepare_folder_patch_new_object(self, schema: str, objects: List, params: D patch = [] patch_ops: Dict[str, Any] = {} - for object in objects: + for object, filename in objects: try: title = object["descriptor"]["studyTitle"] if schema in ["study", "draft-study"] else object["title"] except (TypeError, KeyError): @@ -420,7 +421,7 @@ def _prepare_folder_patch_new_object(self, schema: str, objects: List, params: D }, } if submission_type != "Form": - patch_ops["value"]["tags"]["fileName"] = params["filename"] + patch_ops["value"]["tags"]["fileName"] = filename patch.append(patch_ops) return patch diff --git a/metadata_backend/api/handlers/submission.py b/metadata_backend/api/handlers/submission.py index 9c741cfee..a66e2a514 100644 --- a/metadata_backend/api/handlers/submission.py +++ b/metadata_backend/api/handlers/submission.py @@ -31,7 +31,7 @@ async def submit(self, req: Request) -> Response: :raises: HTTPBadRequest if request is missing some parameters or cannot be processed :returns: XML-based receipt from submission """ - files, _, _ = await multipart_content(req, expect_xml=True) + files, _ = await multipart_content(req, expect_xml=True) schema_types = Counter(file[1] for file in files) if "submission" not in schema_types: reason = "There must be a submission.xml file in submission." @@ -92,8 +92,8 @@ async def validate(self, req: Request) -> Response: :param req: Multipart POST request with submission.xml and files :returns: JSON response indicating if validation was successful or not """ - files, _, _ = await multipart_content(req, extract_one=True, expect_xml=True) - xml_content, schema_type = files[0] + files, _ = await multipart_content(req, extract_one=True, expect_xml=True) + xml_content, schema_type, _ = files[0] validator = await self._perform_validation(schema_type, xml_content) return web.Response(body=validator.resp_body, content_type="application/json") From f003db9ec04c3223b6db6b09368877ad50f0571a Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Thu, 10 Mar 2022 13:50:54 +0000 Subject: [PATCH 318/336] Update submit endpoint Separate add and modify actions to own functions. Add folder patching after submission add and modify actions. Add integration with metax on submission add and modify actions. Update integration tests. 
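As a rough illustration of the updated flow, a client now posts the submission
XML together with the mandatory `folder` query parameter (a minimal sketch
mirroring the integration tests; `base_url` and `folder_id` are placeholders,
not values defined by this patch):

    import aiohttp

    async def submit_to_folder(base_url: str, folder_id: str, data: aiohttp.FormData) -> list:
        # `folder` is now a required query parameter; omitting it yields HTTP 400
        async with aiohttp.ClientSession() as sess:
            async with sess.post(f"{base_url}/submit", params={"folder": folder_id}, data=data) as resp:
                assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
                # one {"accessionId": ..., "schema": ...} entry per add/modify action
                return await resp.json()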
--- metadata_backend/api/handlers/object.py | 4 +- metadata_backend/api/handlers/submission.py | 155 +++++++++++++++----- tests/integration/run_tests.py | 67 +++++++-- 3 files changed, 177 insertions(+), 49 deletions(-) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index 680419a27..85a176c6b 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -185,7 +185,7 @@ async def post_object(self, req: Request) -> Response: # Create draft dataset to Metax catalog if collection in _allowed_doi: - [await self._create_metax_dataset(req, collection, item) for item, _ in objects] + [await self.create_metax_dataset(req, collection, item) for item, _ in objects] body = ujson.dumps(data, escape_forward_slashes=False) @@ -463,7 +463,7 @@ def _prepare_folder_patch_update_object(self, schema: str, data: Dict, filename: ) return [patch_op] - async def _create_metax_dataset(self, req: Request, collection: str, object: Dict) -> str: + async def create_metax_dataset(self, req: Request, collection: str, object: Dict) -> str: """Handle connection to Metax api handler for dataset creation. Dataset or Study object is assigned with DOI diff --git a/metadata_backend/api/handlers/submission.py b/metadata_backend/api/handlers/submission.py index a66e2a514..ac461ef58 100644 --- a/metadata_backend/api/handlers/submission.py +++ b/metadata_backend/api/handlers/submission.py @@ -5,19 +5,20 @@ import ujson from aiohttp import web from aiohttp.web import Request, Response -from motor.motor_asyncio import AsyncIOMotorClient from multidict import MultiDict, MultiDictProxy from xmlschema import XMLSchemaException from ...helpers.logger import LOG +from ...helpers.metax_api_handler import MetaxServiceHandler from ...helpers.parser import XMLToJSONParser from ...helpers.schema_loader import SchemaNotFoundException, XMLSchemaLoader from ...helpers.validator import XMLValidator -from ..operators import Operator, XMLOperator +from ..operators import FolderOperator, Operator, XMLOperator from .common import multipart_content +from .object import ObjectAPIHandler -class SubmissionAPIHandler: +class SubmissionAPIHandler(ObjectAPIHandler): """Handler for non-rest API methods.""" async def submit(self, req: Request) -> Response: @@ -66,20 +67,20 @@ async def submit(self, req: Request) -> Response: # Go through parsed files and do the actual action results: List[Dict] = [] - db_client = req.app["db_client"] for file in files: content_xml = file[0] schema_type = file[1] + filename = file[2] if schema_type == "submission": LOG.debug("file has schema of submission type, continuing ...") continue # No need to use submission xml action = actions[schema_type] if isinstance(action, List): for item in action: - result = await self._execute_action(schema_type, content_xml, db_client, item) + result = await self._execute_action(req, schema_type, content_xml, item, filename) results.append(result) else: - result = await self._execute_action(schema_type, content_xml, db_client, action) + result = await self._execute_action(req, schema_type, content_xml, action, filename) results.append(result) body = ujson.dumps(results, escape_forward_slashes=False) @@ -115,7 +116,7 @@ async def _perform_validation(self, schema_type: str, xml_content: str) -> XMLVa LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - async def _execute_action(self, schema: str, content: str, db_client: AsyncIOMotorClient, action: str) -> Dict: + async def _execute_action(self, req: 
Request, schema: str, content: str, action: str, filename: str) -> Dict: """Complete the command in the action set of the submission file. Only "add/modify/validate" actions are supported. @@ -128,34 +129,10 @@ async def _execute_action(self, schema: str, content: str, db_client: AsyncIOMot :returns: Dict containing specific action that was completed """ if action == "add": - json_data = await XMLOperator(db_client).create_metadata_object(schema, content) - result = { - "accessionId": json_data["accessionId"], - "schema": schema, - } - LOG.debug(f"added some content in {schema} ...") - return result + return await self._execute_action_add(req, schema, content, filename) elif action == "modify": - data_as_json = XMLToJSONParser().parse(schema, content) - if "accessionId" in data_as_json: - accession_id = data_as_json["accessionId"] - else: - alias = data_as_json["alias"] - query = MultiDictProxy(MultiDict([("alias", alias)])) - data, _, _, _ = await Operator(db_client).query_metadata_database(schema, query, 1, 1, []) - if len(data) > 1: - reason = "Alias in provided XML file corresponds with more than one existing metadata object." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - accession_id = data[0]["accessionId"] - data_as_json.pop("accessionId", None) - result = { - "accessionId": await Operator(db_client).update_metadata_object(schema, accession_id, data_as_json), - "schema": schema, - } - LOG.debug(f"modified some content in {schema} ...") - return result + return await self._execute_action_modify(req, schema, content, filename) elif action == "validate": validator = await self._perform_validation(schema, content) @@ -165,3 +142,115 @@ async def _execute_action(self, schema: str, content: str, db_client: AsyncIOMot reason = f"Action {action} in XML is not supported." LOG.error(reason) raise web.HTTPBadRequest(reason=reason) + + async def _execute_action_add(self, req: Request, schema: str, content: str, filename: str) -> Dict: + """Complete the command in the action set of the submission file. + + Only "add/modify/validate" actions are supported. + + :param schema: Schema type of the object in question + :param content: Metadata object referred to in submission + :param db_client: Database client for database operations + :param action: Type of action to be done + :raises: HTTPBadRequest if an incorrect or non-supported action is called + :returns: Dict containing specific action that was completed + """ + _allowed_doi = {"study", "dataset"} + db_client = req.app["db_client"] + folder_op = FolderOperator(db_client) + + folder_id = req.query.get("folder", "") + if not folder_id: + reason = "Folder is required query parameter. Please provide folder id where object is added to." + raise web.HTTPBadRequest(reason=reason) + + # we need to check if there is already a study in a folder + # we only allow one study per folder + # this is not enough to catch duplicate entries if updates happen in parallel + # that is why we check in db_service.update_study + if not req.path.startswith("/drafts") and schema == "study": + _ids = await folder_op.get_collection_objects(folder_id, schema) + if len(_ids) == 1: + reason = "Only one study is allowed per submission." 
+ raise web.HTTPBadRequest(reason=reason) + + json_data = await XMLOperator(db_client).create_metadata_object(schema, content) + + result = { + "accessionId": json_data["accessionId"], + "schema": schema, + } + LOG.debug(f"added some content in {schema} ...") + + # Gathering data for object to be added to folder + patch = self._prepare_folder_patch_new_object(schema, [(json_data, filename)], "xml") + await folder_op.update_folder(folder_id, patch) + + # Create draft dataset to Metax catalog + if schema in _allowed_doi: + await self.create_metax_dataset(req, schema, json_data) + + return result + + async def _execute_action_modify(self, req: Request, schema: str, content: str, filename: str) -> Dict: + """Complete the command in the action set of the submission file. + + Only "add/modify/validate" actions are supported. + + :param schema: Schema type of the object in question + :param content: Metadata object referred to in submission + :param db_client: Database client for database operations + :param action: Type of action to be done + :raises: HTTPBadRequest if an incorrect or non-supported action is called + :returns: Dict containing specific action that was completed + """ + _allowed_doi = {"study", "dataset"} + db_client = req.app["db_client"] + folder_op = FolderOperator(db_client) + operator = Operator(db_client) + data_as_json = XMLToJSONParser().parse(schema, content) + if "accessionId" in data_as_json: + accession_id = data_as_json["accessionId"] + else: + alias = data_as_json["alias"] + query = MultiDictProxy(MultiDict([("alias", alias)])) + data, _, _, _ = await operator.query_metadata_database(schema, query, 1, 1, []) + if len(data) > 1: + reason = "Alias in provided XML file corresponds with more than one existing metadata object." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + accession_id = data[0]["accessionId"] + data_as_json.pop("accessionId", None) + result = { + # should here be replace_metadata_object ?? + "accessionId": await operator.update_metadata_object(schema, accession_id, data_as_json), + "schema": schema, + } + + exists, folder_id, published = await folder_op.check_object_in_folder(schema, result["accessionId"]) + if exists: + if published: + reason = "Published objects cannot be updated." 
+ LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + + # If there's changed title it will be updated to folder + try: + _ = data_as_json["descriptor"]["studyTitle"] if schema == "study" else data_as_json["title"] + # should we overwrite filename as it is the name of file with partial update data + patch = self._prepare_folder_patch_update_object(schema, data_as_json, filename) + await folder_op.update_folder(folder_id, patch) + except (TypeError, KeyError): + pass + + # Update draft dataset to Metax catalog + if schema in _allowed_doi: + object_data, _ = await operator.read_metadata_object(schema, accession_id) + # MYPY related if statement, Operator (when not XMLOperator) always returns object_data as dict + if isinstance(object_data, Dict): + await MetaxServiceHandler(req).update_draft_dataset(schema, object_data) + else: + raise ValueError("Object's data must be dictionary") + + LOG.debug(f"modified some content in {schema} ...") + return result diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index 560813d99..af4833f07 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -1589,6 +1589,8 @@ async def test_get_folders_objects(sess, folder_id: str, project_id: str): assert response["folders"][0]["metadataObjects"][0]["accessionId"] == accession_id assert response["folders"][0]["metadataObjects"][0]["tags"]["submissionType"] == "XML" + await delete_object(sess, "study", accession_id) + async def test_submissions_work(sess, folder_id): """Test actions in submission XML files. @@ -1599,7 +1601,8 @@ async def test_submissions_work(sess, folder_id): # Post original submission with two 'add' actions sub_files = [("submission", "ERA521986_valid.xml"), ("study", "SRP000539.xml"), ("sample", "SRS001433.xml")] submission_data = await create_multi_file_request_data(sub_files) - async with sess.post(f"{submit_url}", data=submission_data) as resp: + + async with sess.post(f"{submit_url}", params={"folder": folder_id}, data=submission_data) as resp: LOG.debug("Checking initial submission worked") assert resp.status == 200, f"HTTP Status code error, got {resp.status}" res = await resp.json() @@ -1607,19 +1610,7 @@ async def test_submissions_work(sess, folder_id): assert res[0]["schema"] == "study", "expected first element to be study" assert res[1]["schema"] == "sample", "expected second element to be sample" study_access_id = res[0]["accessionId"] - patch = [ - { - "op": "add", - "path": "/metadataObjects/-", - "value": {"accessionId": res[0]["accessionId"], "schema": res[0]["schema"]}, - }, - { - "op": "add", - "path": "/metadataObjects/-", - "value": {"accessionId": res[1]["accessionId"], "schema": res[1]["schema"]}, - }, - ] - await patch_folder(sess, folder_id, patch) + sample_access_id = res[1]["accessionId"] # Sanity check that the study object was inserted correctly before modifying it async with sess.get(f"{objects_url}/study/{study_access_id}") as resp: @@ -1631,6 +1622,37 @@ async def test_submissions_work(sess, folder_id): assert res["descriptor"]["studyTitle"] == ( "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing" ), "study title does not match" + metax_id = res.get("metaxIdentifier", None) + doi = res.get("doi", None) + assert metax_id is not None + assert doi is not None + + # check that objects are added to folder + async with sess.get(f"{folders_url}/{folder_id}") as resp: + LOG.debug(f"Checking that folder {folder_id} was patched") + res = await resp.json() + 
expected_study = {
+            "accessionId": study_access_id,
+            "schema": "study",
+            "tags": {
+                "submissionType": "XML",
+                "displayTitle": (
+                    "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing"
+                ),
+                "fileName": "SRP000539.xml",
+            },
+        }
+        assert expected_study in res["metadataObjects"], "folder metadataObjects content mismatch"
+        expected_sample = {
+            "accessionId": sample_access_id,
+            "schema": "sample",
+            "tags": {
+                "submissionType": "XML",
+                "displayTitle": "HapMap sample from Homo sapiens",
+                "fileName": "SRS001433.xml",
+            },
+        }
+        assert expected_sample in res["metadataObjects"], "folder metadataObjects content mismatch"

     # Give test file the correct accession id
     LOG.debug("Sharing the correct accession ID created in this test instance")
@@ -1662,6 +1684,23 @@ async def test_submissions_work(sess, folder_id):
         assert res["descriptor"]["studyTitle"] == (
             "Different title for testing purposes"
         ), "updated study title does not match"
+        assert res["metaxIdentifier"] == metax_id
+        assert res["doi"] == doi
+
+    # check that study is updated to folder
+    async with sess.get(f"{folders_url}/{folder_id}") as resp:
+        LOG.debug(f"Checking that folder {folder_id} was patched")
+        res = await resp.json()
+        expected_study = {
+            "accessionId": study_access_id,
+            "schema": "study",
+            "tags": {
+                "submissionType": "XML",
+                "displayTitle": "Different title for testing purposes",
+                "fileName": "SRP000539_modified.xml",
+            },
+        }
+        assert expected_study in res["metadataObjects"], "folder metadataObjects content mismatch"

     # Remove the accession id that was used for testing from test file
     LOG.debug("Sharing the correct accession ID created in this test instance")
     mod_study = testfiles_root / "study" / "SRP000539_modified.xml"

From e362c0cabf8051f3c6f6afcfb45ef5d2f31a5400 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Thu, 10 Mar 2022 14:53:42 +0000
Subject: [PATCH 319/336] Update changelog and openapi specs

Also updates submission handler's functions' docstrings
---
 CHANGELOG.md                                |  6 +++++-
 docs/specification.yml                      | 13 ++++++++-----
 metadata_backend/api/handlers/submission.py | 19 ++++++++-----------
 3 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a47b71b82..2f7cde3a6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,12 +8,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 ### Added
+- Submission endpoint update #371
+  - Adds mandatory query parameter `folder` for submit endpoint POST
+  - On actions add and modify object is added or updated to folder(submission) where it belongs with its accession ID, schema, submission type, title and filename
+  - Adds metax integration to submit endpoint
 - Integration with Metax service #356
   - Adds new local container for testing against mocked Metax API
   - Introduces new env vars: METAX_USER, METAX_PASS, METAX_URL
   - Adds new key metaxIdentifier to Study and Dataset collections containing metax id returned from Metax API
   - Adds new handler MetaxServiceHandler to take care of mapping Submitter metadata to Metax metadata and to connect to Metax API
 - Add patching of folders after object save and update operations #354
+  - Adds mandatory query parameter `folder` for objects endpoint POST
   - Object is added or updated to folder(submission) where it belongs with it's accession ID, schema, submission type, title and filename in the case of CSV and XML upload
 - Adds configuration for mypy linting to VScode devcontainer setup
 - Templates API #256
@@ -57,7 +62,6 @@ and this project adheres to [Semantic 
Versioning](https://semver.org/spec/v2.0.0 - Multilevel add patch objects to support `/extraInfo/datasetIdentifiers/-` which needs dot notation for mongodb to work e.g. `extraInfo.datasetIdentifiers` #332 ### Changed - - Refactor auth.py package by removing custom OIDC code and replacing it with https://github.com/IdentityPython/JWTConnect-Python-OidcRP. #315 - New mandatory ENV `OIDC_URL` - New optional ENVs `OIDC_SCOPE`, `AUTH_METHOD` diff --git a/docs/specification.yml b/docs/specification.yml index 9cc3555d3..88b0ce820 100644 --- a/docs/specification.yml +++ b/docs/specification.yml @@ -21,6 +21,14 @@ paths: tags: - Submission summary: XML submission endpoint, will also trigger validation. + parameters: + - name: folder + in: query + schema: + type: string + description: The folder ID where object belongs to. + required: true + example: "folder=12345" requestBody: content: multipart/form-data: @@ -293,11 +301,6 @@ paths: - Submission summary: Submit data to a specific schema parameters: - - in: query - name: folder - schema: - type: string - description: The folder ID where object belongs to. - name: schema in: path description: Name of the Metadata schema. diff --git a/metadata_backend/api/handlers/submission.py b/metadata_backend/api/handlers/submission.py index ac461ef58..c0a935d15 100644 --- a/metadata_backend/api/handlers/submission.py +++ b/metadata_backend/api/handlers/submission.py @@ -121,10 +121,11 @@ async def _execute_action(self, req: Request, schema: str, content: str, action: Only "add/modify/validate" actions are supported. + :param req: Multipart POST request :param schema: Schema type of the object in question :param content: Metadata object referred to in submission - :param db_client: Database client for database operations :param action: Type of action to be done + :param filename: Name of file being processed :raises: HTTPBadRequest if an incorrect or non-supported action is called :returns: Dict containing specific action that was completed """ @@ -144,14 +145,12 @@ async def _execute_action(self, req: Request, schema: str, content: str, action: raise web.HTTPBadRequest(reason=reason) async def _execute_action_add(self, req: Request, schema: str, content: str, filename: str) -> Dict: - """Complete the command in the action set of the submission file. - - Only "add/modify/validate" actions are supported. + """Complete the add action. + :param req: Multipart POST request :param schema: Schema type of the object in question :param content: Metadata object referred to in submission - :param db_client: Database client for database operations - :param action: Type of action to be done + :param filename: Name of file being processed :raises: HTTPBadRequest if an incorrect or non-supported action is called :returns: Dict containing specific action that was completed """ @@ -193,14 +192,12 @@ async def _execute_action_add(self, req: Request, schema: str, content: str, fil return result async def _execute_action_modify(self, req: Request, schema: str, content: str, filename: str) -> Dict: - """Complete the command in the action set of the submission file. - - Only "add/modify/validate" actions are supported. + """Complete the modify action. 
+ :param req: Multipart POST request :param schema: Schema type of the object in question :param content: Metadata object referred to in submission - :param db_client: Database client for database operations - :param action: Type of action to be done + :param filename: Name of file being processed :raises: HTTPBadRequest if an incorrect or non-supported action is called :returns: Dict containing specific action that was completed """ From 91691d5f7f974ad5c835d8664ffc24dbeadcbb57 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Tue, 29 Mar 2022 14:37:50 +0000 Subject: [PATCH 320/336] Fix merge related bugs --- metadata_backend/api/handlers/object.py | 1 - tests/integration/run_tests.py | 45 +++++++++++++++---------- tests/test_handlers.py | 2 +- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index 85a176c6b..2d32501e2 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -113,7 +113,6 @@ async def post_object(self, req: Request) -> Response: raise web.HTTPBadRequest(reason=reason) await self._handle_check_ownership(req, "folders", folder_id) - patch_params = {} self._check_schema_exists(schema_type) collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index af4833f07..6205f7328 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -652,28 +652,29 @@ async def test_csv(sess, folder_id): """ _schema = "sample" _filename = "EGAformat.csv" - accession_id = await post_object(sess, _schema, folder_id, _filename) + samples = await post_object(sess, _schema, folder_id, _filename) # there are 3 rows and we expected to get 3rd - assert len(accession_id[0]) == 3, f"expected nb of CSV entries does not match, we got: {len(accession_id)}" - _first_csv_row_id = accession_id[0][0]["accessionId"] + assert len(samples[0]) == 3, f"expected nb of CSV entries does not match, we got: {len(samples[0])}" + # _first_csv_row_id = accession_id[0][0]["accessionId"] + first_sample = samples[0][0]["accessionId"] - async with sess.get(f"{objects_url}/{_schema}/{_first_csv_row_id}") as resp: - LOG.debug(f"Checking that {_first_csv_row_id} JSON is in {_schema}") + async with sess.get(f"{objects_url}/{_schema}/{first_sample}") as resp: + LOG.debug(f"Checking that {first_sample} JSON is in {_schema}") assert resp.status == 200, f"HTTP Status code error, got {resp.status}" res = await resp.json() title = res.get("title", "") - await check_folders_object_patch(sess, folder_id, _schema, accession_id, title, _filename) + await check_folders_object_patch(sess, folder_id, _schema, samples, title, _filename) - await delete_object(sess, _schema, _first_csv_row_id) - async with sess.get(f"{objects_url}/{_schema}/{_first_csv_row_id}") as resp: - LOG.debug(f"Checking that JSON object {_first_csv_row_id} was deleted") + await delete_object(sess, _schema, first_sample) + async with sess.get(f"{objects_url}/{_schema}/{first_sample}") as resp: + LOG.debug(f"Checking that JSON object {first_sample} was deleted") assert resp.status == 404, f"HTTP Status code error, got {resp.status}" async with sess.get(f"{folders_url}/{folder_id}") as resp: - LOG.debug(f"Checking that object {_first_csv_row_id} was deleted from folder {folder_id}") + LOG.debug(f"Checking that object {first_sample} was deleted from folder {folder_id}") res = await resp.json() - 
expected_true = not any(d["accessionId"] == _first_csv_row_id for d in res["metadataObjects"]) - assert expected_true, f"object {_first_csv_row_id} still exists" + expected_true = not any(d["accessionId"] == first_sample for d in res["metadataObjects"]) + assert expected_true, f"object {first_sample} still exists" _filename = "empty.csv" # status should be 400 @@ -681,8 +682,11 @@ async def test_csv(sess, folder_id): _filename = "EGA_sample_w_issue.csv" # status should be 201 but we expect 3 rows, as the CSV has 4 rows one of which is empty - accession_id = await post_object_expect_status(sess, _schema, folder_id, _filename, 201) - assert len(accession_id[0]) == 3, f"expected nb of CSV entries does not match, we got: {len(accession_id)}" + samples_2 = await post_object_expect_status(sess, _schema, folder_id, _filename, 201) + assert len(samples_2[0]) == 3, f"expected nb of CSV entries does not match, we got: {len(samples_2[0])}" + + for sample in samples_2[0] + samples[0][1:]: + await delete_object(sess, _schema, sample["accessionId"]) async def test_put_objects(sess, folder_id): @@ -706,6 +710,7 @@ async def test_put_objects(sess, folder_id): "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing", "SRP000539_put.xml", ) + await delete_object(sess, "study", accession_id[0]) async def test_crud_drafts_works(sess, schema, orginal_file, update_file, folder_id): @@ -847,7 +852,7 @@ async def test_getting_all_objects_from_schema_works(sess, folder_id): assert ans["page"]["page"] == 1 assert ans["page"]["size"] == 10 assert ans["page"]["totalPages"] == 2 - assert ans["page"]["totalObjects"] == 18, ans["page"]["totalObjects"] + assert ans["page"]["totalObjects"] == 13, ans["page"]["totalObjects"] assert len(ans["objects"]) == 10 # Test with custom pagination values @@ -856,8 +861,8 @@ async def test_getting_all_objects_from_schema_works(sess, folder_id): ans = await resp.json() assert ans["page"]["page"] == 2 assert ans["page"]["size"] == 3 - assert ans["page"]["totalPages"] == 6, ans["page"]["totalPages"] - assert ans["page"]["totalObjects"] == 18, ans["page"]["totalObjects"] + assert ans["page"]["totalPages"] == 5, ans["page"]["totalPages"] + assert ans["page"]["totalObjects"] == 13, ans["page"]["totalObjects"] assert len(ans["objects"]) == 3 # Test with wrong pagination values @@ -1028,7 +1033,7 @@ async def test_metax_publish_dataset(sess, folder_id): assert metax_res["state"] == "published" -async def test_crud_folders_works(sess): +async def test_crud_folders_works(sess, project_id): """Test folders REST api POST, GET, PATCH, PUBLISH and DELETE reqs. 
:param sess: HTTP session in which request call is made @@ -1702,6 +1707,9 @@ async def test_submissions_work(sess, folder_id): } assert expected_study in res["metadataObjects"], "folder metadataObjects content mismatch" + await delete_object(sess, "sample", sample_access_id) + await delete_object(sess, "study", study_access_id) + # Remove the accession id that was used for testing from test file LOG.debug("Sharing the correct accession ID created in this test instance") mod_study = testfiles_root / "study" / "SRP000539_modified.xml" @@ -1839,6 +1847,7 @@ async def main(): metax_folder = { "name": "basic test pagination", "description": "basic test pagination folder", + "projectId": project_id, } metax_folder_id = await post_folder(sess, metax_folder) await test_metax_crud_with_xml(sess, metax_folder_id) diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 077794d65..db1aaf03b 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -122,7 +122,7 @@ async def setUpAsync(self): "delete.side_effect": self.fake_doi_delete, } - RESTAPIHandler._handle_check_ownedby_user = make_mocked_coro(True) + RESTAPIHandler._handle_check_ownership = make_mocked_coro(True) ObjectAPIHandler._delete_metax_dataset = make_mocked_coro() async def tearDownAsync(self): From 4cd36fe218a77924b01c5304fc3a4a486705ff45 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Tue, 29 Mar 2022 17:13:06 +0000 Subject: [PATCH 321/336] Clean metax handler from TODOs --- metadata_backend/helpers/metax_api_handler.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/metadata_backend/helpers/metax_api_handler.py b/metadata_backend/helpers/metax_api_handler.py index 283ae70e5..3abe9e56e 100644 --- a/metadata_backend/helpers/metax_api_handler.py +++ b/metadata_backend/helpers/metax_api_handler.py @@ -102,7 +102,6 @@ async def post_dataset_as_draft(self, collection: str, data: Dict) -> str: ) metax_id = metax_data["identifier"] else: - # TODO: how front end should react on this?? reason = await resp.text() raise self.process_error(status, reason) @@ -123,7 +122,6 @@ async def post_dataset_as_draft(self, collection: str, data: Dict) -> str: ) return metax_id else: - # TODO: how front end should react on this?? reason = await resp.text() raise self.process_error(status, reason) @@ -139,7 +137,6 @@ async def update_draft_dataset(self, collection: str, data: Dict) -> str: :returns: Metax ID for dataset returned by Metax API """ metax_dataset = self.minimal_dataset_template - # TODO: should this be changed if person updating data is different from data creator? metax_dataset["metadata_provider_user"] = await self.get_metadata_provider_user() if collection == "dataset": dataset_data = self.create_metax_dataset_data_from_dataset(data) @@ -161,7 +158,6 @@ async def update_draft_dataset(self, collection: str, data: Dict) -> str: LOG.info(f"Updated Metax draft dataset with ID {metax_data['identifier']} with data: {metax_dataset}") return metax_data["identifier"] else: - # TODO: how front end should react on this?? reason = await resp.text() raise self.process_error(status, reason) @@ -179,7 +175,6 @@ async def delete_draft_dataset(self, metax_id: str) -> None: if status == 204: LOG.info(f"Deleted draft dataset {metax_id} from Metax service") else: - # TODO: how front end should react on this?? 
reason = await resp.text()
             raise self.process_error(status, reason)

From 199285a150a1a8672600056ebe46cc9b11d734c4 Mon Sep 17 00:00:00 2001
From: Stefan Negru
Date: Fri, 1 Apr 2022 09:27:54 +0300
Subject: [PATCH 322/336] keywords as list instead of array

---
 CHANGELOG.md                                   | 3 +++
 metadata_backend/helpers/schemas/datacite.json | 8 ++------
 metadata_backend/helpers/schemas/folders.json  | 8 ++------
 tests/test_files/doi/test_doi.json             | 5 +----
 4 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2f7cde3a6..66456790c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 ### Added
+
 - Submission endpoint update #371
   - Adds mandatory query parameter `folder` for submit endpoint POST
   - On actions add and modify object is added or updated to folder(submission) where it belongs with its accession ID, schema, submission type, title and filename
   - Adds metax integration to submit endpoint
 - Integration with Metax service #356
@@ -62,6 +63,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Multilevel add patch objects to support `/extraInfo/datasetIdentifiers/-` which needs dot notation for mongodb to work e.g. `extraInfo.datasetIdentifiers` #332

 ### Changed
+
 - Refactor auth.py package by removing custom OIDC code and replacing it with https://github.com/IdentityPython/JWTConnect-Python-OidcRP. #315
   - New mandatory ENV `OIDC_URL`
   - New optional ENVs `OIDC_SCOPE`, `AUTH_METHOD`
@@ -77,6 +79,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - JSON schemas #332
   - introduce `keywords` required for Metax in `doiInfo`
   - dataset `description` and study `studyAbstract` are now mandatory
+- `keywords` will be comma-separated values, which will require splitting when adding to Metax API

 ### Fixed

diff --git a/metadata_backend/helpers/schemas/datacite.json b/metadata_backend/helpers/schemas/datacite.json
index 0e205c93c..045c7fd0f 100644
--- a/metadata_backend/helpers/schemas/datacite.json
+++ b/metadata_backend/helpers/schemas/datacite.json
@@ -163,13 +163,9 @@
             "uniqueItems": true
         },
         "keywords": {
-            "type": "array",
+            "type": "string",
             "title": "Keywords",
-            "description": "A keyword or tag describing the resources. It is recommended to use a controlled vocabulary, ontology or classification when choosing keywords. At least one keyword is required.",
-            "items": {
-                "minLength": 1,
-                "type": "string"
-            }
+            "description": "A list of keywords or tags describing the resources. It is recommended to use a controlled vocabulary, ontology or classification when choosing keywords. Multiple keywords can be added, separated by commas."
         },
         "contributors": {
             "type": "array",
diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json
index 968e6aaba..1b1bfb711 100644
--- a/metadata_backend/helpers/schemas/folders.json
+++ b/metadata_backend/helpers/schemas/folders.json
@@ -213,13 +213,9 @@
             "uniqueItems": true
         },
         "keywords": {
-            "type": "array",
+            "type": "string",
             "title": "Keywords",
-            "description": "A keyword or tag describing the resources. It is recommended to use a controlled vocabulary, ontology or classification when choosing keywords. At least one keyword is required.",
-            "items": {
-                "minLength": 1,
-                "type": "string"
-            }
+            "description": "A list of keywords or tags describing the resources. It is recommended to use a controlled vocabulary, ontology or classification when choosing keywords. 
Multiple keywords can be added, separated by commas."
         },
         "contributors": {
             "type": "array",
diff --git a/tests/test_files/doi/test_doi.json b/tests/test_files/doi/test_doi.json
index 17fa355a7..b2b0469ab 100644
--- a/tests/test_files/doi/test_doi.json
+++ b/tests/test_files/doi/test_doi.json
@@ -22,10 +22,7 @@
             "subjectScheme": "Fields of Science and Technology (FOS)"
         }
     ],
-    "keywords": [
-        "test",
-        "keyword"
-    ],
+    "keywords": "test,keyword",
     "contributors": [
         {
             "name": "Contributor, Test",

From edb7992430db3330adfa539cc282bc0a0284fd77 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Mon, 4 Apr 2022 11:03:12 +0000
Subject: [PATCH 323/336] Update schemas with required fields

Add givenName and familyName as required fields to the datacite and folders
schemas, for the items of the creators and contributors fields.
---
 metadata_backend/helpers/schemas/datacite.json | 8 +++++++-
 metadata_backend/helpers/schemas/folders.json  | 8 +++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/metadata_backend/helpers/schemas/datacite.json b/metadata_backend/helpers/schemas/datacite.json
index 045c7fd0f..073a79b3e 100644
--- a/metadata_backend/helpers/schemas/datacite.json
+++ b/metadata_backend/helpers/schemas/datacite.json
@@ -14,6 +14,10 @@
         "items": {
             "type": "object",
             "title": "Main researcher(s) involved with data or the author(s) of the publication.",
+            "required": [
+                "givenName",
+                "familyName"
+            ],
             "properties": {
                 "givenName": {
                     "type": "string",
@@ -175,6 +179,8 @@
             "type": "object",
             "title": "Contributor",
             "required": [
+                "givenName",
+                "familyName",
                 "contributorType"
             ],
             "properties": {
@@ -860,4 +866,4 @@
         "uniqueItems": true
     }
 }
-}
\ No newline at end of file
+}
diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json
index 1b1bfb711..8e9237247 100644
--- a/metadata_backend/helpers/schemas/folders.json
+++ b/metadata_backend/helpers/schemas/folders.json
@@ -55,6 +55,10 @@
         "items": {
             "type": "object",
             "title": "Main researcher(s) involved with data or the author(s) of the publication.",
+            "required": [
+                "givenName",
+                "familyName"
+            ],
             "properties": {
                 "givenName": {
                     "type": "string",
@@ -225,6 +229,8 @@
             "type": "object",
             "title": "Contributor",
             "required": [
+                "givenName",
+                "familyName",
                 "contributorType"
             ],
             "properties": {
@@ -1107,4 +1113,4 @@
         }
     },
     "additionalProperties": false
-}
\ No newline at end of file
+}

From 341482c74f30d166f385b93e56dbd85abec4939f Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Tue, 5 Apr 2022 09:37:31 +0000
Subject: [PATCH 324/336] Add DISCOVERY_URL to env vars

Updated the global variable DISCOVERY_URL to be an environment variable.
For testing purposes the .env.example file is set to point to the demo
environment. The default points to production.
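A minimal sketch of how the variable is resolved (the getenv call and its
default match the conf.py change below; the landing-page helper is only a
hypothetical illustration of how the link to the dataset in the Fairdata SD
catalog would be formed):

    import os

    # production catalog is the default; demo/test environments override via env
    DISCOVERY_URL = os.getenv("DISCOVERY_URL", "https://etsin.fairdata.fi/dataset/")

    def dataset_landing_page(metax_id: str) -> str:
        # assumed concatenation: discovery base URL + Metax identifier
        return f"{DISCOVERY_URL}{metax_id}"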
--- .env.example | 1 + .github/config/.wordlist.txt | 1 + CHANGELOG.md | 3 ++- docker-compose-tls.yml | 1 + docker-compose.yml | 1 + metadata_backend/conf/conf.py | 2 +- 6 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.env.example b/.env.example index ae6980910..fe819d6cc 100644 --- a/.env.example +++ b/.env.example @@ -31,6 +31,7 @@ DOI_API=http://mockdoi:8001/dois DOI_PREFIX=10.xxxx DOI_USER=user DOI_KEY=key +DISCOVERY_URL=https://etsin.demo.fairdata.fi/dataset/ # metax METAX_USER=sd diff --git a/.github/config/.wordlist.txt b/.github/config/.wordlist.txt index 4a0bef904..fb308f3d8 100644 --- a/.github/config/.wordlist.txt +++ b/.github/config/.wordlist.txt @@ -197,6 +197,7 @@ experimentref experimenttype externalid extrainfo +fairdata faire familyname faroese diff --git a/CHANGELOG.md b/CHANGELOG.md index 66456790c..9517568ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Adds metax integration to submit endpoint - Integration with Metax service #356 - Adds new local container for testing against mocked Metax API - - Introduces new env vars: METAX_USER, METAX_PASS, METAX_URL + - Introduces new env vars: METAX_USER, METAX_PASS, METAX_URL for connection to Metax service + - Introduces new env var DISCOVERY_URL for creating link to dataset inside Fairdata SD catalog - Adds new key metaxIdentifier to Study and Dataset collections containing metax id returned from Metax API - Adds new handler MetaxServiceHandler to take care of mapping Submitter metadata to Metax metadata and to connect to Metax API - Add patching of folders after object save and update operations #354 diff --git a/docker-compose-tls.yml b/docker-compose-tls.yml index c7f731643..ee076acd2 100644 --- a/docker-compose-tls.yml +++ b/docker-compose-tls.yml @@ -36,6 +36,7 @@ services: - "DOI_PREFIX=${DOI_PREFIX}" - "DOI_USER=${DOI_USER}" - "DOI_KEY=${DOI_KEY}" + - "DISCOVERY_URL=${DISCOVERY_URL}" - "METAX_USER=${METAX_USER}" - "METAX_PASS=${METAX_PASS}" - "METAX_URL=${METAX_URL}" diff --git a/docker-compose.yml b/docker-compose.yml index 60bc332e5..8d42e5104 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -30,6 +30,7 @@ services: - "DOI_PREFIX=${DOI_PREFIX}" - "DOI_USER=${DOI_USER}" - "DOI_KEY=${DOI_KEY}" + - "DISCOVERY_URL=${DISCOVERY_URL}" - "METAX_USER=${METAX_USER}" - "METAX_PASS=${METAX_PASS}" - "METAX_URL=${METAX_URL}" diff --git a/metadata_backend/conf/conf.py b/metadata_backend/conf/conf.py index ce058bd55..417861208 100644 --- a/metadata_backend/conf/conf.py +++ b/metadata_backend/conf/conf.py @@ -160,7 +160,7 @@ def create_db_client() -> AsyncIOMotorClient: "key": os.getenv("DOI_KEY", ""), "url": os.getenv("DATACITE_URL", "https://doi.org"), "publisher": "CSC - IT Center for Science", - "discovery_url": "https://etsin.fairdata.fi/dataset/", + "discovery_url": os.getenv("DISCOVERY_URL", "https://etsin.fairdata.fi/dataset/"), } metax_config = { From be1beb7a1d3113dd619e28726b5c4453682febe4 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 6 Apr 2022 08:48:48 +0300 Subject: [PATCH 325/336] restrict the use of doi key for study and dataset --- metadata_backend/api/operators.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index d7a2fe438..d04663978 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -484,18 +484,18 @@ async def 
_format_data_to_replace_and_add_to_db(self, schema_type: str, accessio

         Replace information in object before adding to db.

-        We will not replace accessionId, publishDate or dateCreated,
+        We will not replace ``accessionId``, ``publishDate`` or ``dateCreated``,
         as these are generated when created.
-        Will not replace metaxIdentifier for study and dataset
+        Will not replace ``metaxIdentifier`` and ``doi`` for ``study`` and ``dataset``
         as it is generated when created.
-        We will keep also publisDate and dateCreated from old object.
+        We will also keep ``publishDate`` and ``dateCreated`` from the old object.

         :param schema_type: Schema type of the object to replace.
         :param accession_id: Identifier of object to replace.
         :param data: Metadata object
         :returns: Accession Id for object inserted to database
         """
-        forbidden_keys = {"accessionId", "publishDate", "dateCreated", "metaxIdentifier"}
+        forbidden_keys = {"accessionId", "publishDate", "dateCreated", "metaxIdentifier", "doi"}
         if any(i in data for i in forbidden_keys):
             reason = f"Some items (e.g: {', '.join(forbidden_keys)}) cannot be changed."
             LOG.error(reason)
@@ -509,12 +509,15 @@ async def _format_data_to_update_and_add_to_db(self, schema_type: str, accessio
         """Format and update data in database.

+        Will not allow updating ``metaxIdentifier`` and ``doi`` for ``study`` and ``dataset``,
+        as these are generated when created.
+
         :param schema_type: Schema type of the object to replace.
         :param accession_id: Identifier of object to replace.
         :param data: Metadata object
         :returns: Accession Id for object inserted to database
         """
-        forbidden_keys = {"accessionId", "publishDate", "dateCreated", "metaxIdentifier"}
+        forbidden_keys = {"accessionId", "publishDate", "dateCreated", "metaxIdentifier", "doi"}
         if any(i in data for i in forbidden_keys):
             reason = f"Some items (e.g: {', '.join(forbidden_keys)}) cannot be changed."
LOG.error(reason)

From a21814ee2c374edf4bfe4b87bc6615b7e54f4086 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Wed, 30 Mar 2022 07:02:04 +0000
Subject: [PATCH 326/336] Update logging to track Metax calls

---
 metadata_backend/api/handlers/object.py       | 30 ++++++++-------
 metadata_backend/helpers/metax_api_handler.py | 37 ++++++++++---------
 2 files changed, 36 insertions(+), 31 deletions(-)

diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py
index 2d32501e2..8cd4734a7 100644
--- a/metadata_backend/api/handlers/object.py
+++ b/metadata_backend/api/handlers/object.py
@@ -104,6 +104,7 @@ async def post_object(self, req: Request) -> Response:
         _allowed_csv = {"sample"}
         _allowed_doi = {"study", "dataset"}
         schema_type = req.match_info["schema"]
+        LOG.debug(f"Creating {schema_type} object")
         filename = ""
         cont_type = ""
@@ -213,13 +214,13 @@ async def delete_object(self, req: Request) -> Response:
         :raises: HTTPUnprocessableEntity if object does not belong to current user
         :returns: HTTPNoContent response
         """
+        schema_type = req.match_info["schema"]
+        accession_id = req.match_info["accessionId"]
+        LOG.debug(f"Deleting object {schema_type} {accession_id}")
         _allowed_doi = {"study", "dataset"}

-        schema_type = req.match_info["schema"]
         self._check_schema_exists(schema_type)
         collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type
-
-        accession_id = req.match_info["accessionId"]
         db_client = req.app["db_client"]

         operator = Operator(db_client)
@@ -256,8 +257,7 @@ async def delete_object(self, req: Request) -> Response:

         # Delete draft dataset from Metax catalog
         if collection in _allowed_doi:
-            metax_service = MetaxServiceHandler(req)
-            await metax_service.delete_draft_dataset(metax_id)
+            await MetaxServiceHandler(req).delete_draft_dataset(metax_id)

         doi_service = DOIHandler()
         await doi_service.delete(doi_id)
@@ -274,10 +274,11 @@ async def put_object(self, req: Request) -> Response:
         :raises: HTTPUnsupportedMediaType if JSON replace is attempted
         :returns: JSON response containing accessionId for submitted object
         """
-        _allowed_doi = {"study", "dataset"}
-
         schema_type = req.match_info["schema"]
         accession_id = req.match_info["accessionId"]
+        LOG.debug(f"Replacing object {schema_type} {accession_id}")
+        _allowed_doi = {"study", "dataset"}
+
         self._check_schema_exists(schema_type)
         collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type
@@ -315,8 +316,7 @@ async def put_object(self, req: Request) -> Response:

         # Update draft dataset to Metax catalog
         if collection in _allowed_doi:
-            metax_service = MetaxServiceHandler(req)
-            await metax_service.update_draft_dataset(collection, data)
+            await MetaxServiceHandler(req).update_draft_dataset(collection, data)

         body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False)
         LOG.info(f"PUT object with accession ID {accession_id} in schema {collection} was successful.")
@@ -333,6 +333,8 @@ async def patch_object(self, req: Request) -> Response:
         """
         schema_type = req.match_info["schema"]
         accession_id = req.match_info["accessionId"]
+        LOG.debug(f"Patching object {schema_type} {accession_id}")
+
         self._check_schema_exists(schema_type)
         collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type
@@ -372,8 +374,7 @@ async def patch_object(self, req: Request) -> Response:
             object_data, _ = await operator.read_metadata_object(collection, accession_id)
             # MYPY related if statement, Operator (when not XMLOperator) always returns object_data as 
dict if isinstance(object_data, Dict): - metax_service = MetaxServiceHandler(req) - await metax_service.update_draft_dataset(collection, object_data) + await MetaxServiceHandler(req).update_draft_dataset(collection, object_data) else: raise ValueError("Object's data must be dictionary") @@ -389,6 +390,7 @@ def _prepare_folder_patch_new_object(self, schema: str, objects: List, cont_type :param params: addidtional data required for db entry :returns: list of patch operations """ + LOG.info("Preparing folder patch for new objects") if not cont_type: submission_type = "Form" else: @@ -432,6 +434,7 @@ def _prepare_folder_patch_update_object(self, schema: str, data: Dict, filename: :param title: title to be updated :returns: dict with patch operation """ + LOG.info("Preparing folder patch for existing objects") if schema.startswith("draft"): path = "/drafts" else: @@ -475,11 +478,10 @@ async def create_metax_dataset(self, req: Request, collection: str, object: Dict :param folder_id: folder ID where metadata object belongs to :returns: Metax ID """ - metax_service = MetaxServiceHandler(req) - operator = Operator(req.app["db_client"]) LOG.info("Creating draft dataset to Metax.") + operator = Operator(req.app["db_client"]) object["doi"] = await self._draft_doi(collection) - metax_id = await metax_service.post_dataset_as_draft(collection, object) + metax_id = await MetaxServiceHandler(req).post_dataset_as_draft(collection, object) new_info = {"doi": object["doi"], "metaxIdentifier": metax_id} await operator.create_metax_info(collection, object["accessionId"], new_info) diff --git a/metadata_backend/helpers/metax_api_handler.py b/metadata_backend/helpers/metax_api_handler.py index 3abe9e56e..88e1f36c9 100644 --- a/metadata_backend/helpers/metax_api_handler.py +++ b/metadata_backend/helpers/metax_api_handler.py @@ -75,6 +75,10 @@ async def post_dataset_as_draft(self, collection: str, data: Dict) -> str: :raises: HTTPError depending on returned error from Metax :returns: Metax ID for dataset returned by Metax API """ + LOG.debug( + f"Creating draft dataset to Metax service from Submitter {collection} with accession ID " + f"{data['accessionId']}" + ) metax_dataset = self.minimal_dataset_template metax_dataset["metadata_provider_user"] = await self.get_metadata_provider_user() if collection == "dataset": @@ -82,10 +86,6 @@ async def post_dataset_as_draft(self, collection: str, data: Dict) -> str: else: dataset_data = self.create_metax_dataset_data_from_study(data) metax_dataset["research_dataset"] = dataset_data - LOG.debug( - f"Creating draft dataset to Metax service from Submitter {collection} with accession ID " - f"{data['accessionId']}" - ) async with ClientSession() as sess: resp = await sess.post( f"{self.metax_url}{self.rest_route}", @@ -96,10 +96,8 @@ async def post_dataset_as_draft(self, collection: str, data: Dict) -> str: status = resp.status if status == 201: metax_data = await resp.json() - LOG.debug( - f"Created Metax draft dataset {metax_data['identifier']} from Submitter {collection} " - f"{data['accessionId']} with data: {metax_dataset}." 
- ) + LOG.info(f"Created Metax draft dataset {metax_data['identifier']}") + LOG.debug(f"Created Metax draft dataset {metax_data['identifier']} with data: {metax_dataset}.") metax_id = metax_data["identifier"] else: reason = await resp.text() @@ -116,6 +114,7 @@ async def post_dataset_as_draft(self, collection: str, data: Dict) -> str: status = resp.status if status == 200: metax_data = await resp.json() + LOG.info("Updated Metax draft dataset with permanent preferred identifier.") LOG.debug( f"Updated Metax draft dataset {metax_data['identifier']} with permanent preferred " "identifier." @@ -136,6 +135,7 @@ async def update_draft_dataset(self, collection: str, data: Dict) -> str: :raises: HTTPError depending on returned error from Metax :returns: Metax ID for dataset returned by Metax API """ + LOG.info(f"Updating Metax draft dataset {data['metaxIdentifier']}") metax_dataset = self.minimal_dataset_template metax_dataset["metadata_provider_user"] = await self.get_metadata_provider_user() if collection == "dataset": @@ -143,7 +143,6 @@ async def update_draft_dataset(self, collection: str, data: Dict) -> str: else: dataset_data = self.create_metax_dataset_data_from_study(data) metax_dataset["research_dataset"] = dataset_data - LOG.info(f"Sending updated {collection} object data to Metax service.") async with ClientSession() as sess: resp = await sess.put( @@ -155,7 +154,7 @@ async def update_draft_dataset(self, collection: str, data: Dict) -> str: status = resp.status if status == 200: metax_data = await resp.json() - LOG.info(f"Updated Metax draft dataset with ID {metax_data['identifier']} with data: {metax_dataset}") + LOG.debug(f"Updated Metax draft dataset with ID {metax_data['identifier']} with data: {metax_dataset}") return metax_data["identifier"] else: reason = await resp.text() @@ -166,6 +165,7 @@ async def delete_draft_dataset(self, metax_id: str) -> None: :param metax_id: Identification string pointing to Metax dataset to be deleted """ + LOG.info(f"Deleting Metax draft dataset {metax_id}") async with ClientSession() as sess: resp = await sess.delete( f"{self.metax_url}{self.rest_route}/{metax_id}", @@ -173,20 +173,23 @@ async def delete_draft_dataset(self, metax_id: str) -> None: ) status = resp.status if status == 204: - LOG.info(f"Deleted draft dataset {metax_id} from Metax service") + LOG.debug(f"Deleted draft dataset {metax_id} from Metax service") else: reason = await resp.text() raise self.process_error(status, reason) - async def update_dataset_with_doi_info(self, doi_info: Dict, metax_ids: List) -> None: + async def update_dataset_with_doi_info(self, doi_info: Dict, _metax_ids: List) -> None: """Update dataset for publishing. 
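        As a sketch, the method builds one bulk-update entry per dataset in
        the shape the PATCH call below sends to Metax (field names taken from
        this method's body; the illustrated values are placeholders):

            {"identifier": id["metaxIdentifier"], "research_dataset": {...}}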
:param doi_info: Dict containing info to complete metax dataset metadata :param metax_id: Metax id of dataset to be updated """ - LOG.info("Updating object metax metadata with doi info") + LOG.info( + "Updating metadata with datacite info for Metax datasets: " + f"{','.join([id['metaxIdentifier'] for id in _metax_ids])}" + ) bulk_data = [] - for id in metax_ids: + for id in _metax_ids: async with ClientSession() as sess: resp = await sess.get( f"{self.metax_url}{self.rest_route}/{id['metaxIdentifier']}", @@ -215,7 +218,7 @@ async def update_dataset_with_doi_info(self, doi_info: Dict, metax_ids: List) -> auth=self.auth, ) if resp.status == 200: - LOG.info("Objects metadata are updated to Metax for publishing") + LOG.debug("Objects metadata are updated to Metax for publishing") return await resp.json() else: reason = await resp.text() @@ -228,6 +231,7 @@ async def publish_dataset(self, _metax_ids: List[Dict]) -> None: :param _metax_ids: List of metax IDs that include study and datasets """ + LOG.info(f"Publishing Metax datasets {','.join([id['metaxIdentifier'] for id in _metax_ids])}") for object in _metax_ids: metax_id = object["metaxIdentifier"] doi = object["doi"] @@ -252,7 +256,7 @@ async def publish_dataset(self, _metax_ids: List[Dict]) -> None: else: reason = await resp.text() raise self.process_error(status, reason) - LOG.info(f"Metax ID {object['metaxIdentifier']} was published to Metax service.") + LOG.debug(f"Metax ID {object['metaxIdentifier']} was published to Metax service.") def create_metax_dataset_data_from_study(self, data: Dict) -> Dict: """Construct Metax dataset's research dataset dictionary from Submitters Study. @@ -261,7 +265,6 @@ def create_metax_dataset_data_from_study(self, data: Dict) -> Dict: :returns: constructed research dataset """ research_dataset = self.minimal_dataset_template["research_dataset"] - research_dataset["preferred_identifier"] = data["doi"] research_dataset["title"]["en"] = data["descriptor"]["studyTitle"] research_dataset["description"]["en"] = data["descriptor"]["studyAbstract"] From f444f1ae596378372508f162b1ad4b44769bd015 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 30 Mar 2022 07:16:38 +0000 Subject: [PATCH 327/336] Add mapper class --- metadata_backend/helpers/metax_api_handler.py | 9 +- metadata_backend/helpers/metax_mapper.py | 131 ++++++++++++++++++ 2 files changed, 135 insertions(+), 5 deletions(-) create mode 100644 metadata_backend/helpers/metax_mapper.py diff --git a/metadata_backend/helpers/metax_api_handler.py b/metadata_backend/helpers/metax_api_handler.py index 88e1f36c9..d7f76c632 100644 --- a/metadata_backend/helpers/metax_api_handler.py +++ b/metadata_backend/helpers/metax_api_handler.py @@ -8,6 +8,7 @@ from ..api.operators import UserOperator from ..conf.conf import metax_config from .logger import LOG +from .metax_mapper import MetaDataMapper class MetaxServiceHandler: @@ -203,12 +204,10 @@ async def update_dataset_with_doi_info(self, doi_info: Dict, _metax_ids: List) - raise self.process_error(status, reason) # Map fields from doi info to Metax schema - + mapper = MetaDataMapper(metax_data["research_dataset"], doi_info) # creator is required field - metax_data["research_dataset"]["creator"] = self.map_creators(doi_info["creators"]) - bulk_data.append( - {"identifier": id["metaxIdentifier"], "research_dataset": metax_data["research_dataset"]} - ) + mapped_metax_data = mapper.map_metadata() + bulk_data.append({"identifier": id["metaxIdentifier"], "research_dataset": mapped_metax_data}) # for id in metax_ids: async 
with ClientSession() as sess: diff --git a/metadata_backend/helpers/metax_mapper.py b/metadata_backend/helpers/metax_mapper.py new file mode 100644 index 000000000..99723a81f --- /dev/null +++ b/metadata_backend/helpers/metax_mapper.py @@ -0,0 +1,131 @@ +"""Class for mapping Submitter metadata to Metax metadata.""" +from copy import deepcopy +from typing import Any, Dict, List + +from .logger import LOG + + +class MetaDataMapper: + """Methods for mapping submitter's metadata to METAX service metadata. + + This helpper class maps data from datacite, study and dataset schemas to Metax research_dataset + schema: + https://raw.githubusercontent.com/CSCfi/metax-api/master/src/metax_api/api/rest/v2/schemas/att_dataset_schema.json + """ + + { + "Person": { + "properties": { + "@type": {"type": "string", "enum": ["Person"]}, + "identifier": { + "description": "An unambiguous reference to the resource within a given context.", + "type": "string", + "format": "uri", + "example": ["http://orcid.org/0000-0002-1825-0097"], + }, + "name": { + "title": "Name", + "description": ( + "This property contains a name of the agent. This property can be repeated for different " + "versions of the name (e.g. the name in different languages)" + ), + "type": "string", + }, + "member_of": { + "description": ( + "Indicates that a person is a member of the Organization with no indication of the " + "nature of that membership or the role played." + ), + "type": "object", + "$ref": "#/definitions/Organization", + }, + "contributor_type": { + "description": "Contributor type of the Agent. Reference data from DataCite.", + "type": "array", + "items": {"type": "object", "$ref": "#/definitions/Concept"}, + }, + }, + "required": ["@type", "name", "member_of"], + }, + } + { + "Organization": { + "description": "An organization.", + "type": "object", + "properties": { + "@type": {"type": "string", "enum": ["Organization"]}, + "identifier": { + "type": "string", + "format": "uri", + "example": ["http://orcid.org/0000-0002-1825-0097"], + }, + "name": { + "type": "object", + "$ref": "#/definitions/langString", + }, + "contributor_type": { + "@id": "http://uri.suomi.fi/datamodel/ns/mrd#contributorType", + "description": ( + "Contributor type of the Organization. Based on the subset of the DataCite reference data." + ), + "type": "array", + "items": {"type": "object", "$ref": "#/definitions/Concept"}, + }, + }, + "required": ["@type"], + } + } + + def __init__(self, metax_data: Dict, data: Dict) -> None: + """Set variables. + + :param metax_data: Metax research_dataset metadata + """ + self.person: Dict[str, Any] = { + "name": "", + "@type": "Person", + "member_of": {"name": {"en": ""}, "@type": "Organization"}, + "identifier": "", + } + self.research_dataset = metax_data + self.datacite_data = data + + def map_metadata(self) -> Dict[str, Any]: + """Public class for actual mapping of metadata fields. + + :returns: Research dataset + """ + LOG.info("Mapping datasite data to Metax metadata") + LOG.debug("Data incomming for mapping: ", self.datacite_data) + for key, value in self.datacite_data.items(): + if key == "creators": + self._map_creators(value) + return self.research_dataset + + def _map_creators(self, creators: List) -> None: + """Map creators. 
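+        Illustrative input/output, with placeholder values (the datacite
+        field names match those read in the body below):
+
+            {"name": "Creator, Test",
+             "affiliation": [{"name": "org", "affiliationIdentifier": "id"}]}
+            becomes
+            {"name": "Creator, Test", "@type": "Person",
+             "member_of": {"name": {"en": "org"}, "@type": "Organization",
+                           "identifier": "id"}}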
+ + :param creators: Creators data from datacite + """ + LOG.info("Mapping creator") + LOG.debug(creators) + self.research_dataset["creator"] = [] + for creator in creators: + metax_creator = deepcopy(self.person) + metax_creator["name"] = creator["name"] + # Metax schema accepts only one affiliation per creator + # so we take first one + if creator.get("affiliation", None): + affiliation = creator["affiliation"][0] + metax_creator["member_of"]["name"]["en"] = affiliation["name"] + if affiliation.get("affiliationIdentifier"): + metax_creator["member_of"]["identifier"] = affiliation["affiliationIdentifier"] + # Metax schema accepts only one identifier per creator + # so we take first one + else: + del metax_creator["member_of"] + if creator.get("nameIdentifiers", None) and creator["nameIdentifiers"][0].get("nameIdentifier", None): + metax_creator["identifier"] = creator["nameIdentifiers"][0]["nameIdentifier"] + else: + del metax_creator["identifier"] + self.research_dataset["creator"].append(metax_creator) From 6a7f2dabe2b7c2bbdba2821461ed393891b6f931 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 30 Mar 2022 07:45:57 +0000 Subject: [PATCH 328/336] Add optional fields to mapper --- metadata_backend/conf/conf.py | 9 +- .../metax_references/identifier_types.json | 328 ++++++++++++++ metadata_backend/helpers/metax_mapper.py | 418 +++++++++++++++++- setup.py | 1 + tests/test_files/doi/test_doi.json | 2 +- 5 files changed, 754 insertions(+), 4 deletions(-) create mode 100644 metadata_backend/conf/metax_references/identifier_types.json diff --git a/metadata_backend/conf/conf.py b/metadata_backend/conf/conf.py index 417861208..562fddc93 100644 --- a/metadata_backend/conf/conf.py +++ b/metadata_backend/conf/conf.py @@ -31,10 +31,11 @@ and inserted here in projects Dockerfile. 
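Metax reference data (currently the DataCite identifier type codes used by
the Metax mapper) is read once at import time from JSON files under
``conf/metax_references`` into ``metax_reference_data``; a lookup is then a
plain dict access, for example:

    metax_reference_data["identifier_types"]["doi"]
    # -> "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/doi"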
""" +import json import os from distutils.util import strtobool from pathlib import Path -from typing import Tuple +from typing import Dict, Tuple import ujson from motor.motor_asyncio import AsyncIOMotorClient @@ -171,3 +172,9 @@ def create_db_client() -> AsyncIOMotorClient: "publish_route": "/rpc/v2/datasets/publish_dataset", "catalog_pid": "urn:nbn:fi:att:data-catalog-sd", } + +metax_reference_data: Dict = {"identifier_types": {}} +with open("metadata_backend/conf/metax_references/identifier_types.json", "r") as codes: + codes_list = json.load(codes)["codes"] + for code in codes_list: + metax_reference_data["identifier_types"][code["codeValue"].lower()] = code["uri"] diff --git a/metadata_backend/conf/metax_references/identifier_types.json b/metadata_backend/conf/metax_references/identifier_types.json new file mode 100644 index 000000000..687ba58b9 --- /dev/null +++ b/metadata_backend/conf/metax_references/identifier_types.json @@ -0,0 +1,328 @@ +{ + "id": "973426a0-a333-4c70-90db-caea89f4e164", + "codeValue": "identifier_type", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type", + "codesUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/", + "extensionsUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/extensions/", + "codes": [ + { + "id": "7196a8a3-7e12-4fdd-81ef-63de795d1c4f", + "codeValue": "ark", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/ark", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/ark", + "status": "DRAFT", + "order": 61, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:39.852Z", + "modified": "2018-11-12T09:47:41.115Z", + "prefLabel": { + "en": "Archival Resource Key (ARK)" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/ark/members/" + }, + { + "id": "3a484add-3b4e-444d-b24c-c40a97ef0267", + "codeValue": "doi", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/doi", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/doi", + "status": "DRAFT", + "order": 62, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:39.879Z", + "modified": "2018-11-12T09:47:41.115Z", + "prefLabel": { + "en": "Digital Object Identifier" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/doi/members/" + }, + { + "id": "d4172012-04e3-4a5f-bccc-357f1863b5f5", + "codeValue": "arxiv", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/arxiv", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/arxiv", + "status": "DRAFT", + "order": 63, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:39.898Z", + "modified": "2018-11-12T09:47:41.115Z", + "prefLabel": { + "en": "arXiv identifer" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/arxiv/members/" + }, + { + "id": "fdcd5509-cb9e-4a6a-8e52-4ee14b27ca2f", + "codeValue": "bibcode", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/bibcode", + "url": 
"https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/bibcode", + "status": "DRAFT", + "order": 64, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:39.917Z", + "modified": "2018-11-12T09:47:41.115Z", + "prefLabel": { + "en": "Astrophysics Data System Code" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/bibcode/members/" + }, + { + "id": "42c04d32-ee29-4cb4-b3d0-fd588ecd5f7f", + "codeValue": "ean13", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/ean13", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/ean13", + "status": "DRAFT", + "order": 65, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:39.936Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "European Article Number" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/ean13/members/" + }, + { + "id": "f83ccfcc-9f63-416f-b56f-f77e69aa7b92", + "codeValue": "eissn", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/eissn", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/eissn", + "status": "DRAFT", + "order": 66, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:39.955Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "Electronic International Standard Serial Number" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/eissn/members/" + }, + { + "id": "15b482c5-9a58-4700-8fb3-8ec5a3f630c7", + "codeValue": "handle", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/handle", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/handle", + "status": "DRAFT", + "order": 67, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:39.973Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "Handle - an abstact reference to a resource" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/handle/members/" + }, + { + "id": "6785fe94-32e9-45b6-8d14-8b63cc70f6f8", + "codeValue": "igsn", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/igsn", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/igsn", + "status": "DRAFT", + "order": 68, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:39.994Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "International Geo Sample Number" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/igsn/members/" + }, + { + "id": "b59b4231-a483-49da-ba3f-122394c63d5d", + "codeValue": "isbn", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/isbn", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/isbn", + "status": "DRAFT", + "order": 69, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.014Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "International Standard Book Number" + }, + "membersUrl": 
"https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/isbn/members/" + }, + { + "id": "9fd8d7cb-aae9-435b-967e-5e949468774b", + "codeValue": "issn", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/issn", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/issn", + "status": "DRAFT", + "order": 70, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.035Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "International Standard Serial Number" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/issn/members/" + }, + { + "id": "4abf013c-40a8-4b86-a8ba-b77376bdf42f", + "codeValue": "istc", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/istc", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/istc", + "status": "DRAFT", + "order": 71, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.055Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "International Standard Text Code" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/istc/members/" + }, + { + "id": "eab5a081-0de4-4bcc-b7cf-da3baf527ff7", + "codeValue": "lissn", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/lissn", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/lissn", + "status": "DRAFT", + "order": 72, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.075Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "The linking ISSN" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/lissn/members/" + }, + { + "id": "3f02f4e2-602d-446c-b3b6-b116074b27ac", + "codeValue": "lsid", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/lsid", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/lsid", + "status": "DRAFT", + "order": 73, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.095Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "Life Science Identifier" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/lsid/members/" + }, + { + "id": "64caad08-8e10-4159-916a-8abdb31c5d4b", + "codeValue": "pmid", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/pmid", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/pmid", + "status": "DRAFT", + "order": 74, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.115Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "PubMed Identifier" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/pmid/members/" + }, + { + "id": "ff5bd61a-e1b1-49a2-8f55-768c9e0ef35d", + "codeValue": "purl", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/purl", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/purl", + "status": "DRAFT", + "order": 75, + "hierarchyLevel": 1, + "created": 
"2018-09-28T10:54:40.135Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "Persistent Uniform Resource Locator" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/purl/members/" + }, + { + "id": "f2ab3578-ba82-4fd6-ab19-8acf297d0b3d", + "codeValue": "upc", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/upc", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/upc", + "status": "DRAFT", + "order": 76, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.161Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "Universal Product Code" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/upc/members/" + }, + { + "id": "f75192b8-8c42-4a6d-bb2c-927963a4ebb0", + "codeValue": "url", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/url", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/url", + "status": "DRAFT", + "order": 77, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.185Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "Universal Resource Locator" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/url/members/" + }, + { + "id": "2914a977-931d-43e1-b5df-e470f1e71378", + "codeValue": "uri", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/uri", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/uri", + "status": "DRAFT", + "order": 78, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.206Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "Universal Resource Identifier" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/uri/members/" + }, + { + "id": "8c9ca578-c4cd-4efe-be79-124f3aca9e44", + "codeValue": "urn", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/urn", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/urn", + "status": "DRAFT", + "order": 79, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.226Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "Uniform Resource Name" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/urn/members/" + }, + { + "id": "07dbe700-dd43-4660-8759-1850e33622ba", + "codeValue": "orcid", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/orcid", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/orcid", + "status": "DRAFT", + "order": 80, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.247Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "Open Researcher and Contributor Identifier" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/orcid/members/" + }, + { + "id": "1e985250-971b-4955-9992-0201e30e2d79", + "codeValue": "virta", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/virta", + "url": 
"https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/virta", + "status": "DRAFT", + "order": 81, + "hierarchyLevel": 1, + "created": "2020-11-05T07:53:40.581Z", + "modified": "2020-11-05T07:53:40.581Z", + "statusModified": "2020-11-05T07:53:40.581Z", + "prefLabel": { + "en": "VIRTA ID for publication", + "fi": "VIRTA ID julkaisulle" + }, + "shortName": "VIRTA-ID", + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/virta/members/" + } + ] +} diff --git a/metadata_backend/helpers/metax_mapper.py b/metadata_backend/helpers/metax_mapper.py index 99723a81f..3081953b4 100644 --- a/metadata_backend/helpers/metax_mapper.py +++ b/metadata_backend/helpers/metax_mapper.py @@ -2,6 +2,7 @@ from copy import deepcopy from typing import Any, Dict, List +from ..conf.conf import metax_reference_data from .logger import LOG @@ -75,20 +76,274 @@ class MetaDataMapper: "required": ["@type"], } } + { + "Concept": { + "description": "An idea or notion; a unit of thought.", + "type": "object", + "properties": { + "identifier": { + "description": "This is the IRI identifier for the concept", + "type": "string", + "format": "uri", + }, + "pref_label": { + "description": ( + "The preferred lexical label for a resource, in a given language. A resource has no more than " + "one value of skos:prefLabel per language tag, and no more than one value of skos:prefLabel " + "without language tag. The range of skos:prefLabel is the class of RDF plain literals. " + "skos:prefLabel, skos:altLabel and skos:hiddenLabel are pairwise disjoint properties." + ), + "type": "object", + "$ref": "#/definitions/langString", + }, + "definition": { + "description": "A statement or formal explanation of the meaning of a concept.", + "type": "object", + "$ref": "#/definitions/langString", + }, + "in_scheme": { + "description": ( + "Relates a resource (for example a concept) to a concept scheme in which it is included." 
+ ), + "type": "string", # "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/doi", + "format": "uri", + }, + "required": ["identifier"], + }, + } + } + { + "ResearchDataset": { + # DOI + "preferred_identifier": { + "type": "string", + "format": "uri", + }, + # dates - Modified (date-time+zone) + "modified": { + "type": "string", + "format": "date-time", + }, + # dates - Issued (date) + "issued": { + "type": "string", + "format": "date", + }, + # object - title + "title": { + "type": "object", + "$ref": "#/definitions/langString", + }, + # keywords + "keyword": { + "type": "array", + "items": {"minLength": 1, "type": "string"}, + }, + # object - description/abstract + "description": { + "type": "object", + "$ref": "#/definitions/langString", + }, + # alternateIdentifiers + "other_identifier": { + "type": "array", + "items": { + "notation": { + "description": "Literal value of the identifier", + "type": "string", + }, + "type": { + "description": "a type of the identifier", + "type": "object", + "items": { + "identifier": { + "description": "This is the IRI identifier for the concept", + "type": "string", + "format": "uri", + }, + "in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type", + }, + }, + }, + "required": ["notation"], + }, + # CSC / contributors - Distributor + "publisher": { + "type": "object", + "$ref": "#/definitions/ResearchAgent", + }, + # creators + "creator": { + "type": "array", + "items": {"type": "object", "$ref": "#/definitions/ResearchAgent"}, + }, + # contributors (vs rights_holder, curator) + "contributor": { + "type": "array", + "items": {"type": "object", "$ref": "#/definitions/ResearchAgent"}, + }, + # TODO: need more info + # the only field with FUNDER, study + "is_output_of": { + "title": "Producer project", + "description": "A project that has caused the dataset to be created", + "type": "array", + "items": {"type": "object", "$ref": "#/definitions/Project"}, + }, + # contributor - Rights Holder + # can this be also organisation? + "rights_holder": { + "type": "array", + "items": {"type": "object", "$ref": "#/definitions/ResearchAgent"}, + }, + # study/dataset type + # not mappable as Metax requires identifier from preconfigured list + "theme": { + "type": "array", + "items": {"type": "object", "$ref": "#/definitions/Concept"}, + }, + # language + # cannot be mapped to Metax unless we take Lexvo schema in to use + "language": { + "type": "array", + "items": { + "type": "object", + "item": { + "title": { + "description": ( + "A name of the Linguistic System. Name is given as localized text from IETF language " + "codes. In case text cannot be localixed 'zxx' or 'und' language codes must be used." + ), + "type": "object", + "$ref": "#/definitions/langString", + }, + "identifier": { + "description": ( + "Recommended best practice is to identify the resource by means of a string conforming " + "to a formal identification system. \n\nAn unambiguous reference to the resource " + "within a given context." + ), + "type": "string", + "format": "uri", + }, + }, + }, + }, + # geoLocations + "spatial": { + "geographic_name": { + "description": ( + "A geographic name is a proper noun applied to a spatial object. 
Taking the example used in " + "the relevant INSPIRE data specification (page 18), the following are all valid geographic " + "names for the Greek capital:" + "- Αθήνα (the Greek endonym written in the Greek script)" + "- Athína (the standard Romanisation of the endonym)" + "- Athens (the English language exonym)" + "For INSPIRE-conformant data, provide the metadata for the geographic name using " + "a skos:Concept as a datatype." + ), + "type": "string", + }, + "as_wkt": { + "title": "Geometry", + "description": "Supported format for geometry is WKT string in WGS84 coordinate system.", + "type": "array", + "example": [ + "POLYGON((-122.358 47.653, -122.348 47.649, -122.348 47.658, -122.358 47.658, -122.358 47.653))" + ], + "items": {"minLength": 1, "type": "string"}, + }, + }, + # dates - Collected (date-time+zone) + "temporal": { + "type": "array", + "items": {"type": "object", "$ref": "#/definitions/PeriodOfTime"}, + }, + # dataset from same folder/submission ? + "relation": { + "type": "array", + "items": { + "type": "object", + "required": ["relation_type", "entity"], + "item": { + "entity": { + "type": "object", + "item": { + "title": { + "description": "A name given to the resource.", + "type": "object", + "$ref": "#/definitions/langString", + }, + "description": { + "description": "An account of the resource.", + "type": "object", + "$ref": "#/definitions/langString", + }, + "identifier": { + "description": "Recommended best practice is to identify the resource by means of " + "a string conforming to a formal identification system. An unambiguous reference " + "to the resource within a given context.", + "type": "string", + "format": "uri", + }, + "type": { + "description": "Type of the entity, for example: API, Application, News article, " + "paper, post or visualization.", + "type": "object", + "$ref": "#/definitions/Concept", + }, + }, + }, + "relation_type": { + "description": "Role of the influence.", + "type": "object", + "$ref": "#/definitions/Concept", + }, + }, + }, + }, + # subject Yrjö Leino + "field_of_science": { + "type": "array", + "items": {"type": "object", "$ref": "#/definitions/Concept"}, + }, + # TODO: Needs clarification + "remote_resources": { + "type": "array", + "items": {"type": "object", "$ref": "#/definitions/WebResource"}, + }, + # restricted - ask Metax team for a link to REMS + "access_rights": { + "type": "object", + "$ref": "#/definitions/RightsStatement", + }, + # contributors - Data Curator + "curator": { + "type": "array", + "items": {"type": "object", "$ref": "#/definitions/ResearchAgent"}, + }, + # sizes field need either some indication on incomming type of data or MUST be in bytes + # Though now dates schema is array so maybe data type is better solution for multiple object inputs + "total_remote_resources_byte_size": { + "type": "integer", + }, + } + } def __init__(self, metax_data: Dict, data: Dict) -> None: """Set variables. :param metax_data: Metax research_dataset metadata """ + self.research_dataset = metax_data + self.datacite_data = data + self.identifier_types = metax_reference_data["identifier_types"] self.person: Dict[str, Any] = { "name": "", "@type": "Person", "member_of": {"name": {"en": ""}, "@type": "Organization"}, "identifier": "", } - self.research_dataset = metax_data - self.datacite_data = data def map_metadata(self) -> Dict[str, Any]: """Public class for actual mapping of metadata fields. 
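For orientation, a minimal usage sketch of the class as it is wired up in
metax_api_handler.py earlier in this series; note that map_metadata() mutates
and returns the same research_dataset dict passed to __init__:

    mapper = MetaDataMapper(metax_data["research_dataset"], doi_info)
    mapped_metax_data = mapper.map_metadata()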
@@ -100,6 +355,16 @@ def map_metadata(self) -> Dict[str, Any]: for key, value in self.datacite_data.items(): if key == "creators": self._map_creators(value) + if key == "keywords": + self.research_dataset["keyword"] = value.split(",") + if key == "contributors": + self._map_contributors(value) + if key == "dates": + self._map_dates(value) + if key == "geoLocations": + self._map_spatial(value) + if key == "alternateIdentifiers": + self._map_other_identifier(value) return self.research_dataset def _map_creators(self, creators: List) -> None: @@ -129,3 +394,152 @@ def _map_creators(self, creators: List) -> None: else: del metax_creator["identifier"] self.research_dataset["creator"].append(metax_creator) + + def _map_contributors(self, contributors: List) -> None: + """Map contributors. + + contributors (other then Rights Holder, Data Curator, Distributor) + -> contributor + contributors Rights Holder -> rights_holder + contributors Data Curator -> curator + + :param submitter_data: Contributors data from + """ + LOG.info("Mapping contributors") + LOG.debug(contributors) + self.research_dataset["contributor"] = [] + self.research_dataset["rights_holder"] = [] + self.research_dataset["curator"] = [] + + for contributor in contributors: + metax_contributor = deepcopy(self.person) + metax_contributor["name"] = contributor["name"] + # Metax schema accepts only one affiliation per creator + # so we take first one + if contributor.get("affiliation", None): + affiliation = contributor["affiliation"][0] + metax_contributor["member_of"]["name"]["en"] = affiliation["name"] + if affiliation.get("affiliationIdentifier"): + metax_contributor["member_of"]["identifier"] = affiliation["affiliationIdentifier"] + else: + del metax_contributor["member_of"] + # Metax schema accepts only one identifier per creator + # so we take first one + if contributor.get("nameIdentifiers", None) and contributor["nameIdentifiers"][0].get( + "nameIdentifier", None + ): + metax_contributor["identifier"] = contributor["nameIdentifiers"][0]["nameIdentifier"] + else: + del metax_contributor["identifier"] + + if contributor.get("contributorType", None): + if contributor["contributorType"] == "DataCurator": + self.research_dataset["curator"].append(metax_contributor) + elif contributor["contributorType"] == "RightsHolder": + self.research_dataset["rights_holder"].append(metax_contributor) + else: + self.research_dataset["contributor"].append(metax_contributor) + + if not self.research_dataset["rights_holder"]: + del self.research_dataset["rights_holder"] + if not self.research_dataset["curator"]: + del self.research_dataset["curator"] + + def _map_dates(self, dates: List) -> None: + """Map dates. 
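+        For example, a collected date range becomes one temporal entry; a
+        sketch with placeholder dates (the +03:00 offset is fixed in the
+        body below):
+
+            {"date": "2020-10-10/2022-01-10", "dateType": "Collected"}
+            becomes
+            {"start_date": "2020-10-10T00:00:00+03:00",
+             "end_date": "2022-01-10T00:00:00+03:00"}
+
+        The full field mapping is: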
+ + dates Updated -> modified + dates Issued -> issued + dates Collected -> temporal + + :param submitter_data: Dates data from datacite + """ + LOG.info("Mapping dates") + LOG.debug(dates) + self.research_dataset["temporal"] = [] + temporal_date = { + "start_date": { + "type": "string", + "format": "date-time", + }, + "end_date": { + "type": "string", + "format": "date-time", + }, + } + + # format of date must be forced + for date in dates: + date_list: List = list(filter(None, date["date"].split("/"))) + if date["dateType"] == "Issued": + self.research_dataset["issued"] = date_list[-1] + if date["dateType"] == "Updated": + self.research_dataset["modified"] = date_list[-1] + "T00:00:00+03:00" + if date["dateType"] == "Collected": + temporal_date["start_date"] = date_list[0] + "T00:00:00+03:00" + temporal_date["end_date"] = date_list[-1] + "T00:00:00+03:00" + self.research_dataset["temporal"].append(temporal_date) + + if not self.research_dataset["temporal"]: + del self.research_dataset["temporal"] + + def _map_spatial(self, locations: List) -> None: + """Map geoLocations. + + If geoLocationPoint or geoLocationBox is comming with location data + lat lon coordinates will be mapped to wkt geometric presentation. + Inputs should be in degrees as geographic coordinate system (GCS) is used here. + + :param location: GeoLocations data from datacite + """ + LOG.info("Mapping locations") + LOG.debug(locations) + + spatials = self.research_dataset["spatial"] = [] + for location in locations: + spatial: Dict = {} + spatial["as_wkt"] = [] + if location.get("geoLocationPlace", None): + spatial["geographic_name"] = location["geoLocationPlace"] + if location.get("geoLocationPoint", None): + lat = float(location["geoLocationPoint"]["pointLatitude"]) + lon = float(location["geoLocationPoint"]["pointLongitude"]) + spatial["as_wkt"].append(f"POINT({lon} {lat})") + if location.get("geoLocationBox", None): + west_lon = float(location["geoLocationBox"]["westBoundLongitude"]) + east_lon = float(location["geoLocationBox"]["eastBoundLongitude"]) + north_lat = float(location["geoLocationBox"]["northBoundLatitude"]) + south_lat = float(location["geoLocationBox"]["southBoundLatitude"]) + spatial["as_wkt"].append( + f"POLYGON(({west_lon} {north_lat}, {east_lon} {north_lat}, " + f"{east_lon} {south_lat}, {west_lon} {south_lat}, {west_lon} {north_lat}))" + ) + if not spatial["as_wkt"]: + del spatial["as_wkt"] + spatials.append(spatial) + + def _map_other_identifier(self, identifiers: List) -> None: + """Map alternateIdentifiers. 
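+        Illustrative mapping, with the type URI resolved through the
+        metax_reference_data lookup (values are placeholders):
+
+            {"alternateIdentifier": "arXiv:9912.12345v2",
+             "alternateIdentifierType": "arXiv"}
+            becomes
+            {"notation": "arXiv:9912.12345v2",
+             "type": {"identifier": "...identifier_type/code/arxiv",
+                      "in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type"}}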
+ + :param location: Alternate identifiers data from datacite + """ + LOG.info("Mapping alternate identifiers") + LOG.debug(identifiers) + self.research_dataset["other_identifier"] = [] + other_identifier: Dict[str, Any] = { + "notation": "", + "type": { + "identifier": "", + "pref_label": {"en": ""}, + "in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type", + }, + } + for identifier in identifiers: + other_identifier["notation"] = identifier["alternateIdentifier"] + other_identifier["type"]["pref_label"]["en"] = identifier["alternateIdentifierType"] + + type = self.identifier_types[identifier["alternateIdentifierType"].lower()] + + other_identifier["type"]["identifier"] = type + + self.research_dataset["other_identifier"].append(other_identifier) diff --git a/setup.py b/setup.py index db47d9278..dbb824e36 100644 --- a/setup.py +++ b/setup.py @@ -41,6 +41,7 @@ "frontend/static/media/*", "frontend/static/css/*", "conf/schemas.json", + "conf/metax_references", ] }, include_package_data=True, diff --git a/tests/test_files/doi/test_doi.json b/tests/test_files/doi/test_doi.json index b2b0469ab..4e74372bd 100644 --- a/tests/test_files/doi/test_doi.json +++ b/tests/test_files/doi/test_doi.json @@ -40,4 +40,4 @@ "contributorType": "Researcher" } ] -} \ No newline at end of file +} From a691b7d4e7e5c74ef8cc7f43723b14174b6e766d Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 30 Mar 2022 08:17:56 +0000 Subject: [PATCH 329/336] Update tests Removed pref_label from mappings as it is added by Metax. Add research_dataset.json to metax test files. Update test_doi.json. --- metadata_backend/helpers/metax_api_handler.py | 7 +- metadata_backend/helpers/metax_mapper.py | 4 +- tests/integration/run_tests.py | 25 ++++ tests/test_files/doi/test_doi.json | 83 +++++++++++++ tests/test_files/metax/research_dataset.json | 110 ++++++++++++++++++ 5 files changed, 224 insertions(+), 5 deletions(-) create mode 100644 tests/test_files/metax/research_dataset.json diff --git a/metadata_backend/helpers/metax_api_handler.py b/metadata_backend/helpers/metax_api_handler.py index d7f76c632..d3e99a35d 100644 --- a/metadata_backend/helpers/metax_api_handler.py +++ b/metadata_backend/helpers/metax_api_handler.py @@ -41,15 +41,18 @@ def __init__(self, req: Request) -> None: "description": {"en": ""}, # default "access_rights": { - "access_type": {"identifier": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted"}, + "access_type": { + "in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type", + "identifier": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted", + } }, # default "publisher": { - "@type": "Organization", "name": { "en": "CSC Sensitive Data Services for Research", "fi": "CSC:n Arkaluonteisen datan palveluiden aineistokatalogi", }, + "@type": "Organization", }, }, } diff --git a/metadata_backend/helpers/metax_mapper.py b/metadata_backend/helpers/metax_mapper.py index 3081953b4..6ed60cc2e 100644 --- a/metadata_backend/helpers/metax_mapper.py +++ b/metadata_backend/helpers/metax_mapper.py @@ -527,16 +527,14 @@ def _map_other_identifier(self, identifiers: List) -> None: LOG.debug(identifiers) self.research_dataset["other_identifier"] = [] other_identifier: Dict[str, Any] = { - "notation": "", "type": { "identifier": "", - "pref_label": {"en": ""}, "in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type", }, + "notation": "", } for identifier in identifiers: other_identifier["notation"] = identifier["alternateIdentifier"] - 
other_identifier["type"]["pref_label"]["en"] = identifier["alternateIdentifierType"] type = self.identifier_types[identifier["alternateIdentifierType"].lower()] diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index 6205f7328..1cee4f11c 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -1032,6 +1032,31 @@ async def test_metax_publish_dataset(sess, folder_id): metax_res = await metax_resp.json() assert metax_res["state"] == "published" + # this data is synced with /test_files/doi/test_doi.json + # if data changes inside the file it must data must be reflected here + expected_rd = json.loads(await create_request_json_data("metax", "research_dataset.json")) + actual_rd = metax_res["research_dataset"] + + title = res["title"] if schema == "dataset" else res["descriptor"]["studyTitle"] + description = res["description"] if schema == "dataset" else res["descriptor"]["studyAbstract"] + + assert actual_rd["title"]["en"] == title + assert actual_rd["description"]["en"] == description + assert actual_rd["creator"] == expected_rd["creator"] + assert ( + actual_rd["access_rights"]["access_type"]["identifier"] + == expected_rd["access_rights"]["access_type"]["identifier"] + ) + assert actual_rd["contributor"] == expected_rd["contributor"] + assert actual_rd["curator"] == expected_rd["curator"] + assert actual_rd["issued"] == expected_rd["issued"] + assert actual_rd["modified"] == expected_rd["modified"] + assert actual_rd["other_identifier"][0]["notation"] == expected_rd["other_identifier"][0]["notation"] + assert actual_rd["publisher"] == expected_rd["publisher"] + assert actual_rd["rights_holder"] == expected_rd["rights_holder"] + assert actual_rd["spatial"] == expected_rd["spatial"] + assert actual_rd["temporal"] == expected_rd["temporal"] + async def test_crud_folders_works(sess, project_id): """Test folders REST api POST, GET, PATCH, PUBLISH and DELETE reqs. 
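The field-by-field assertions above could also be driven from a tuple, so the
list of synced fields lives in one place; a sketch covering only the fields
that compare directly (nested fields such as access_type and other_identifier
would keep their dedicated assertions):

    for field in ("creator", "contributor", "curator", "publisher",
                  "rights_holder", "spatial", "temporal", "issued", "modified"):
        assert actual_rd[field] == expected_rd[field]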
diff --git a/tests/test_files/doi/test_doi.json b/tests/test_files/doi/test_doi.json index 4e74372bd..dfa38fb4a 100644 --- a/tests/test_files/doi/test_doi.json +++ b/tests/test_files/doi/test_doi.json @@ -38,6 +38,89 @@ } ], "contributorType": "Researcher" + }, + { + "name": "Curator, Test", + "nameType": "Personal", + "givenName": "Test", + "familyName": "Curator", + "affiliation": [ + { + "name": "affiliation place", + "schemeUri": "https://ror.org", + "affiliationIdentifier": "https://ror.org/test3", + "affiliationIdentifierScheme": "ROR" + } + ], + "contributorType": "Data Curator" + }, + { + "name": "Rights, Holder", + "nameType": "Personal", + "givenName": "Rights", + "familyName": "Holder", + "affiliation": [ + { + "name": "affiliation place", + "schemeUri": "https://ror.org", + "affiliationIdentifier": "https://ror.org/test3", + "affiliationIdentifierScheme": "ROR" + } + ], + "contributorType": "Rights Holder" + } + ], + "dates": [ + { + "date": "2020-10-10/2022-01-10", + "dateType": "Collected" + }, + { + "date": "2020-10-10", + "dateType": "Issued" + }, + { + "date": "2020-10-11", + "dateType": "Issued" + }, + { + "date": "2022-01-10", + "dateType": "Updated" + }, + { + "date": "2022-01-11", + "dateType": "Updated" + }, + { + "date": "2022-01-10", + "dateType": "Available" + } + ], + "geoLocations": [ + { + "geoLocationPlace": "Helsinki" + }, + { + "geoLocationPoint": { + "pointLongitude": "24.9384", + "pointLatitude": "60.1699" + }, + "geoLocationBox": { + "westBoundLongitude": "24.8994938494", + "eastBoundLongitude": "25.1845034857", + "southBoundLatitude": "60.1396430193", + "northBoundLatitude": "60.2431299506" + } + } + ], + "language": "Assamese", + "sizes": [ + "30000" + ], + "alternateIdentifiers": [ + { + "alternateIdentifier": "arXiv:9912.12345v2", + "alternateIdentifierType": "arXiv" } ] } diff --git a/tests/test_files/metax/research_dataset.json b/tests/test_files/metax/research_dataset.json new file mode 100644 index 000000000..8b7ba8925 --- /dev/null +++ b/tests/test_files/metax/research_dataset.json @@ -0,0 +1,110 @@ +{ + "title": { + "en": "test" + }, + "issued": "2020-10-10", + "creator": [ + { + "name": "Creator, Test", + "@type": "Person", + "member_of": { + "name": { + "en": "affiliation place" + }, + "@type": "Organization", + "identifier": "https://ror.org/test1" + } + } + ], + "curator": [ + { + "name": "Curator, Test", + "@type": "Person", + "member_of": { + "name": { + "en": "affiliation place" + }, + "@type": "Organization", + "identifier": "https://ror.org/test3" + } + } + ], + "spatial": [ + { + "geographic_name": "Helsinki" + }, + { + "as_wkt": [ + "POINT(24.9384 60.1699)", + "POLYGON((24.8994938494 60.2431299506, 25.1845034857 60.2431299506, 25.1845034857 60.1396430193, 24.8994938494 60.1396430193, 24.8994938494 60.2431299506))" + ] + } + ], + "modified": "2022-01-11T00:00:00+03:00", + "temporal": [ + { + "end_date": "2022-01-10T00:00:00+03:00", + "start_date": "2020-10-10T00:00:00+03:00" + } + ], + "publisher": { + "name": { + "en": "CSC Sensitive Data Services for Research", + "fi": "CSC:n Arkaluonteisen datan palveluiden aineistokatalogi" + }, + "@type": "Organization" + }, + "contributor": [ + { + "name": "Contributor, Test", + "@type": "Person", + "member_of": { + "name": { + "en": "affiliation place" + }, + "@type": "Organization", + "identifier": "https://ror.org/test2" + } + } + ], + "description": { + "en": "some cool test description" + }, + "access_rights": { + "access_type": { + "in_scheme": 
"http://uri.suomi.fi/codelist/fairdata/access_type", + "identifier": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted", + "pref_label": { + "en": "Restricted use", + "fi": "Saatavuutta rajoitettu", + "und": "Saatavuutta rajoitettu" + } + } + }, + "rights_holder": [ + { + "name": "Rights, Holder", + "@type": "Person", + "member_of": { + "name": { + "en": "affiliation place" + }, + "@type": "Organization", + "identifier": "https://ror.org/test3" + } + } + ], + "other_identifier": [ + { + "type": { + "in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type", + "identifier": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/arxiv", + "pref_label": { + "en": "arXiv identifer", + "und": "arXiv identifer" + } + }, + "notation": "arXiv:9912.12345v2" + } + ] +} From c5825e15278c209b2a974da93c0f5097804190e4 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 30 Mar 2022 17:04:13 +0000 Subject: [PATCH 330/336] Update dates mapping for several same date type --- metadata_backend/helpers/metax_mapper.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/metadata_backend/helpers/metax_mapper.py b/metadata_backend/helpers/metax_mapper.py index 6ed60cc2e..b425c485c 100644 --- a/metadata_backend/helpers/metax_mapper.py +++ b/metadata_backend/helpers/metax_mapper.py @@ -1,5 +1,6 @@ """Class for mapping Submitter metadata to Metax metadata.""" from copy import deepcopy +from datetime import datetime from typing import Any, Dict, List from ..conf.conf import metax_reference_data @@ -472,9 +473,15 @@ def _map_dates(self, dates: List) -> None: for date in dates: date_list: List = list(filter(None, date["date"].split("/"))) if date["dateType"] == "Issued": - self.research_dataset["issued"] = date_list[-1] + if not self.research_dataset.get("issued", None) or datetime.strptime( + self.research_dataset["issued"], "%Y-%m-%d" + ) > datetime.strptime(date_list[0], "%Y-%m-%d"): + self.research_dataset["issued"] = date_list[0] if date["dateType"] == "Updated": - self.research_dataset["modified"] = date_list[-1] + "T00:00:00+03:00" + if not self.research_dataset.get("modified", None) or datetime.strptime( + self.research_dataset["modified"][:9], "%Y-%m-%d" + ) < datetime.strptime(date_list[0], "%Y-%m-%d"): + self.research_dataset["modified"] = date_list[-1] + "T00:00:00+03:00" if date["dateType"] == "Collected": temporal_date["start_date"] = date_list[0] + "T00:00:00+03:00" temporal_date["end_date"] = date_list[-1] + "T00:00:00+03:00" From 01845aa61d759465657f6ed29e641306d27494ae Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Thu, 31 Mar 2022 07:57:48 +0000 Subject: [PATCH 331/336] Fix configs for new json folder --- metadata_backend/conf/conf.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata_backend/conf/conf.py b/metadata_backend/conf/conf.py index 562fddc93..45de435f8 100644 --- a/metadata_backend/conf/conf.py +++ b/metadata_backend/conf/conf.py @@ -174,7 +174,7 @@ def create_db_client() -> AsyncIOMotorClient: } metax_reference_data: Dict = {"identifier_types": {}} -with open("metadata_backend/conf/metax_references/identifier_types.json", "r") as codes: +with open(Path(__file__).parent.parent / "conf/metax_references/identifier_types.json", "r") as codes: codes_list = json.load(codes)["codes"] for code in codes_list: metax_reference_data["identifier_types"][code["codeValue"].lower()] = code["uri"] diff --git a/setup.py b/setup.py index dbb824e36..d13bedbd6 100644 --- a/setup.py +++ b/setup.py @@ 
-41,7 +41,7 @@ "frontend/static/media/*", "frontend/static/css/*", "conf/schemas.json", - "conf/metax_references", + "conf/metax_references/*.json", ] }, include_package_data=True, From dbdcf4085b884bfcd12825ccee34265cce044f81 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Tue, 5 Apr 2022 12:08:20 +0000 Subject: [PATCH 332/336] Clean mapper class --- metadata_backend/helpers/metax_api_handler.py | 1 - metadata_backend/helpers/metax_mapper.py | 139 ++---------------- 2 files changed, 16 insertions(+), 124 deletions(-) diff --git a/metadata_backend/helpers/metax_api_handler.py b/metadata_backend/helpers/metax_api_handler.py index d3e99a35d..2ccbaea81 100644 --- a/metadata_backend/helpers/metax_api_handler.py +++ b/metadata_backend/helpers/metax_api_handler.py @@ -212,7 +212,6 @@ async def update_dataset_with_doi_info(self, doi_info: Dict, _metax_ids: List) - mapped_metax_data = mapper.map_metadata() bulk_data.append({"identifier": id["metaxIdentifier"], "research_dataset": mapped_metax_data}) - # for id in metax_ids: async with ClientSession() as sess: resp = await sess.patch( f"{self.metax_url}{self.rest_route}", diff --git a/metadata_backend/helpers/metax_mapper.py b/metadata_backend/helpers/metax_mapper.py index b425c485c..a937b29a4 100644 --- a/metadata_backend/helpers/metax_mapper.py +++ b/metadata_backend/helpers/metax_mapper.py @@ -15,104 +15,6 @@ class MetaDataMapper: https://raw.githubusercontent.com/CSCfi/metax-api/master/src/metax_api/api/rest/v2/schemas/att_dataset_schema.json """ - { - "Person": { - "properties": { - "@type": {"type": "string", "enum": ["Person"]}, - "identifier": { - "description": "An unambiguous reference to the resource within a given context.", - "type": "string", - "format": "uri", - "example": ["http://orcid.org/0000-0002-1825-0097"], - }, - "name": { - "title": "Name", - "description": ( - "This property contains a name of the agent. This property can be repeated for different " - "versions of the name (e.g. the name in different languages)" - ), - "type": "string", - }, - "member_of": { - "description": ( - "Indicates that a person is a member of the Organization with no indication of the " - "nature of that membership or the role played." - ), - "type": "object", - "$ref": "#/definitions/Organization", - }, - "contributor_type": { - "description": "Contributor type of the Agent. Reference data from DataCite.", - "type": "array", - "items": {"type": "object", "$ref": "#/definitions/Concept"}, - }, - }, - "required": ["@type", "name", "member_of"], - }, - } - { - "Organization": { - "description": "An organization.", - "type": "object", - "properties": { - "@type": {"type": "string", "enum": ["Organization"]}, - "identifier": { - "type": "string", - "format": "uri", - "example": ["http://orcid.org/0000-0002-1825-0097"], - }, - "name": { - "type": "object", - "$ref": "#/definitions/langString", - }, - "contributor_type": { - "@id": "http://uri.suomi.fi/datamodel/ns/mrd#contributorType", - "description": ( - "Contributor type of the Organization. Based on the subset of the DataCite reference data." 
- ), - "type": "array", - "items": {"type": "object", "$ref": "#/definitions/Concept"}, - }, - }, - "required": ["@type"], - } - } - { - "Concept": { - "description": "An idea or notion; a unit of thought.", - "type": "object", - "properties": { - "identifier": { - "description": "This is the IRI identifier for the concept", - "type": "string", - "format": "uri", - }, - "pref_label": { - "description": ( - "The preferred lexical label for a resource, in a given language. A resource has no more than " - "one value of skos:prefLabel per language tag, and no more than one value of skos:prefLabel " - "without language tag. The range of skos:prefLabel is the class of RDF plain literals. " - "skos:prefLabel, skos:altLabel and skos:hiddenLabel are pairwise disjoint properties." - ), - "type": "object", - "$ref": "#/definitions/langString", - }, - "definition": { - "description": "A statement or formal explanation of the meaning of a concept.", - "type": "object", - "$ref": "#/definitions/langString", - }, - "in_scheme": { - "description": ( - "Relates a resource (for example a concept) to a concept scheme in which it is included." - ), - "type": "string", # "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/doi", - "format": "uri", - }, - "required": ["identifier"], - }, - } - } { "ResearchDataset": { # DOI @@ -183,8 +85,8 @@ class MetaDataMapper: "type": "array", "items": {"type": "object", "$ref": "#/definitions/ResearchAgent"}, }, - # TODO: need more info - # the only field with FUNDER, study + # TODO: will be implemented later + # describes study from same folder/submission for mapped datasets "is_output_of": { "title": "Producer project", "description": "A project that has caused the dataset to be created", @@ -192,7 +94,7 @@ class MetaDataMapper: "items": {"type": "object", "$ref": "#/definitions/Project"}, }, # contributor - Rights Holder - # can this be also organisation? + # TODO: This can be an organisation at some point "rights_holder": { "type": "array", "items": {"type": "object", "$ref": "#/definitions/ResearchAgent"}, @@ -204,7 +106,7 @@ class MetaDataMapper: "items": {"type": "object", "$ref": "#/definitions/Concept"}, }, # language - # cannot be mapped to Metax unless we take Lexvo schema in to use + # TODO: cannot be mapped as is to Metax unless we take Lexvo schema in to use "language": { "type": "array", "items": { @@ -230,7 +132,7 @@ class MetaDataMapper: }, }, }, - # geoLocations + # geoLocations, MUST be WGS84 coordinates, https://epsg.io/4326 "spatial": { "geographic_name": { "description": ( @@ -260,7 +162,8 @@ class MetaDataMapper: "type": "array", "items": {"type": "object", "$ref": "#/definitions/PeriodOfTime"}, }, - # dataset from same folder/submission ? 
+ # TODO: will be implemented later + # dataset from same folder/submission "relation": { "type": "array", "items": { @@ -303,17 +206,17 @@ class MetaDataMapper: }, }, }, - # subject Yrjö Leino + # TODO: will be implemented later "field_of_science": { "type": "array", "items": {"type": "object", "$ref": "#/definitions/Concept"}, }, - # TODO: Needs clarification + # TODO: Need clarification on necessarity of this field "remote_resources": { "type": "array", "items": {"type": "object", "$ref": "#/definitions/WebResource"}, }, - # restricted - ask Metax team for a link to REMS + # restricted "access_rights": { "type": "object", "$ref": "#/definitions/RightsStatement", @@ -323,8 +226,7 @@ class MetaDataMapper: "type": "array", "items": {"type": "object", "$ref": "#/definitions/ResearchAgent"}, }, - # sizes field need either some indication on incomming type of data or MUST be in bytes - # Though now dates schema is array so maybe data type is better solution for multiple object inputs + # TODO: will be implemented later "total_remote_resources_byte_size": { "type": "integer", }, @@ -399,12 +301,7 @@ def _map_creators(self, creators: List) -> None: def _map_contributors(self, contributors: List) -> None: """Map contributors. - contributors (other then Rights Holder, Data Curator, Distributor) - -> contributor - contributors Rights Holder -> rights_holder - contributors Data Curator -> curator - - :param submitter_data: Contributors data from + :param contributors: Contributors data from """ LOG.info("Mapping contributors") LOG.debug(contributors) @@ -449,11 +346,7 @@ def _map_contributors(self, contributors: List) -> None: def _map_dates(self, dates: List) -> None: """Map dates. - dates Updated -> modified - dates Issued -> issued - dates Collected -> temporal - - :param submitter_data: Dates data from datacite + :param dates: Dates data from datacite """ LOG.info("Mapping dates") LOG.debug(dates) @@ -495,9 +388,9 @@ def _map_spatial(self, locations: List) -> None: If geoLocationPoint or geoLocationBox is comming with location data lat lon coordinates will be mapped to wkt geometric presentation. - Inputs should be in degrees as geographic coordinate system (GCS) is used here. + Inputs MUST be WGS84 degrees coordinates as geographic coordinate system (GCS) is used here. - :param location: GeoLocations data from datacite + :param locations: GeoLocations data from datacite """ LOG.info("Mapping locations") LOG.debug(locations) @@ -528,7 +421,7 @@ def _map_spatial(self, locations: List) -> None: def _map_other_identifier(self, identifiers: List) -> None: """Map alternateIdentifiers. 
-        :param location: Alternate identifiers data from datacite
+        :param identifiers: Alternate identifiers data from datacite
         """
         LOG.info("Mapping alternate identifiers")
         LOG.debug(identifiers)

From 3ac754393012cecb516cef75483d5e93ffa95652 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Tue, 5 Apr 2022 12:33:05 +0000
Subject: [PATCH 333/336] Update changelog

---
 CHANGELOG.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9517568ad..463144c15 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,12 +13,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   - Adds mandatory query parameter `folder` for submit endpoint POST
   - On actions add and modify object is added or updated to folder(submission) where it belongs with it's accession ID, schema, submission type, title and filename
   - Adds metax integration to submit endpoint
-- Integration with Metax service #356
+- Integration with Metax service #356 #387
  - Adds new local container for testing against mocked Metax API
  - Introduces new env vars: METAX_USER, METAX_PASS, METAX_URL for connection to Metax service
  - Introduces new env var DISCOVERY_URL for creating link to dataset inside Fairdata SD catalog
  - Adds new key metaxIdentifier to Study and Dataset collections containing metax id returned from Metax API
  - Adds new handler MetaxServiceHandler to take care of mapping Submitter metadata to Metax metadata and to connect to Metax API
+  - Adds new mapper class to adjust incoming metadata to Metax schema
 - Add patching of folders after object save and update operations #354
   - Adds mandatory query parameter `folder` for objects endpoint POST
   - Object is added or updated to folder(submission) where it belongs with it's accession ID, schema, submission type, title and filename in the case of CSV and XML upload

From 92bf5f44a1526c441fc8935072822594dfcf80c8 Mon Sep 17 00:00:00 2001
From: Evgenia Lyjina
Date: Wed, 6 Apr 2022 09:19:49 +0000
Subject: [PATCH 334/336] Fix logging print format

---
 metadata_backend/helpers/metax_mapper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/metadata_backend/helpers/metax_mapper.py b/metadata_backend/helpers/metax_mapper.py
index a937b29a4..876a478a3 100644
--- a/metadata_backend/helpers/metax_mapper.py
+++ b/metadata_backend/helpers/metax_mapper.py
@@ -254,7 +254,7 @@ def map_metadata(self) -> Dict[str, Any]:
 
         :returns: Research dataset
         """
         LOG.info("Mapping datasite data to Metax metadata")
-        LOG.debug("Data incomming for mapping: ", self.datacite_data)
+        LOG.debug(f"Data incoming for mapping: {self.datacite_data}")

From ddcb7fabb3b7451abef904d10a57aebb92d0c849 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 7 Apr 2022 06:50:19 +0000
Subject: [PATCH 335/336] Bump pre-commit from 2.17.0 to 2.18.1

Bumps [pre-commit](https://github.com/pre-commit/pre-commit) from 2.17.0 to 2.18.1.
- [Release notes](https://github.com/pre-commit/pre-commit/releases)
- [Changelog](https://github.com/pre-commit/pre-commit/blob/main/CHANGELOG.md)
- [Commits](https://github.com/pre-commit/pre-commit/compare/v2.17.0...v2.18.1)

---
updated-dependencies:
- dependency-name: pre-commit
  dependency-type: direct:development
  update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot]
---
 requirements-dev.txt | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/requirements-dev.txt b/requirements-dev.txt
index 6735075b0..292e458a7 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -50,7 +50,7 @@ platformdirs==2.5.1
     #   virtualenv
 pluggy==1.0.0
     # via tox
-pre-commit==2.17.0
+pre-commit==2.18.1
     # via -r requirements-dev.in
 py==1.11.0
     # via tox
@@ -78,9 +78,7 @@ tomli==2.0.1
 tox==3.24.5
     # via -r requirements-dev.in
 typing-extensions==4.1.1
-    # via
-    #   black
-    #   mypy
+    # via mypy
 virtualenv==20.14.0
     # via
     #   pre-commit

From 32f32eee78ddda7d021541e09f65fb1b5d40ae69 Mon Sep 17 00:00:00 2001
From: Stefan Negru
Date: Thu, 7 Apr 2022 10:07:54 +0300
Subject: [PATCH 336/336] bump version to 0.13.0

---
 CHANGELOG.md                 | 5 ++++-
 docs/conf.py                 | 2 +-
 metadata_backend/__init__.py | 2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 463144c15..7dfb8c269 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.13.0] - 2022-04-07
+
 ### Added
 
 - Submission endpoint update #371
@@ -319,7 +321,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Files are also validated during submission process.
 
-[unreleased]: https://github.com/CSCfi/metadata-submitter/compare/v0.11.0...HEAD
+[unreleased]: https://github.com/CSCfi/metadata-submitter/compare/v0.13.0...HEAD
+[0.13.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.11.0...v0.13.0
 [0.11.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.10.0...v0.11.0
 [0.10.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.9.0...v0.10.0
 [0.9.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.8.1...v0.9.0
diff --git a/docs/conf.py b/docs/conf.py
index af2536b80..b0cf682cb 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -11,7 +11,7 @@
 author = "CSC Developers"
 
 # The full version, including alpha/beta/rc tags
-release = "0.11.0"
+release = "0.13.0"
 
 # -- General configuration ---------------------------------------------------
 
diff --git a/metadata_backend/__init__.py b/metadata_backend/__init__.py
index b5a0f65d8..531cfeab9 100644
--- a/metadata_backend/__init__.py
+++ b/metadata_backend/__init__.py
@@ -1,5 +1,5 @@
 """Backend for submitting and validating XML Files containing ENA metadata."""
 
 __title__ = "metadata_backend"
-__version__ = VERSION = "0.11.0"
+__version__ = VERSION = "0.13.0"
 __author__ = "CSC Developers"
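
A few of the changes above are easier to follow with the surrounding pattern spelled out. PATCH 332 removes a leftover per-id loop comment from update_dataset_with_doi_info, which builds bulk_data in a loop and then updates every dataset with a single bulk PATCH request. The sketch below illustrates only that pattern; it is simplified (no error handling, headers, or the full payload shape), and bulk_patch is an illustrative name rather than code from the repository:

    from typing import Any, Dict, List

    from aiohttp import BasicAuth, ClientSession

    async def bulk_patch(metax_url: str, rest_route: str, bulk_data: List[Dict[str, Any]]) -> int:
        """Send one PATCH that updates many Metax datasets at once (sketch only)."""
        async with ClientSession() as sess:
            # The handler above appends {"identifier": ..., "research_dataset": ...}
            # entries to bulk_data, so one request can carry all dataset updates.
            resp = await sess.patch(
                f"{metax_url}{rest_route}",
                json=bulk_data,
                auth=BasicAuth("metax_user", "metax_pass"),  # METAX_USER / METAX_PASS in practice
            )
            return resp.status

This coroutine would be awaited from the handler's own event loop; batching the updates avoids one round trip per dataset.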
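
PATCH 332 also rewrites the _map_spatial docstring: geoLocationPoint and geoLocationBox data are mapped to a WKT geometric presentation, and inputs must be WGS84 degree coordinates (https://epsg.io/4326). The point case looks roughly like the sketch below; point_to_wkt is a hypothetical helper, not the repository's implementation, and note that WKT orders longitude before latitude:

    from typing import Dict

    def point_to_wkt(point: Dict[str, str]) -> str:
        """Render a DataCite geoLocationPoint as a WKT POINT string (sketch only)."""
        # WKT uses x y order, i.e. longitude first, latitude second.
        lon = float(point["pointLongitude"])
        lat = float(point["pointLatitude"])
        return f"POINT({lon} {lat})"

    # point_to_wkt({"pointLongitude": "24.94", "pointLatitude": "60.17"})
    # returns "POINT(24.94 60.17)"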
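
Finally, PATCH 336 bumps the same version string in three files: CHANGELOG.md, docs/conf.py and metadata_backend/__init__.py. A possible simplification, assuming the package is importable when the docs build (this is not something these patches implement), is to let Sphinx read the version from the package, which already exposes __version__:

    # docs/conf.py, sketched alternative
    from metadata_backend import __version__

    # The full version, including alpha/beta/rc tags
    release = __version__  # stays in sync with metadata_backend/__init__.py

With that in place, a release bump would only touch the changelog and the package itself.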