From ba062086c89257a01709f17b49b78351946c40bd Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Thu, 2 Dec 2021 15:32:47 +0000 Subject: [PATCH 1/9] Adds indexing on database initialization Adds docker-compose configuration for mongo container to run script on newly initialized database. Adds js script to build database collections and indexes. --- .env.example | 58 +++++++++++++++++++++--------------------- Dockerfile-dev | 2 +- docker-compose-tls.yml | 1 + docker-compose.yml | 1 + scripts/init_mongo.js | 17 +++++++++++++ 5 files changed, 49 insertions(+), 30 deletions(-) create mode 100644 scripts/init_mongo.js diff --git a/.env.example b/.env.example index 3c1fb5dd9..7b9861ec9 100644 --- a/.env.example +++ b/.env.example @@ -1,33 +1,33 @@ - # authentication - AAI_CLIENT_SECRET=secret_must_be_long - AAI_CLIENT_ID=aud2 - ISS_URL=http://mockauth:8000 - AUTH_URL=http://localhost:8000/authorize - OIDC_URL=http://mockauth:8000 - AUTH_REFERER=http://mockauth:8000 - JWK_URL=http://mockauth:8000/keyset +# authentication +AAI_CLIENT_SECRET=secret_must_be_long +AAI_CLIENT_ID=aud2 +ISS_URL=http://mockauth:8000 +AUTH_URL=http://localhost:8000/authorize +OIDC_URL=http://mockauth:8000 +AUTH_REFERER=http://mockauth:8000 +JWK_URL=http://mockauth:8000/keyset - # app urls - BASE_URL=http://localhost:5430 - # change to http://frontend:3000 if started using docker-compose for frontend - REDIRECT_URL=http://localhost:3000 +# app urls +BASE_URL=http://localhost:5430 +# change to http://frontend:3000 if started using docker-compose for frontend +REDIRECT_URL=http://localhost:3000 - # logging - LOG_LEVEL=DEBUG +# logging +LOG_LEVEL=DEBUG - # database - MONGO_HOST=database:27017 - MONGO_DATABASE=default - MONGO_AUTHDB=admin - MONGO_INITDB_ROOT_PASSWORD=admin - MONGO_INITDB_ROOT_USERNAME=admin - MONGO_SSL=true - MONGO_SSL_CA=/tls/cacert - MONGO_SSL_CLIENT_KEY=/tls/key - MONGO_SSL_CLIENT_CERT=/tls/cert +# database +MONGO_HOST=database:27017 +MONGO_DATABASE=default +MONGO_AUTHDB=admin 
+MONGO_INITDB_ROOT_PASSWORD=admin +MONGO_INITDB_ROOT_USERNAME=admin +MONGO_SSL=true +MONGO_SSL_CA=/tls/cacert +MONGO_SSL_CLIENT_KEY=/tls/key +MONGO_SSL_CLIENT_CERT=/tls/cert - # doi - DOI_API=http://mockdoi:8001/dois - DOI_PREFIX=10.xxxx - DOI_USER=user - DOI_KEY=key +# doi +DOI_API=http://mockdoi:8001/dois +DOI_PREFIX=10.xxxx +DOI_USER=user +DOI_KEY=key diff --git a/Dockerfile-dev b/Dockerfile-dev index af0cefe12..fefef7fb5 100644 --- a/Dockerfile-dev +++ b/Dockerfile-dev @@ -27,7 +27,7 @@ FROM appbase as local #======================= COPY requirements-dev.txt . -COPY ./scripts/ ./scripts +COPY ./scripts/install-hooks.sh ./scripts/install-hooks.sh RUN pip install --no-cache-dir -r requirements.txt RUN pip install --no-cache-dir -r requirements-dev.txt diff --git a/docker-compose-tls.yml b/docker-compose-tls.yml index b1dac31a2..8baa04cbc 100644 --- a/docker-compose-tls.yml +++ b/docker-compose-tls.yml @@ -49,6 +49,7 @@ services: volumes: - data:/data/db - ./config:/tls + - ./scripts/init_mongo.js:/docker-entrypoint-initdb.d/init_mongo.js:ro expose: - 27017 ports: diff --git a/docker-compose.yml b/docker-compose.yml index e1fd117d5..78de48024 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -42,6 +42,7 @@ services: - "MONGO_INITDB_ROOT_PASSWORD=${MONGO_INITDB_ROOT_PASSWORD}" volumes: - data:/data/db + - ./scripts/init_mongo.js:/docker-entrypoint-initdb.d/init_mongo.js:ro expose: - 27017 ports: diff --git a/scripts/init_mongo.js b/scripts/init_mongo.js new file mode 100644 index 000000000..e93e9ee53 --- /dev/null +++ b/scripts/init_mongo.js @@ -0,0 +1,17 @@ +// script to create default database collections and indexes +// on container start up + +db = new Mongo().getDB("default"); + +db.createCollection('user', { capped: false }); +db.createCollection('folder', { capped: false }); +db.folder.createIndex({ "dateCreated": -1 }); +db.folder.createIndex({ "datePublished": -1 }); +db.folder.createIndex({ "folderId": 1 }, { unique: true }); +db.user.createIndex({ 
"userId": 1 }, { unique: true }); +db.folder.createIndex( + { + text_name: "text", + } +) +db.folder.getIndexes() From 0fe6f052752ab0eea0d2f1bda841a9eff915907d Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Tue, 7 Dec 2021 15:25:42 +0000 Subject: [PATCH 2/9] Add black formatting argument --- .devcontainer/devcontainer.json | 1 + 1 file changed, 1 insertion(+) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 20c4dd801..13140fc03 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -17,6 +17,7 @@ } }, "python.formatting.provider": "black", + "python.formatting.blackArgs": ["--line-length", "120"], "python.languageServer": "Pylance", "python.linting.flake8Enabled": true, "python.linting.pylintEnabled": true, From 3e781bbd08fe69b247b208fe8c1cccd71d533ef1 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Tue, 7 Dec 2021 15:29:07 +0000 Subject: [PATCH 3/9] Add new field text_name to folder collection Folder is updated with new field text_name, which is populated during folder creation from the folder name split into words on delimiters. This enables filtering folders by name with a search string. Schema is updated with new field. 
--- metadata_backend/api/operators.py | 5 +++-- metadata_backend/helpers/schemas/folders.json | 6 +++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 8c3d014eb..90493f842 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -1,10 +1,10 @@ """Operators for handling database-related operations.""" import re +import time from abc import ABC, abstractmethod from datetime import datetime from typing import Any, Dict, List, Tuple, Union from uuid import uuid4 -import time from aiohttp import web from dateutil.relativedelta import relativedelta @@ -12,7 +12,7 @@ from multidict import MultiDictProxy from pymongo.errors import ConnectionFailure, OperationFailure -from ..conf.conf import query_map, mongo_database +from ..conf.conf import mongo_database, query_map from ..database.db_service import DBService, auto_reconnect from ..helpers.logger import LOG from ..helpers.parser import XMLToJSONParser @@ -658,6 +658,7 @@ async def create_folder(self, data: Dict) -> str: """ folder_id = self._generate_folder_id() data["folderId"] = folder_id + data["text_name"] = " ".join(re.split("[\\W_]", data["name"])) data["published"] = False data["dateCreated"] = int(time.time()) data["metadataObjects"] = data["metadataObjects"] if "metadataObjects" in data else [] diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index 942f602bd..cda2eab7c 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -14,6 +14,10 @@ "type": "string", "title": "Folder Name" }, + "text_name": { + "type": "string", + "title": "Searchable Folder Name, used for indexing" + }, "description": { "type": "string", "title": "Folder Description" @@ -1023,4 +1027,4 @@ } }, "additionalProperties": false -} \ No newline at end of file +} From c24b6f5cb7a349a58efb127489c38351de702b58 
Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Tue, 7 Dec 2021 15:30:57 +0000 Subject: [PATCH 4/9] Add folder query by name --- metadata_backend/api/handlers.py | 28 ++++++++++++++-------------- metadata_backend/api/operators.py | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/metadata_backend/api/handlers.py b/metadata_backend/api/handlers.py index 508bd0f83..96bbb1cba 100644 --- a/metadata_backend/api/handlers.py +++ b/metadata_backend/api/handlers.py @@ -1,33 +1,29 @@ """Handle HTTP methods for server.""" -import ujson import json -import re import mimetypes +import re from collections import Counter +from datetime import date, datetime +from distutils.util import strtobool from math import ceil from pathlib import Path -from typing import Dict, List, Tuple, Union, cast, AsyncGenerator, Any -from datetime import date, datetime +from typing import Any, AsyncGenerator, Dict, List, Tuple, Union, cast +import ujson from aiohttp import BodyPartReader, web from aiohttp.web import Request, Response -from multidict import CIMultiDict from motor.motor_asyncio import AsyncIOMotorClient -from multidict import MultiDict, MultiDictProxy +from multidict import CIMultiDict, MultiDict, MultiDictProxy from xmlschema import XMLSchemaException -from distutils.util import strtobool - -from .middlewares import decrypt_cookie, get_session -from ..conf.conf import schema_types +from ..conf.conf import aai_config, publisher, schema_types +from ..helpers.doi import DOIHandler from ..helpers.logger import LOG from ..helpers.parser import XMLToJSONParser from ..helpers.schema_loader import JSONSchemaLoader, SchemaNotFoundException, XMLSchemaLoader from ..helpers.validator import JSONValidator, XMLValidator -from ..helpers.doi import DOIHandler -from .operators import FolderOperator, Operator, XMLOperator, UserOperator - -from ..conf.conf import aai_config, publisher +from .middlewares import decrypt_cookie, get_session +from .operators import FolderOperator, 
Operator, UserOperator, XMLOperator class RESTAPIHandler: @@ -705,6 +701,10 @@ async def get_folders(self, req: Request) -> Response: reason = "'published' parameter must be either 'true' or 'false'" LOG.error(reason) raise web.HTTPBadRequest(reason=reason) + if "name" in req.query: + name_param = req.query.get("name", "") + if name_param: + folder_query = {"$text": {"$search": name_param}} folder_operator = FolderOperator(db_client) folders, total_folders = await folder_operator.query_folders(folder_query, page, per_page) diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 90493f842..88ed8f5b7 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -692,7 +692,7 @@ async def query_folders(self, query: Dict, page_num: int, page_size: int) -> Tup {"$sort": {"dateCreated": -1}}, {"$skip": skips}, {"$limit": page_size}, - {"$project": {"_id": 0}}, + {"$project": {"_id": 0, "text_name": 0}}, ] data_raw = await self.db_service.do_aggregate("folder", _query) From a7443ce6cca8579dcf65f7e881b4cd8dc45e66bd Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Wed, 8 Dec 2021 13:08:58 +0000 Subject: [PATCH 5/9] Add openapi specs --- docs/specification.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/specification.yml b/docs/specification.yml index 24b746549..7fd9070b3 100644 --- a/docs/specification.yml +++ b/docs/specification.yml @@ -897,7 +897,13 @@ paths: name: published schema: type: string - description: Return folders based on the folder published value + description: Return folders based on the folder published value. 
Should be 'true' or 'false' + - in: query + name: name + schema: + type: string + description: Return folders containing filtered string[s] in their name + example: test folder responses: 200: description: OK From 12373fa000f2e32616a6f21bca48c2dbe25b1fae Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Thu, 9 Dec 2021 13:49:10 +0000 Subject: [PATCH 6/9] Python scripts for database operations Adds python script for collections and indexes creation to be run if database is destroyed. Updates clean_db script with new functionality to only delete documents from collections. Old functionality, i.e. dropping the database, can be run with flag `--purge`. --- tests/integration/clean_db.py | 41 +++++++++++++---- tests/integration/mongo_indexes.py | 72 ++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 8 deletions(-) create mode 100755 tests/integration/mongo_indexes.py diff --git a/tests/integration/clean_db.py b/tests/integration/clean_db.py index 537f99e08..31b9c8dc6 100644 --- a/tests/integration/clean_db.py +++ b/tests/integration/clean_db.py @@ -3,15 +3,20 @@ To be utilised mostly for integration tests """ -from motor.motor_asyncio import AsyncIOMotorClient +import argparse import asyncio import logging -import argparse +import os + +from motor.motor_asyncio import AsyncIOMotorClient serverTimeout = 15000 connectTimeout = 15000 # === Global vars === +DATABASE = os.getenv("MONGO_DATABASE", "default") +AUTHDB = os.getenv("MONGO_AUTHDB", "admin") +HOST = os.getenv("MONGO_HOST", "localhost:27017") FORMAT = "[%(asctime)s][%(name)s][%(process)d %(processName)s][%(levelname)-8s](L:%(lineno)s) %(funcName)s: %(message)s" logging.basicConfig(format=FORMAT, datefmt="%Y-%m-%d %H:%M:%S") LOG = logging.getLogger(__name__) @@ -26,20 +31,40 @@ def create_db_client(url: str) -> AsyncIOMotorClient: return AsyncIOMotorClient(url, connectTimeoutMS=connectTimeout, serverSelectionTimeoutMS=serverTimeout) +async def purge_mongodb(url: str) -> None: + """Erase database.""" + client = 
create_db_client(url) + LOG.debug(f"current databases: {*await client.list_database_names(),}") + LOG.debug("=== Drop curent database ===") + await client.drop_database(DATABASE) + LOG.debug("=== DONE ===") + + async def clean_mongodb(url: str) -> None: """Clean Collection and recreate it.""" client = create_db_client(url) - LOG.debug(f"current databases: {*await client.list_database_names(),}") - LOG.debug("=== Drop any existing database ===") - await client.drop_database("default") + db = client[DATABASE] + LOG.debug(f"Database to clear: {DATABASE}") + collections = await db.list_collection_names() + LOG.debug(f"=== Collections to be cleared: {collections} ===") + LOG.debug("=== Delete all documents in all collections ===") + for col in collections: + x = await db[col].delete_many({}) + LOG.debug(f"{x.deleted_count}{' documents deleted'}\t{'from '}{col}") + LOG.debug("=== DONE ===") if __name__ == "__main__": parser = argparse.ArgumentParser(description="Process some integers.") parser.add_argument("--tls", action="store_true", help="add tls configuration") + parser.add_argument("--purge", action="store_true", help="destroy database") args = parser.parse_args() - url = url = "mongodb://admin:admin@localhost:27017/default?authSource=admin" + url = f"mongodb://{AUTHDB}:{AUTHDB}@{HOST}/{DATABASE}?authSource=admin" if args.tls: _params = "?tls=true&tlsCAFile=./config/cacert&ssl_keyfile=./config/key&ssl_certfile=./config/cert" - url = f"mongodb://admin:admin@localhost:27017/default{_params}&authSource=admin" - asyncio.run(clean_mongodb(url)) + url = f"mongodb://{AUTHDB}:{AUTHDB}@{HOST}/{DATABASE}{_params}&authSource=admin" + LOG.debug(f"=== Database url {url} ===") + if args.purge: + asyncio.run(purge_mongodb(url)) + else: + asyncio.run(clean_mongodb(url)) diff --git a/tests/integration/mongo_indexes.py b/tests/integration/mongo_indexes.py new file mode 100755 index 000000000..982478ffe --- /dev/null +++ b/tests/integration/mongo_indexes.py @@ -0,0 +1,72 @@ +"""Create 
MongoDB default collections and indexes.""" + +import argparse +import asyncio +import logging +import os + +import pymongo +from motor.motor_asyncio import AsyncIOMotorClient +from pymongo import TEXT + +serverTimeout = 15000 +connectTimeout = 15000 + +# === Global vars === +DATABASE = os.getenv("MONGO_DATABASE", "default") +AUTHDB = os.getenv("MONGO_AUTHDB", "admin") +HOST = os.getenv("MONGO_HOST", "localhost:27017") +FORMAT = "[%(asctime)s][%(name)s][%(process)d %(processName)s][%(levelname)-8s](L:%(lineno)s) %(funcName)s: %(message)s" +logging.basicConfig(format=FORMAT, datefmt="%Y-%m-%d %H:%M:%S") +LOG = logging.getLogger(__name__) +LOG.setLevel(logging.DEBUG) + + +def create_db_client(url: str) -> AsyncIOMotorClient: + """Initialize database client for AioHTTP App. + + :returns: Coroutine-based Motor client for Mongo operations + """ + return AsyncIOMotorClient(url, connectTimeoutMS=connectTimeout, serverSelectionTimeoutMS=serverTimeout) + + +async def create_indexes(url: str) -> None: + """Create default collections and their indexes.""" + client = create_db_client(url) + db = client[DATABASE] + LOG.debug(f"Current database: {db}") + LOG.debug("=== Create collections ===") + for col in ["folder", "user"]: + try: + await db.create_collection(col) + except pymongo.errors.CollectionInvalid as e: + LOG.debug(f"=== Collection {col} not created due to {str(e)} ===") + LOG.debug("=== Create indexes ===") + + # Keep this list in sync with scripts/init_mongo.js + indexes = [ + db.folder.create_index([("dateCreated", -1)]), + db.folder.create_index([("datePublished", -1)]), + db.folder.create_index([("folderId", 1)], unique=True), + db.folder.create_index([("text_name", TEXT)]), + db.user.create_index([("userId", 1)], unique=True), + ] + + for index in indexes: + try: + await index + except Exception as e: + LOG.debug(f"=== Indexes not created due to {str(e)} ===") + LOG.debug("=== DONE ===") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Create MongoDB default collections and indexes.") + parser.add_argument("--tls", action="store_true", help="add tls configuration") + 
args = parser.parse_args() + url = f"mongodb://{AUTHDB}:{AUTHDB}@{HOST}/{DATABASE}?authSource=admin" + if args.tls: + _params = "?tls=true&tlsCAFile=./config/cacert&ssl_keyfile=./config/key&ssl_certfile=./config/cert" + url = f"mongodb://{AUTHDB}:{AUTHDB}@{HOST}/{DATABASE}{_params}&authSource=admin" + LOG.debug(f"=== Database url {url} ===") + asyncio.run(create_indexes(url)) From e2e1070965313a8f7080914764ae6b3cf74bf6e1 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Thu, 9 Dec 2021 13:50:54 +0000 Subject: [PATCH 7/9] Add integration tests for filtering by name Adds dependencies and configs needed to run integration tests in a local environment. --- .env.example | 2 +- requirements-dev.in | 3 ++- requirements-dev.txt | 2 ++ tests/integration/run_tests.py | 40 ++++++++++++++++++++++++++++++++-- 4 files changed, 43 insertions(+), 4 deletions(-) diff --git a/.env.example b/.env.example index 7b9861ec9..33b5ede87 100644 --- a/.env.example +++ b/.env.example @@ -10,7 +10,7 @@ JWK_URL=http://mockauth:8000/keyset # app urls BASE_URL=http://localhost:5430 # change to http://frontend:3000 if started using docker-compose for frontend -REDIRECT_URL=http://localhost:3000 +# REDIRECT_URL=http://localhost:3000 # logging LOG_LEVEL=DEBUG diff --git a/requirements-dev.in b/requirements-dev.in index e5a9306fa..2181f099b 100644 --- a/requirements-dev.in +++ b/requirements-dev.in @@ -1,6 +1,7 @@ +aiofiles # to run integration tests black certifi flake8 -pip-tools +pip-tools # pip dependencies management pre-commit tox diff --git a/requirements-dev.txt b/requirements-dev.txt index 49b4bcb92..b1c7301bb 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -4,6 +4,8 @@ # # pip-compile requirements-dev.in # +aiofiles==0.8.0 + # via -r requirements-dev.in backports.entry-points-selectable==1.1.1 # via virtualenv black==21.12b0 diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index d5a0183e2..028377b23 100644 --- a/tests/integration/run_tests.py 
+++ b/tests/integration/run_tests.py @@ -8,11 +8,11 @@ import asyncio import json import logging -from pathlib import Path import urllib import xml.etree.ElementTree as ET +from pathlib import Path -import aiofiles # type: ignore +import aiofiles import aiohttp from aiohttp import FormData @@ -925,6 +925,41 @@ async def test_getting_paginated_folders(sess): assert resp.status == 400 +async def test_getting_folders_filtered_by_name(sess): + """Check that /folders returns folders filtered by name. + + :param sess: HTTP session in which request call is made + """ + names = [" filter new ", "_filter_", "-filter-", "_extra-", "_2021special_"] + folders = [] + for name in names: + folder_data = {"name": f"Test{name}name", "description": "Test filtering name"} + folders.append(await post_folder(sess, folder_data)) + + async with sess.get(f"{folders_url}?name=filter") as resp: + assert resp.status == 200 + ans = await resp.json() + assert ans["page"]["totalFolders"] == 3 + + async with sess.get(f"{folders_url}?name=extra") as resp: + assert resp.status == 200 + ans = await resp.json() + assert ans["page"]["totalFolders"] == 1 + + async with sess.get(f"{folders_url}?name=2021 special") as resp: + assert resp.status == 200 + ans = await resp.json() + assert ans["page"]["totalFolders"] == 0 + + async with sess.get(f"{folders_url}?name=new extra") as resp: + assert resp.status == 200 + ans = await resp.json() + assert ans["page"]["totalFolders"] == 2 + + for folder in folders: + await delete_folder(sess, folder) + + async def test_getting_user_items(sess): """Test querying user's templates or folders in the user object with GET user request. 
@@ -1320,6 +1355,7 @@ async def main(): # Test getting a list of folders and draft templates owned by the user LOG.debug("=== Testing getting folders, draft folders and draft templates with pagination ===") await test_getting_paginated_folders(sess) + await test_getting_folders_filtered_by_name(sess) await test_getting_user_items(sess) # Test add, modify, validate and release action with submissions From d0c83abcc2fe84cf516b598d3b8711ea147015ad Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Thu, 9 Dec 2021 15:07:52 +0000 Subject: [PATCH 8/9] Github workflow env vars Add env vars to github workflow file as database scripts and integration tests use environment variables. --- .github/workflows/int.yml | 24 ++++++++++++++++++++++-- tests/integration/run_tests.py | 11 ++++++----- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/.github/workflows/int.yml b/.github/workflows/int.yml index dee04b94c..0da797a37 100644 --- a/.github/workflows/int.yml +++ b/.github/workflows/int.yml @@ -29,10 +29,20 @@ jobs: docker-compose --env-file .env.example up -d --build sleep 30 - - name: Run Integration test + - name: Clear database run: | python tests/integration/clean_db.py + env: + MONGO_HOST: localhost:27017 + MONGO_DATABASE: default + MONGO_AUTHDB: admin + + - name: Run Integration test + run: | python tests/integration/run_tests.py + env: + BASE_URL: http://localhost:5430 + ISS_URL: http://localhost:8000 - name: Collect logs from docker if: ${{ failure() }} @@ -58,10 +68,20 @@ jobs: docker-compose -f docker-compose-tls.yml --env-file .env.example up -d sleep 30 - - name: Run Integration test + - name: Clear database run: | python tests/integration/clean_db.py --tls + env: + MONGO_HOST: localhost:27017 + MONGO_DATABASE: default + MONGO_AUTHDB: admin + + - name: Run Integration test + run: | python tests/integration/run_tests.py + env: + BASE_URL: http://localhost:5430 + ISS_URL: http://localhost:8000 - name: Collect logs from docker if: ${{ failure() }} diff 
--git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index 028377b23..bbf5a9928 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -8,6 +8,7 @@ import asyncio import json import logging +import os import urllib import xml.etree.ElementTree as ET from pathlib import Path @@ -47,8 +48,8 @@ ("experiment", "ERX000119.json", "ERX000119.json"), ("analysis", "ERZ266973.json", "ERZ266973.json"), ] -base_url = "http://localhost:5430" -mock_auth_url = "http://localhost:8000" +base_url = os.getenv("BASE_URL", "http://localhost:5430") +mock_auth_url = os.getenv("ISS_URL", "http://localhost:8000") objects_url = f"{base_url}/objects" drafts_url = f"{base_url}/drafts" templates_url = f"{base_url}/templates" @@ -357,8 +358,8 @@ async def post_folder(sess, data): """ async with sess.post(f"{folders_url}", data=json.dumps(data)) as resp: LOG.debug("Adding new folder") - assert resp.status == 201, "HTTP Status code error" ans = await resp.json() + assert resp.status == 201, f"HTTP Status code error {resp.status} {ans}" return ans["folderId"] @@ -937,13 +938,13 @@ async def test_getting_folders_filtered_by_name(sess): folders.append(await post_folder(sess, folder_data)) async with sess.get(f"{folders_url}?name=filter") as resp: - assert resp.status == 200 ans = await resp.json() + assert resp.status == 200, f"HTTP Status code error {resp.status} {ans}" assert ans["page"]["totalFolders"] == 3 async with sess.get(f"{folders_url}?name=extra") as resp: - assert resp.status == 200 ans = await resp.json() + assert resp.status == 200, f"HTTP Status code error {resp.status} {ans}" assert ans["page"]["totalFolders"] == 1 async with sess.get(f"{folders_url}?name=2021 special") as resp: From 8894f12ca9ba1b1471d69ed25c3ed3eca29496d5 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Tue, 14 Dec 2021 11:12:12 +0000 Subject: [PATCH 9/9] Add sorting optimization --- metadata_backend/api/handlers.py | 5 ++++- metadata_backend/api/operators.py | 19 
++++++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/metadata_backend/api/handlers.py b/metadata_backend/api/handlers.py index 96bbb1cba..473582ae5 100644 --- a/metadata_backend/api/handlers.py +++ b/metadata_backend/api/handlers.py @@ -685,6 +685,7 @@ async def get_folders(self, req: Request) -> Response: """ page = self._get_page_param(req, "page", 1) per_page = self._get_page_param(req, "per_page", 5) + sort = {"date": True, "score": False} db_client = req.app["db_client"] user_operator = UserOperator(db_client) @@ -705,9 +706,11 @@ async def get_folders(self, req: Request) -> Response: name_param = req.query.get("name", "") if name_param: folder_query = {"$text": {"$search": name_param}} + sort["score"] = True + sort["date"] = False folder_operator = FolderOperator(db_client) - folders, total_folders = await folder_operator.query_folders(folder_query, page, per_page) + folders, total_folders = await folder_operator.query_folders(folder_query, page, per_page, sort) result = ujson.dumps( { diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 88ed8f5b7..5eacaea80 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -3,7 +3,7 @@ import time from abc import ABC, abstractmethod from datetime import datetime -from typing import Any, Dict, List, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union from uuid import uuid4 from aiohttp import web @@ -678,18 +678,31 @@ async def create_folder(self, data: Dict) -> str: LOG.info(f"Inserting folder with id {folder_id} to database succeeded.") return folder_id - async def query_folders(self, query: Dict, page_num: int, page_size: int) -> Tuple[List, int]: + async def query_folders( + self, query: Dict, page_num: int, page_size: int, sort_param: Optional[dict] = None + ) -> Tuple[List, int]: """Query database based on url query parameters. 
:param query: Dict containing query information :param page_num: Page number :param page_size: Results per page + :param sort_param: Sorting options. :returns: Paginated query result """ skips = page_size * (page_num - 1) + + if not sort_param: + sort = {"dateCreated": -1} + elif sort_param["score"] and not sort_param["date"]: + sort = {"score": {"$meta": "textScore"}, "dateCreated": -1} # type: ignore + elif sort_param["score"] and sort_param["date"]: + sort = {"dateCreated": -1, "score": {"$meta": "textScore"}} # type: ignore + else: + sort = {"dateCreated": -1} + _query = [ {"$match": query}, - {"$sort": {"dateCreated": -1}}, + {"$sort": sort}, {"$skip": skips}, {"$limit": page_size}, {"$project": {"_id": 0, "text_name": 0}},