From 7cf101df7f3fe1940725d31f8d44a4f9a41ade75 Mon Sep 17 00:00:00 2001 From: mgcam Date: Thu, 3 Mar 2022 16:57:28 +0000 Subject: [PATCH 01/37] Token-based authorization. Token-based authorization, initially for the pipeline endpoints only, the pipeline the token belongs to is not validated in the context of the request. --- server/npg/porch/auth/__init__.py | 0 server/npg/porch/auth/token.py | 15 +++++ server/npg/porch/endpoints/pipelines.py | 20 ++++-- server/npg/porchdb/auth.py | 90 +++++++++++++++++++++++++ server/npg/porchdb/connection.py | 22 +++++- server/tests/db_auth.py | 62 +++++++++++++++++ server/tests/fixtures/deploy_db.py | 3 +- server/tests/pipeline_route_test.py | 43 +++++++++--- server/tests/task_route_test.py | 9 ++- 9 files changed, 244 insertions(+), 20 deletions(-) create mode 100644 server/npg/porch/auth/__init__.py create mode 100644 server/npg/porch/auth/token.py create mode 100644 server/npg/porchdb/auth.py create mode 100644 server/tests/db_auth.py diff --git a/server/npg/porch/auth/__init__.py b/server/npg/porch/auth/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/server/npg/porch/auth/token.py b/server/npg/porch/auth/token.py new file mode 100644 index 0000000..7038c7e --- /dev/null +++ b/server/npg/porch/auth/token.py @@ -0,0 +1,15 @@ +from fastapi import Depends +from fastapi.security import HTTPBearer + +from npg.porchdb.connection import get_CredentialsValidator + +auth_scheme = HTTPBearer() + +async def validate( + creds = Depends(auth_scheme), + validator = Depends(get_CredentialsValidator) +): + token = creds.credentials + p = await validator.token2permission(token) + + return p diff --git a/server/npg/porch/endpoints/pipelines.py b/server/npg/porch/endpoints/pipelines.py index 426d8bf..a8591b7 100644 --- a/server/npg/porch/endpoints/pipelines.py +++ b/server/npg/porch/endpoints/pipelines.py @@ -28,6 +28,7 @@ from npg.porch.models.pipeline import Pipeline from npg.porchdb.connection import get_DbAccessor +from npg.porch.auth.token import validate router = APIRouter( prefix="/pipelines", @@ -43,8 +44,10 @@ async def get_pipelines( uri: Optional[str] = None, version: Optional[str] = None, - db_accessor=Depends(get_DbAccessor) + db_accessor=Depends(get_DbAccessor), + permissions=Depends(validate) ) -> List[Pipeline]: + return await db_accessor.get_all_pipelines(uri, version) @router.get( @@ -53,8 +56,12 @@ async def get_pipelines( responses={status.HTTP_404_NOT_FOUND: {"description": "Not found"}}, summary="Get information about one pipeline.", ) -async def get_pipeline(pipeline_name: str, - db_accessor=Depends(get_DbAccessor)): +async def get_pipeline( + pipeline_name: str, + db_accessor=Depends(get_DbAccessor), + permissions=Depends(validate) +) -> Pipeline: + pipeline = None try: pipeline = await db_accessor.get_pipeline_by_name(name=pipeline_name) @@ -74,7 +81,12 @@ async def get_pipeline(pipeline_name: str, status.HTTP_409_CONFLICT: {"description": "Pipeline already exists"} } ) -async def create_pipeline(pipeline: Pipeline, db_accessor=Depends(get_DbAccessor)) -> Pipeline: +async def create_pipeline( + pipeline: Pipeline, + db_accessor=Depends(get_DbAccessor), + permissions=Depends(validate) +) -> Pipeline: + new_pipeline = None try: new_pipeline = await db_accessor.create_pipeline(pipeline) diff --git a/server/npg/porchdb/auth.py b/server/npg/porchdb/auth.py new file mode 100644 index 0000000..6c59a70 --- /dev/null +++ b/server/npg/porchdb/auth.py @@ -0,0 +1,90 @@ +# Copyright (C) 2021, 2022 Genome Research Ltd. 
+# +# Author: Kieron Taylor kt19@sanger.ac.uk +# Author: Marina Gourtovaia mg8@sanger.ac.uk +# +# This file is part of npg_porch +# +# npg_porch is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see . + +import logging +import re +from sqlalchemy import select +from sqlalchemy.orm import contains_eager +from sqlalchemy.orm.exc import NoResultFound +from fastapi import HTTPException + +from npg.porchdb.models import Token +from npg.porch.models.permission import Permission, RolesEnum + +AUTH_TOKEN_LENGTH = 32 + +class Validator: + ''' + A validator for credentials presented by the requestor. + + Instantiate with a sqlalchemy AsyncSession + ''' + + def __init__(self, session): + self.session = session + self.logger = logging.getLogger(__name__) + + async def token2permission(self, token: str): + + message = None + + if len(token) != AUTH_TOKEN_LENGTH: + message = f"The auth token should be {AUTH_TOKEN_LENGTH} chars long" + else: + prog = re.compile(r'\w{32}', flags=re.ASCII) + if prog.match(token) is None: + message = 'Token failed character validation' + + valid_token_row = None + if message is None: + try: + result = await self.session.execute( + select(Token) + .filter_by(token=token) + .join(Token.pipeline) + .options(contains_eager(Token.pipeline)) + ) + valid_token_row = result.scalar_one() + except NoResultFound: + message = 'An unknown token is used' + + if (valid_token_row is not None) and (valid_token_row.date_revoked is not None): + message = 'A revoked token is used' + + if message: + self.logger.warning(message) + raise HTTPException(status_code=403, detail="Invalid token") + + permission = None + pipeline = valid_token_row.pipeline + token_id = valid_token_row.token_id + if pipeline is None: + permission = Permission( + role = RolesEnum.POWER_USER, + requestor_id = token_id + ) + else: + permission = Permission( + role = RolesEnum.REGULAR_USER, + requestor_id = token_id, + pipeline = pipeline.convert_to_model() + ) + + return permission diff --git a/server/npg/porchdb/connection.py b/server/npg/porchdb/connection.py index 1bc559b..64eda87 100644 --- a/server/npg/porchdb/connection.py +++ b/server/npg/porchdb/connection.py @@ -24,6 +24,7 @@ from npg.porchdb.models import Base from npg.porchdb.data_access import AsyncDbAccessor +from npg.porchdb.auth import Validator config = { 'DB_URL': os.environ.get('DB_URL'), @@ -47,13 +48,28 @@ ) async def get_DbAccessor(): - 'Provides a hook for fastapi to Depend on a DB session in each route' + ''' + Provides a hook for fastapi to Depend on a DB session in each route. + + Yields an instance of AsyncDbAccessor class, which provides an API + for access to data. + + Starts a transaction that finished automatically when the returned + object drops out of scope. 
+ ''' async with session_factory() as session: - # Starting a transaction that finished automatically when the returned - # object drops out of scope async with session.begin(): yield AsyncDbAccessor(session) +async def get_CredentialsValidator(): + ''' + Similar to get_DbAccessor, but yields an instance of the Validator class, + which provides methods for validating credentials submitted with the + request. + ''' + async with session_factory() as session: + async with session.begin(): + yield Validator(session) async def deploy_schema(): async with engine.begin() as conn: diff --git a/server/tests/db_auth.py b/server/tests/db_auth.py new file mode 100644 index 0000000..0876242 --- /dev/null +++ b/server/tests/db_auth.py @@ -0,0 +1,62 @@ +import pytest +import datetime +from sqlalchemy import select +from fastapi.exceptions import HTTPException +from npg.porchdb.models import Token +from npg.porchdb.auth import Validator +import npg.porch.models.permission +import npg.porch.models.pipeline + +@pytest.mark.asyncio +async def test_token_string_is_valid(async_minimum): + + v = Validator(session = async_minimum) + assert isinstance(v, (npg.porchdb.auth.Validator)) + with pytest.raises(HTTPException): + await v.token2permission("") + with pytest.raises(HTTPException): + await v.token2permission('aaaa') + with pytest.raises(HTTPException): + await v.token2permission('AAAAAAAAAAAAAaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa') + with pytest.raises(HTTPException): + await v.token2permission('7dc1457531e3495?9bd5:bcda579c1c6') + +async def test_token_is_known_and_valid(async_minimum): + + v = Validator(session = async_minimum) + with pytest.raises(HTTPException): + await v.token2permission('doesnotexist11111111111111111111') + + result = await async_minimum.execute( + select(Token) + .filter_by(description='OpenStack host, job finder') + ) + token_row = result.scalar_one() + token_string = token_row.token + token_row.date_revoked = datetime.date(2022, 1, 1) + async_minimum.add(token_row) + await async_minimum.commit() + + with pytest.raises(HTTPException): + await v.token2permission(token_string) + +async def test_permission_object_is_returned(async_minimum): + + v = Validator(session = async_minimum) + result = await async_minimum.execute(select(Token)) + token_rows = result.scalars().all() + for t in token_rows: + if t.description == 'Seqfarm host, job runner': + p = await v.token2permission(t.token) + assert isinstance(p, (npg.porch.models.permission.Permission)) + assert p.pipeline is not None + assert isinstance(p.pipeline, (npg.porch.models.pipeline.Pipeline)) + assert p.pipeline.name == 'ptest one' + assert p.requestor_id == t.token_id + assert p.role == 'regular_user' + elif t.description == 'Seqfarm host, admin': + p = await v.token2permission(t.token) + assert isinstance(p, (npg.porch.models.permission.Permission)) + assert p.pipeline is None + assert p.requestor_id == t.token_id + assert p.role == 'power_user' diff --git a/server/tests/fixtures/deploy_db.py b/server/tests/fixtures/deploy_db.py index 38d02ae..24edbcd 100644 --- a/server/tests/fixtures/deploy_db.py +++ b/server/tests/fixtures/deploy_db.py @@ -21,6 +21,7 @@ def minimum_data(): ) tokens = [ Token( + token='cac0533d5599489d9a3d998028a79fe8', pipeline=pipeline, description='OpenStack host, job finder' ), @@ -70,7 +71,7 @@ def lots_of_tasks(): 'A good supply of tasks for testing claims' pipeline = Pipeline( - name='ptest one', + name='ptest some', repository_uri='pipeline-test.com', version='0.3.14' ) diff --git 
a/server/tests/pipeline_route_test.py b/server/tests/pipeline_route_test.py index b29db0f..d5fdeec 100644 --- a/server/tests/pipeline_route_test.py +++ b/server/tests/pipeline_route_test.py @@ -2,14 +2,27 @@ from npg.porch.models import Pipeline +headers = { + 'Authorization': 'Bearer cac0533d5599489d9a3d998028a79fe8', + 'accept': 'application/json' +} def http_create_pipeline(fastapi_testclient, pipeline): - response = fastapi_testclient.post('/pipelines', json=pipeline.dict(), allow_redirects=True) + response = fastapi_testclient.post( + '/pipelines', json=pipeline.dict(), allow_redirects=True + ) + assert response.status_code == status.HTTP_403_FORBIDDEN + response = fastapi_testclient.post( + '/pipelines', json=pipeline.dict(), allow_redirects=True, + headers=headers + ) assert response.status_code == status.HTTP_201_CREATED return response.json() def test_pipeline_get(async_minimum, fastapi_testclient): response = fastapi_testclient.get('/pipelines') + assert response.status_code == status.HTTP_403_FORBIDDEN + response = fastapi_testclient.get('/pipelines', headers=headers) assert response.status_code == status.HTTP_200_OK pipeline = Pipeline.parse_obj(response.json()[0]) assert pipeline, 'Response fits into the over-the-wire model' @@ -33,12 +46,16 @@ def test_pipeline_filtered_get(async_minimum, fastapi_testclient): http_create_pipeline(fastapi_testclient, second_pipeline) http_create_pipeline(fastapi_testclient, third_pipeline) - response = fastapi_testclient.get('/pipelines?version=0.3.14') + response = fastapi_testclient.get( + '/pipelines?version=0.3.14', headers=headers + ) assert response.status_code == status.HTTP_200_OK pipes = response.json() assert len(pipes) == 3, 'All three pipelines have the same version' - response = fastapi_testclient.get('/pipelines?uri=http://test.com') + response = fastapi_testclient.get( + '/pipelines?uri=http://test.com', headers=headers + ) assert response.status_code == status.HTTP_200_OK pipes = response.json() assert len(pipes) == 1, 'Only one pipeline matches the uri' @@ -46,7 +63,9 @@ def test_pipeline_filtered_get(async_minimum, fastapi_testclient): def test_get_known_pipeline(async_minimum, fastapi_testclient): - response = fastapi_testclient.get('/pipelines/ptest one') + response = fastapi_testclient.get( + '/pipelines/ptest one', headers=headers + ) assert response.status_code == status.HTTP_200_OK pipeline = Pipeline.parse_obj(response.json()) @@ -54,11 +73,13 @@ def test_get_known_pipeline(async_minimum, fastapi_testclient): assert pipeline.name == 'ptest one' assert pipeline.version == '0.3.14' - response = fastapi_testclient.get('/pipelines/not here') + response = fastapi_testclient.get( + '/pipelines/not here', headers=headers + ) assert response.status_code == status.HTTP_404_NOT_FOUND assert response.json()['detail'] == "Pipeline 'not here' not found" -def test_create_pipeline(fastapi_testclient): +def test_create_pipeline(async_minimum, fastapi_testclient): # Create a pipeline desired_pipeline = Pipeline( name='ptest two', @@ -74,7 +95,8 @@ def test_create_pipeline(fastapi_testclient): response = fastapi_testclient.post( '/pipelines', json=desired_pipeline.dict(), - allow_redirects=True + allow_redirects=True, + headers=headers ) assert response.status_code == status.HTTP_409_CONFLICT, 'ptest two already in DB' @@ -92,10 +114,10 @@ def test_create_pipeline(fastapi_testclient): # Retrieve the same pipelines response = fastapi_testclient.get( - '/pipelines' + '/pipelines', headers=headers ) assert response.status_code == 
status.HTTP_200_OK - assert response.json() == [desired_pipeline, second_desired_pipeline] + assert response.json()[1:] == [desired_pipeline, second_desired_pipeline] # Create a very poorly provenanced pipeline third_desired_pipeline = Pipeline( @@ -107,7 +129,8 @@ def test_create_pipeline(fastapi_testclient): response = fastapi_testclient.post( '/pipelines', json=third_desired_pipeline.dict(), - allow_redirects=True + allow_redirects=True, + headers=headers ) assert response.status_code == status.HTTP_400_BAD_REQUEST diff --git a/server/tests/task_route_test.py b/server/tests/task_route_test.py index cb85f85..f88fad7 100644 --- a/server/tests/task_route_test.py +++ b/server/tests/task_route_test.py @@ -90,8 +90,13 @@ def test_task_update(async_minimum, fastapi_testclient): assert response.status_code == status.HTTP_404_NOT_FOUND assert response.json() == {'detail': 'Pipeline not found'} -def test_task_claim(async_tasks, fastapi_testclient): - response = fastapi_testclient.get('/pipelines/ptest one') +def test_task_claim(async_minimum, async_tasks, fastapi_testclient): + headers = { + 'Authorization': 'Bearer cac0533d5599489d9a3d998028a79fe8', + 'accept': 'application/json' + } + response = fastapi_testclient.get( + '/pipelines/ptest some', headers = headers) assert response.status_code == status.HTTP_200_OK pipeline = response.json() From ce1703500ae804bd6d624fa40fb218cec76f716b Mon Sep 17 00:00:00 2001 From: mgcam Date: Wed, 9 Mar 2022 14:23:02 +0000 Subject: [PATCH 02/37] correct name for the test file --- server/tests/{db_auth.py => db_auth_test.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename server/tests/{db_auth.py => db_auth_test.py} (100%) diff --git a/server/tests/db_auth.py b/server/tests/db_auth_test.py similarity index 100% rename from server/tests/db_auth.py rename to server/tests/db_auth_test.py From 60aa467f77065512799b53a4396a3471ce584f94 Mon Sep 17 00:00:00 2001 From: mgcam Date: Wed, 9 Mar 2022 17:04:14 +0000 Subject: [PATCH 03/37] Bug fix: need left join to link tokens to pipelines --- server/npg/porchdb/auth.py | 8 ++++++-- server/tests/db_auth_test.py | 17 ++++++++++++++++- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/server/npg/porchdb/auth.py b/server/npg/porchdb/auth.py index 6c59a70..8b2e877 100644 --- a/server/npg/porchdb/auth.py +++ b/server/npg/porchdb/auth.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021, 2022 Genome Research Ltd. +# Copyright (C) 2022 Genome Research Ltd. # # Author: Kieron Taylor kt19@sanger.ac.uk # Author: Marina Gourtovaia mg8@sanger.ac.uk @@ -55,10 +55,14 @@ async def token2permission(self, token: str): valid_token_row = None if message is None: try: + # Using 'outerjoin' to get the left join for token, pipeline. + # We need to retrieve all token rows, regardless of whether + # they are linked the pipeline table or not (we are using a + # nullable foreign key to allow for no link). 
result = await self.session.execute( select(Token) .filter_by(token=token) - .join(Token.pipeline) + .outerjoin(Token.pipeline) .options(contains_eager(Token.pipeline)) ) valid_token_row = result.scalar_one() diff --git a/server/tests/db_auth_test.py b/server/tests/db_auth_test.py index 0876242..b0d9eb2 100644 --- a/server/tests/db_auth_test.py +++ b/server/tests/db_auth_test.py @@ -2,7 +2,8 @@ import datetime from sqlalchemy import select from fastapi.exceptions import HTTPException -from npg.porchdb.models import Token + +from npg.porchdb.models import Token, Pipeline from npg.porchdb.auth import Validator import npg.porch.models.permission import npg.porch.models.pipeline @@ -21,12 +22,14 @@ async def test_token_string_is_valid(async_minimum): with pytest.raises(HTTPException): await v.token2permission('7dc1457531e3495?9bd5:bcda579c1c6') +@pytest.mark.asyncio async def test_token_is_known_and_valid(async_minimum): v = Validator(session = async_minimum) with pytest.raises(HTTPException): await v.token2permission('doesnotexist11111111111111111111') + # Mark one of the tokens as revoked. result = await async_minimum.execute( select(Token) .filter_by(description='OpenStack host, job finder') @@ -40,11 +43,23 @@ async def test_token_is_known_and_valid(async_minimum): with pytest.raises(HTTPException): await v.token2permission(token_string) +@pytest.mark.asyncio async def test_permission_object_is_returned(async_minimum): + # The fixtures have a token not associated with any pipeline. + # To model data realistically, create a pipeline not associated + # with any token. + async_minimum.add(Pipeline( + name='ptest ten', + repository_uri='pipeline-testten.com', + version='0.3.15' + )) + await async_minimum.commit() + v = Validator(session = async_minimum) result = await async_minimum.execute(select(Token)) token_rows = result.scalars().all() + for t in token_rows: if t.description == 'Seqfarm host, job runner': p = await v.token2permission(t.token) From ced91973f8ba105fbaf71526eff520b932d48273 Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Mon, 7 Mar 2022 12:01:31 +0000 Subject: [PATCH 04/37] Provide clue when testing mode not enabled --- server/npg/porchdb/connection.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/npg/porchdb/connection.py b/server/npg/porchdb/connection.py index 1bc559b..c705113 100644 --- a/server/npg/porchdb/connection.py +++ b/server/npg/porchdb/connection.py @@ -36,7 +36,9 @@ # config['DB_URL'] = 'sqlite+aiosqlite:///test.db' if config['DB_URL'] is None or config['DB_URL'] == '': - raise Exception("ENV['DB_URL'] must be set with a database URL") + raise Exception( + "ENV['DB_URL'] must be set with a database URL, or NPG_PORCH_MODE must be set for testing" + ) engine = create_async_engine( config['DB_URL'], future=True From 5adeeba743afdf2804c1e64e8ec0950bb32597f4 Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Mon, 7 Mar 2022 12:04:02 +0000 Subject: [PATCH 05/37] Remove spurious test mark --- server/tests/data_access_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/server/tests/data_access_test.py b/server/tests/data_access_test.py index 2cc0ab2..5a23c37 100644 --- a/server/tests/data_access_test.py +++ b/server/tests/data_access_test.py @@ -19,7 +19,6 @@ def give_me_a_pipeline(number: int = 1): async def store_me_a_pipeline(dac: AsyncDbAccessor, number: int = 1) -> ModelledPipeline: return await dac.create_pipeline(give_me_a_pipeline(number)) -@pytest.mark.asyncio def test_data_accessor_setup(async_session): with 
pytest.raises(TypeError): dac = AsyncDbAccessor() From 1192809fee38092149993e7ab136130965435d28 Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Wed, 9 Mar 2022 17:32:11 +0000 Subject: [PATCH 06/37] Document some settings for production use --- README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/README.md b/README.md index 2ea938d..3aa8642 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,27 @@ and open your browser at `http://localhost:8080` to see links to the docs. The server will not start without `DB_URL` in the environment +## Running in production + +When you want HTTPS, logging and all that jazz: + +```bash +uvicorn main:app --workers 2 --host 0.0.0.0 --port 8080 --log-config ~/logging.json --ssl-keyfile ~/.ssh/key.pem --ssl-certfile ~/.ssh/cert.pem --ssl-ca-certs /usr/local/share/ca-certificates/institute_ca.crt +``` + +Consider running with nohup or similar. + +Some notes on arguments: +--workers: How many pre-forks to run. Async should mean we don't need many. Directly increases memory consumption + +--host: 0.0.0.0 = bind to all network interfaces. Reliable but greedy in some situations + +--log-config: Refers to a JSON file for python logging library. An example file is found in /server/logging.json. Uvicorn provides its own logging configuration via `uvicorn.access` and `uvicorn.error`. These may behave undesirably, and can be overridden in the JSON file with an alternate config. Likewise, fastapi logs to `fastapi` if that needs filtering. For logging to files, set `use_colors = False` in the relevant handlers or shell colour settings will appear as garbage in the logs. + +--ssl-keyfile: A PEM format key for the server certificate +--ssl-certfile: A PEM format certificate for signing HTTPS communications +--ssl-ca-certs: A CRT format certificate authority file that pleases picky clients. Uvicorn does not automatically find the system certificates, or so it seems. + ## Testing ```bash From 6230a85c0c15eddd2516262fb55895df9b418510 Mon Sep 17 00:00:00 2001 From: mgcam Date: Wed, 9 Mar 2022 17:38:41 +0000 Subject: [PATCH 07/37] Improve regexp for the token string. Restrict the tokens to hexadecimal. Store the compiled regular expression as a module-level variable. Use private module-level variables. 
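The series only validates tokens; nothing here mints them. For context, a minimal sketch (not part of npg_porch; make_token is a hypothetical helper) of generating a conforming 32-character hexadecimal token with the Python standard library, mirroring the length and regular expression this patch introduces:

    import re
    import secrets

    AUTH_TOKEN_LENGTH = 32
    # Same pattern this patch adds to npg/porchdb/auth.py: hexadecimal only.
    AUTH_TOKEN_REGEXP = re.compile(r'\A[0-9A-F]+\Z', flags=re.ASCII | re.IGNORECASE)

    def make_token() -> str:
        # secrets.token_hex(16) returns 16 random bytes as 32 hex characters.
        return secrets.token_hex(AUTH_TOKEN_LENGTH // 2)

    token = make_token()
    assert len(token) == AUTH_TOKEN_LENGTH
    assert AUTH_TOKEN_REGEXP.match(token) is not None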
--- server/npg/porchdb/auth.py | 14 +++++++------- server/tests/db_auth_test.py | 9 +++++++++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/server/npg/porchdb/auth.py b/server/npg/porchdb/auth.py index 8b2e877..e8fd7ab 100644 --- a/server/npg/porchdb/auth.py +++ b/server/npg/porchdb/auth.py @@ -28,7 +28,9 @@ from npg.porchdb.models import Token from npg.porch.models.permission import Permission, RolesEnum -AUTH_TOKEN_LENGTH = 32 +__AUTH_TOKEN_LENGTH__ = 32 +__AUTH_TOKEN_REGEXP__ = re.compile( + r'\A[0-9A-F]+\Z', flags = re.ASCII | re.IGNORECASE) class Validator: ''' @@ -45,12 +47,10 @@ async def token2permission(self, token: str): message = None - if len(token) != AUTH_TOKEN_LENGTH: - message = f"The auth token should be {AUTH_TOKEN_LENGTH} chars long" - else: - prog = re.compile(r'\w{32}', flags=re.ASCII) - if prog.match(token) is None: - message = 'Token failed character validation' + if len(token) != __AUTH_TOKEN_LENGTH__: + message = f"The auth token should be {__AUTH_TOKEN_LENGTH__} chars long" + elif __AUTH_TOKEN_REGEXP__.match(token) is None: + message = 'Token failed character validation' valid_token_row = None if message is None: diff --git a/server/tests/db_auth_test.py b/server/tests/db_auth_test.py index b0d9eb2..c0672af 100644 --- a/server/tests/db_auth_test.py +++ b/server/tests/db_auth_test.py @@ -13,19 +13,28 @@ async def test_token_string_is_valid(async_minimum): v = Validator(session = async_minimum) assert isinstance(v, (npg.porchdb.auth.Validator)) + # This token is an empty string. with pytest.raises(HTTPException): await v.token2permission("") + # This token is too short. with pytest.raises(HTTPException): await v.token2permission('aaaa') + # This token is too long. with pytest.raises(HTTPException): await v.token2permission('AAAAAAAAAAAAAaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa') + # This token contains punctuation characters. with pytest.raises(HTTPException): await v.token2permission('7dc1457531e3495?9bd5:bcda579c1c6') + # This token contains characters beyong F. + with pytest.raises(HTTPException): + await v.token2permission('7dc1457531e3495P9bd5Kbcda579c1c6') @pytest.mark.asyncio async def test_token_is_known_and_valid(async_minimum): v = Validator(session = async_minimum) + + # This token does not exist. with pytest.raises(HTTPException): await v.token2permission('doesnotexist11111111111111111111') From 93dea5efb458bd6dfd1eef7c174f413f978f2136 Mon Sep 17 00:00:00 2001 From: mgcam Date: Wed, 9 Mar 2022 18:37:37 +0000 Subject: [PATCH 08/37] Use custom exception for token validation --- server/npg/porchdb/auth.py | 53 ++++++++++++++++++------------------ server/tests/db_auth_test.py | 30 +++++++++++--------- 2 files changed, 43 insertions(+), 40 deletions(-) diff --git a/server/npg/porchdb/auth.py b/server/npg/porchdb/auth.py index e8fd7ab..7b797fb 100644 --- a/server/npg/porchdb/auth.py +++ b/server/npg/porchdb/auth.py @@ -18,12 +18,10 @@ # You should have received a copy of the GNU General Public License along with # this program. If not, see . 
-import logging import re from sqlalchemy import select from sqlalchemy.orm import contains_eager from sqlalchemy.orm.exc import NoResultFound -from fastapi import HTTPException from npg.porchdb.models import Token from npg.porch.models.permission import Permission, RolesEnum @@ -32,6 +30,11 @@ __AUTH_TOKEN_REGEXP__ = re.compile( r'\A[0-9A-F]+\Z', flags = re.ASCII | re.IGNORECASE) +class CredentialsValidationException(Exception): + def __init__(self, message): + super().__init__(message) + + class Validator: ''' A validator for credentials presented by the requestor. @@ -41,40 +44,36 @@ class Validator: def __init__(self, session): self.session = session - self.logger = logging.getLogger(__name__) async def token2permission(self, token: str): - message = None - if len(token) != __AUTH_TOKEN_LENGTH__: - message = f"The auth token should be {__AUTH_TOKEN_LENGTH__} chars long" + raise CredentialsValidationException( + f"The token should be {__AUTH_TOKEN_LENGTH__} chars long" + ) elif __AUTH_TOKEN_REGEXP__.match(token) is None: - message = 'Token failed character validation' + raise CredentialsValidationException( + 'Token failed character validation' + ) valid_token_row = None - if message is None: - try: - # Using 'outerjoin' to get the left join for token, pipeline. - # We need to retrieve all token rows, regardless of whether - # they are linked the pipeline table or not (we are using a - # nullable foreign key to allow for no link). - result = await self.session.execute( - select(Token) - .filter_by(token=token) - .outerjoin(Token.pipeline) - .options(contains_eager(Token.pipeline)) - ) - valid_token_row = result.scalar_one() - except NoResultFound: - message = 'An unknown token is used' + try: + # Using 'outerjoin' to get the left join for token, pipeline. + # We need to retrieve all token rows, regardless of whether + # they are linked the pipeline table or not (we are using a + # nullable foreign key to allow for no link). + result = await self.session.execute( + select(Token) + .filter_by(token=token) + .outerjoin(Token.pipeline) + .options(contains_eager(Token.pipeline)) + ) + valid_token_row = result.scalar_one() + except NoResultFound: + raise CredentialsValidationException('An unknown token is used') if (valid_token_row is not None) and (valid_token_row.date_revoked is not None): - message = 'A revoked token is used' - - if message: - self.logger.warning(message) - raise HTTPException(status_code=403, detail="Invalid token") + raise CredentialsValidationException('A revoked token is used') permission = None pipeline = valid_token_row.pipeline diff --git a/server/tests/db_auth_test.py b/server/tests/db_auth_test.py index c0672af..e155452 100644 --- a/server/tests/db_auth_test.py +++ b/server/tests/db_auth_test.py @@ -1,10 +1,9 @@ import pytest import datetime from sqlalchemy import select -from fastapi.exceptions import HTTPException from npg.porchdb.models import Token, Pipeline -from npg.porchdb.auth import Validator +from npg.porchdb.auth import Validator, CredentialsValidationException import npg.porch.models.permission import npg.porch.models.pipeline @@ -13,20 +12,23 @@ async def test_token_string_is_valid(async_minimum): v = Validator(session = async_minimum) assert isinstance(v, (npg.porchdb.auth.Validator)) - # This token is an empty string. - with pytest.raises(HTTPException): + + with pytest.raises(CredentialsValidationException, + match=r'The token should be 32 chars long'): await v.token2permission("") - # This token is too short. 
- with pytest.raises(HTTPException): + with pytest.raises(CredentialsValidationException, + match=r'The token should be 32 chars long'): await v.token2permission('aaaa') - # This token is too long. - with pytest.raises(HTTPException): + with pytest.raises(CredentialsValidationException, + match=r'The token should be 32 chars long'): await v.token2permission('AAAAAAAAAAAAAaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa') # This token contains punctuation characters. - with pytest.raises(HTTPException): + with pytest.raises(CredentialsValidationException, + match=r'Token failed character validation'): await v.token2permission('7dc1457531e3495?9bd5:bcda579c1c6') # This token contains characters beyong F. - with pytest.raises(HTTPException): + with pytest.raises(CredentialsValidationException, + match=r'Token failed character validation'): await v.token2permission('7dc1457531e3495P9bd5Kbcda579c1c6') @pytest.mark.asyncio @@ -35,8 +37,9 @@ async def test_token_is_known_and_valid(async_minimum): v = Validator(session = async_minimum) # This token does not exist. - with pytest.raises(HTTPException): - await v.token2permission('doesnotexist11111111111111111111') + with pytest.raises(CredentialsValidationException, + match=r'An unknown token is used'): + await v.token2permission('aaaaaaaBBaaa11111111111111111111') # Mark one of the tokens as revoked. result = await async_minimum.execute( @@ -49,7 +52,8 @@ async def test_token_is_known_and_valid(async_minimum): async_minimum.add(token_row) await async_minimum.commit() - with pytest.raises(HTTPException): + with pytest.raises(CredentialsValidationException, + match=r'A revoked token is used'): await v.token2permission(token_string) @pytest.mark.asyncio From 908c86894d3daa7d0c5952f20076dc6a68095aba Mon Sep 17 00:00:00 2001 From: mgcam Date: Wed, 9 Mar 2022 19:02:06 +0000 Subject: [PATCH 09/37] Exception declaration and propagation --- server/npg/porch/auth/token.py | 12 +++++++++++- server/npg/porch/endpoints/pipelines.py | 6 +++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/server/npg/porch/auth/token.py b/server/npg/porch/auth/token.py index 7038c7e..1297286 100644 --- a/server/npg/porch/auth/token.py +++ b/server/npg/porch/auth/token.py @@ -1,7 +1,10 @@ +import logging from fastapi import Depends from fastapi.security import HTTPBearer +from fastapi import HTTPException from npg.porchdb.connection import get_CredentialsValidator +from npg.porchdb.auth import CredentialsValidationException auth_scheme = HTTPBearer() @@ -9,7 +12,14 @@ async def validate( creds = Depends(auth_scheme), validator = Depends(get_CredentialsValidator) ): + token = creds.credentials - p = await validator.token2permission(token) + p = None + try: + p = await validator.token2permission(token) + except CredentialsValidationException as e: + logger = logging.getLogger(__name__) + logger.warning(e) + raise HTTPException(status_code=403, detail="Invalid token") return p diff --git a/server/npg/porch/endpoints/pipelines.py b/server/npg/porch/endpoints/pipelines.py index a8591b7..ebd482e 100644 --- a/server/npg/porch/endpoints/pipelines.py +++ b/server/npg/porch/endpoints/pipelines.py @@ -32,7 +32,11 @@ router = APIRouter( prefix="/pipelines", - tags=["pipelines"] + tags=["pipelines"], + responses={ + status.HTTP_403_FORBIDDEN: {"description": "Not authorised"}, + status.HTTP_500_INTERNAL_SERVER_ERROR: {"description": "Unexpected error"} + } ) @router.get( From 24b40ad462b1f2ac637bfcce62ec9f66ab61d4dd Mon Sep 17 00:00:00 2001 From: mgcam Date: Wed, 9 Mar 2022 19:09:12 +0000 
Subject: [PATCH 10/37] Copyright notices added/fixed --- server/npg/porch/auth/token.py | 20 ++++++++++++++++++++ server/npg/porch/models/permission.py | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/server/npg/porch/auth/token.py b/server/npg/porch/auth/token.py index 1297286..369a233 100644 --- a/server/npg/porch/auth/token.py +++ b/server/npg/porch/auth/token.py @@ -1,3 +1,23 @@ +# Copyright (C) 2022 Genome Research Ltd. +# +# Author: Kieron Taylor kt19@sanger.ac.uk +# Author: Marina Gourtovaia mg8@sanger.ac.uk +# +# This file is part of npg_porch +# +# npg_porch is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see . + import logging from fastapi import Depends from fastapi.security import HTTPBearer diff --git a/server/npg/porch/models/permission.py b/server/npg/porch/models/permission.py index f4d1cd9..21cfab4 100644 --- a/server/npg/porch/models/permission.py +++ b/server/npg/porch/models/permission.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021, 2022 Genome Research Ltd. +# Copyright (C) 2022 Genome Research Ltd. # # Author: Kieron Taylor kt19@sanger.ac.uk # Author: Marina Gourtovaia mg8@sanger.ac.uk From d3148526f3deb0c369515e7ba6cd4b085ce224ec Mon Sep 17 00:00:00 2001 From: mgcam Date: Thu, 10 Mar 2022 11:20:38 +0000 Subject: [PATCH 11/37] Token auth for request to create a pipeline. A special token is required to create a pipeline. Documentation for pipeline end points is added/extended. To improve code readability, added blank lines in accordance with https://peps.python.org/pep-0008/#blank-lines --- server/npg/porch/endpoints/pipelines.py | 24 ++++++++++++++++++--- server/tests/fixtures/deploy_db.py | 1 + server/tests/pipeline_route_test.py | 28 ++++++++++++++++++++----- 3 files changed, 45 insertions(+), 8 deletions(-) diff --git a/server/npg/porch/endpoints/pipelines.py b/server/npg/porch/endpoints/pipelines.py index ebd482e..2ee358e 100644 --- a/server/npg/porch/endpoints/pipelines.py +++ b/server/npg/porch/endpoints/pipelines.py @@ -27,9 +27,11 @@ from starlette import status from npg.porch.models.pipeline import Pipeline +from npg.porch.models.permission import RolesEnum from npg.porchdb.connection import get_DbAccessor from npg.porch.auth.token import validate + router = APIRouter( prefix="/pipelines", tags=["pipelines"], @@ -39,11 +41,15 @@ } ) + @router.get( "/", response_model=List[Pipeline], summary="Get information about all pipelines.", - description="Get all pipelines as a list. A uri and/or version filter can be used." + description=''' + Returns a list of pydantic Pipeline models. + A uri and/or version filter can be used. 
+ A valid token issued for any pipeline is required for authorisation.''' ) async def get_pipelines( uri: Optional[str] = None, @@ -54,11 +60,15 @@ async def get_pipelines( return await db_accessor.get_all_pipelines(uri, version) + @router.get( "/{pipeline_name}", response_model=Pipeline, responses={status.HTTP_404_NOT_FOUND: {"description": "Not found"}}, summary="Get information about one pipeline.", + description=''' + Returns a single pydantic Pipeline model if found. + A valid token issued for any pipeline is required for authorisation.''' ) async def get_pipeline( pipeline_name: str, @@ -74,16 +84,20 @@ async def get_pipeline( detail=f"Pipeline '{pipeline_name}' not found") return pipeline + @router.post( "/", response_model=Pipeline, - summary="Create one pipeline record.", status_code=status.HTTP_201_CREATED, responses={ status.HTTP_201_CREATED: {"description": "Pipeline was created"}, status.HTTP_400_BAD_REQUEST: {"description": "Insufficient pipeline properties provided"}, status.HTTP_409_CONFLICT: {"description": "Pipeline already exists"} - } + }, + summary="Create one pipeline record.", + description=''' + Using JSON data in the request, creates a new pipeline record. + A valid special power user token is required for authorisation.''' ) async def create_pipeline( pipeline: Pipeline, @@ -91,6 +105,10 @@ async def create_pipeline( permissions=Depends(validate) ) -> Pipeline: + if permissions.role != RolesEnum.POWER_USER: + logging.error(f"Role {RolesEnum.POWER_USER} is required") + raise HTTPException(status_code=403) + new_pipeline = None try: new_pipeline = await db_accessor.create_pipeline(pipeline) diff --git a/server/tests/fixtures/deploy_db.py b/server/tests/fixtures/deploy_db.py index 24edbcd..f44e820 100644 --- a/server/tests/fixtures/deploy_db.py +++ b/server/tests/fixtures/deploy_db.py @@ -30,6 +30,7 @@ def minimum_data(): description='Seqfarm host, job runner' ), Token( + token='4bab73544c834c6f86f9662e5de26d0d', description='Seqfarm host, admin' ) ] diff --git a/server/tests/pipeline_route_test.py b/server/tests/pipeline_route_test.py index d5fdeec..702ad27 100644 --- a/server/tests/pipeline_route_test.py +++ b/server/tests/pipeline_route_test.py @@ -2,24 +2,41 @@ from npg.porch.models import Pipeline + headers = { 'Authorization': 'Bearer cac0533d5599489d9a3d998028a79fe8', 'accept': 'application/json' } +headers4power_user = { + 'Authorization': 'Bearer 4bab73544c834c6f86f9662e5de26d0d', + 'accept': 'application/json' +} + def http_create_pipeline(fastapi_testclient, pipeline): + response = fastapi_testclient.post( '/pipelines', json=pipeline.dict(), allow_redirects=True ) assert response.status_code == status.HTTP_403_FORBIDDEN + response = fastapi_testclient.post( '/pipelines', json=pipeline.dict(), allow_redirects=True, headers=headers ) + assert response.status_code == status.HTTP_403_FORBIDDEN + + response = fastapi_testclient.post( + '/pipelines', json=pipeline.dict(), allow_redirects=True, + headers=headers4power_user + ) assert response.status_code == status.HTTP_201_CREATED + return response.json() + def test_pipeline_get(async_minimum, fastapi_testclient): + response = fastapi_testclient.get('/pipelines') assert response.status_code == status.HTTP_403_FORBIDDEN response = fastapi_testclient.get('/pipelines', headers=headers) @@ -29,6 +46,7 @@ def test_pipeline_get(async_minimum, fastapi_testclient): assert pipeline.name == 'ptest one' assert pipeline.version == '0.3.14' + def test_pipeline_filtered_get(async_minimum, fastapi_testclient): second_pipeline = 
Pipeline( @@ -63,6 +81,7 @@ def test_pipeline_filtered_get(async_minimum, fastapi_testclient): def test_get_known_pipeline(async_minimum, fastapi_testclient): + response = fastapi_testclient.get( '/pipelines/ptest one', headers=headers ) @@ -79,7 +98,9 @@ def test_get_known_pipeline(async_minimum, fastapi_testclient): assert response.status_code == status.HTTP_404_NOT_FOUND assert response.json()['detail'] == "Pipeline 'not here' not found" + def test_create_pipeline(async_minimum, fastapi_testclient): + # Create a pipeline desired_pipeline = Pipeline( name='ptest two', @@ -96,9 +117,8 @@ def test_create_pipeline(async_minimum, fastapi_testclient): '/pipelines', json=desired_pipeline.dict(), allow_redirects=True, - headers=headers + headers=headers4power_user ) - assert response.status_code == status.HTTP_409_CONFLICT, 'ptest two already in DB' assert response.json()['detail'] == 'Pipeline already exists' @@ -112,7 +132,6 @@ def test_create_pipeline(async_minimum, fastapi_testclient): response = http_create_pipeline(fastapi_testclient, second_desired_pipeline) # Retrieve the same pipelines - response = fastapi_testclient.get( '/pipelines', headers=headers ) @@ -130,7 +149,6 @@ def test_create_pipeline(async_minimum, fastapi_testclient): '/pipelines', json=third_desired_pipeline.dict(), allow_redirects=True, - headers=headers + headers=headers4power_user ) - assert response.status_code == status.HTTP_400_BAD_REQUEST From 36f7271eba9384af8f83309896c55be3deb00384 Mon Sep 17 00:00:00 2001 From: mgcam Date: Wed, 9 Mar 2022 19:09:12 +0000 Subject: [PATCH 12/37] Copyright notices added/fixed --- server/npg/porch/auth/token.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/npg/porch/auth/token.py b/server/npg/porch/auth/token.py index 369a233..49cf89c 100644 --- a/server/npg/porch/auth/token.py +++ b/server/npg/porch/auth/token.py @@ -39,7 +39,7 @@ async def validate( p = await validator.token2permission(token) except CredentialsValidationException as e: logger = logging.getLogger(__name__) - logger.warning(e) + logger.warning(str(e)) raise HTTPException(status_code=403, detail="Invalid token") return p From ad032b990b4b0b87870fa1e83039243e17202b5a Mon Sep 17 00:00:00 2001 From: mgcam Date: Thu, 10 Mar 2022 15:53:00 +0000 Subject: [PATCH 13/37] Simple token auth for task endpoints --- server/npg/porch/endpoints/tasks.py | 108 ++++++++++++++++------------ server/tests/task_route_test.py | 78 +++++++++++++------- 2 files changed, 116 insertions(+), 70 deletions(-) diff --git a/server/npg/porch/endpoints/tasks.py b/server/npg/porch/endpoints/tasks.py index 208a5d2..e6c7ceb 100644 --- a/server/npg/porch/endpoints/tasks.py +++ b/server/npg/porch/endpoints/tasks.py @@ -27,57 +27,64 @@ from npg.porch.models.pipeline import Pipeline from npg.porch.models.task import Task - from npg.porchdb.connection import get_DbAccessor +from npg.porch.auth.token import validate + router = APIRouter( prefix="/tasks", - tags=["tasks"] + tags=["tasks"], + responses={ + status.HTTP_403_FORBIDDEN: {"description": "Not authorised"}, + status.HTTP_500_INTERNAL_SERVER_ERROR: {"description": "Unexpected error"} + } ) + @router.get( "/", response_model=List[Task], summary="Returns all tasks.", description="Return all tasks. A filter will be applied if used in the query." 
) -async def get_tasks(db_accessor=Depends(get_DbAccessor)): +async def get_tasks( + db_accessor=Depends(get_DbAccessor), + permissions=Depends(validate) +) -> List[Task]: + return await db_accessor.get_tasks() -#@router.get( -# "/{task_name}", -# response_model=Task, -# summary="Get one task.", -# description="Get one task using its unique name." -#) -#def get_task(task_name: str): -# return Task(name=task_name) @router.post( "/", response_model=Task, - summary="Creates one task.", status_code=status.HTTP_201_CREATED, responses={ status.HTTP_201_CREATED: {"description": "Task creation was successful"}, status.HTTP_404_NOT_FOUND: {"description": "The pipeline for this task is invalid"}, status.HTTP_409_CONFLICT: {"description": "A task with the same signature already exists"} - } -) -async def create_task(task: Task, db_accessor=Depends(get_DbAccessor)): - """ + }, + summary="Creates one task record.", + description=''' Given a Task object, creates a database record for it and returns - the same object with status 201 'Created' + the same object, the response HTTP status is 201 'Created'. The + new task is assigned pending status, ie becomes awailable for claiming. The pipeline specified by the `pipeline` attribute of the Task object - should exist. If it does not exist, return status 404 'Not found' and - an error. + should exist. If it does not exist, return status 404 'Not found'.''' +) +async def create_task( + task: Task, + db_accessor=Depends(get_DbAccessor), + permissions=Depends(validate) +) -> Task: - Errors if task status is not PENDING. - """ created_task = None try: - created_task = await db_accessor.create_task(token_id=1, task=task) + created_task = await db_accessor.create_task( + token_id=1, + task=task + ) except IntegrityError: raise HTTPException( status_code=409, @@ -85,52 +92,57 @@ async def create_task(task: Task, db_accessor=Depends(get_DbAccessor)): ) except NoResultFound: raise HTTPException(status_code=404, detail='Failed to find pipeline for this task') + return created_task + @router.put( "/", response_model=Task, - summary="Update one task.", responses={ status.HTTP_200_OK: {"description": "Task was modified"}, status.HTTP_404_NOT_FOUND: { "description": "The pipeline or task in the request is invalid" }, - } -) -async def update_task(task: Task, db_accessor=Depends(get_DbAccessor)): - """ - Given a Task object, updates the status of the task in the database. + }, + summary="Update one task.", + description=''' + Given a Task object, updates the status of the task in the database + to the value of the status in this Task object. The pipeline specified by the `pipeline` attribute of the Task object should exist. If it does not exist, return status 404 'Not found' and - an error. 
- """ + an error.''' +) +async def update_task( + task: Task, + db_accessor=Depends(get_DbAccessor), + permissions=Depends(validate) +) -> Task: + changed_task = None try: - changed_task = await db_accessor.update_task(token_id=1, task=task) + changed_task = await db_accessor.update_task( + token_id=1, + task=task + ) except NoResultFound as e: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) + return changed_task + @router.post( "/claim", response_model=List[Task], - summary="Claim tasks.", - description="Claim tasks for a particular pipeline.", responses={ status.HTTP_200_OK: {"description": "Receive a list of tasks that have been claimed"}, status.HTTP_404_NOT_FOUND: { "description": "Cannot find the pipeline submitted with the claim" } - } -) -async def claim_task( - pipeline: Pipeline, - num_tasks: PositiveInt = 1, - db_accessor=Depends(get_DbAccessor) -) -> List[Task]: - """ + }, + summary="Claim tasks for a particular pipeline.", + description=''' Arguments - the Pipeline object and the maximum number of tasks to retrieve and claim, the latter defaults to 1 if not given. @@ -145,15 +157,23 @@ async def claim_task( The pipeline object returned within the Task should be consistent with the pipeline object in the payload, but, typically, will have - more attributes defined (uri, the specific version). - """ + more attributes defined (uri, the specific version).''' +) +async def claim_task( + pipeline: Pipeline, + num_tasks: PositiveInt = 1, + db_accessor=Depends(get_DbAccessor), + permissions=Depends(validate) +) -> List[Task]: + tasks = None try: tasks = await db_accessor.claim_tasks( token_id=1, pipeline=pipeline, claim_limit=num_tasks ) - return tasks except NoResultFound as e: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=e.value) + + return tasks diff --git a/server/tests/task_route_test.py b/server/tests/task_route_test.py index f88fad7..aeeddee 100644 --- a/server/tests/task_route_test.py +++ b/server/tests/task_route_test.py @@ -4,7 +4,13 @@ # Not testing get-all-tasks as this method will ultimately go +headers4ptest_one = { + 'Authorization': 'Bearer cac0533d5599489d9a3d998028a79fe8', + 'accept': 'application/json' +} + def test_task_creation(async_minimum, fastapi_testclient): + # Create a task with a sparse pipeline definition task_one = Task( pipeline = { @@ -18,9 +24,9 @@ def test_task_creation(async_minimum, fastapi_testclient): response = fastapi_testclient.post( 'tasks', json=task_one.dict(), - allow_redirects=True + allow_redirects=True, + headers=headers4ptest_one ) - assert response.status_code == status.HTTP_201_CREATED assert task_one == response.json() @@ -28,9 +34,9 @@ def test_task_creation(async_minimum, fastapi_testclient): response = fastapi_testclient.post( 'tasks', json=task_one.dict(), - allow_redirects=True + allow_redirects=True, + headers=headers4ptest_one ) - assert response.status_code == status.HTTP_409_CONFLICT task_two = Task( @@ -41,38 +47,42 @@ def test_task_creation(async_minimum, fastapi_testclient): 'number': 1 } ) + response = fastapi_testclient.post( 'tasks', json=task_two.dict(), - allow_redirects=True + allow_redirects=True, + headers=headers4ptest_one ) assert response.status_code == status.HTTP_404_NOT_FOUND + def test_task_update(async_minimum, fastapi_testclient): - task = fastapi_testclient.get('/tasks').json()[0] + task = fastapi_testclient.get('/tasks', headers=headers4ptest_one).json()[0] assert task['status'] is None - task['status'] = TaskStateEnum.PENDING + task['status'] = 
TaskStateEnum.PENDING response = fastapi_testclient.put( '/tasks', json=task, - allow_redirects=True + allow_redirects=True, + headers=headers4ptest_one ) assert response.status_code == status.HTTP_200_OK - modified_task = Task.parse_obj(response.json()) + modified_task = Task.parse_obj(response.json()) assert modified_task == task # Now invalidate the task by changing the signature modified_task.task_input = { 'something': 'different' } - response = fastapi_testclient.put( '/tasks', json=modified_task.dict(), - allow_redirects=True + allow_redirects=True, + headers=headers4ptest_one ) assert response.status_code == status.HTTP_404_NOT_FOUND assert response.json() == {'detail': 'Task to be modified could not be found'} @@ -81,42 +91,51 @@ def test_task_update(async_minimum, fastapi_testclient): modified_task.pipeline = { 'name': 'ptest one thousand' } - response = fastapi_testclient.put( '/tasks', json=modified_task.dict(), - allow_redirects=True + allow_redirects=True, + headers=headers4ptest_one ) assert response.status_code == status.HTTP_404_NOT_FOUND assert response.json() == {'detail': 'Pipeline not found'} + def test_task_claim(async_minimum, async_tasks, fastapi_testclient): - headers = { - 'Authorization': 'Bearer cac0533d5599489d9a3d998028a79fe8', - 'accept': 'application/json' - } + response = fastapi_testclient.get( - '/pipelines/ptest some', headers = headers) + '/pipelines/ptest some', headers=headers4ptest_one) assert response.status_code == status.HTTP_200_OK - pipeline = response.json() + pipeline = response.json() tasks_seen = [] - response = fastapi_testclient.post('/tasks/claim', json=pipeline) + response = fastapi_testclient.post( + '/tasks/claim', + json=pipeline, + headers=headers4ptest_one + ) assert response.status_code == status.HTTP_200_OK tasks = response.json() - assert len(tasks) == 1, 'Defaults to one task claimed' t = tasks[0] assert t['task_input'] == {'input': 1} assert t['status'] == TaskStateEnum.CLAIMED tasks_seen.append(t['task_input_id']) - response = fastapi_testclient.post('/tasks/claim?num_tasks=0', json=pipeline) + response = fastapi_testclient.post( + '/tasks/claim?num_tasks=0', + json=pipeline, + headers=headers4ptest_one + ) assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY, 'Not allowed to use invalid numbers of tasks' # noqa: E501 - response = fastapi_testclient.post('/tasks/claim?num_tasks=2', json=pipeline) + response = fastapi_testclient.post( + '/tasks/claim?num_tasks=2', + json=pipeline, + headers=headers4ptest_one + ) assert response.status_code == status.HTTP_200_OK tasks = response.json() assert len(tasks) == 2, 'Asked for two, got two' @@ -124,15 +143,22 @@ def test_task_claim(async_minimum, async_tasks, fastapi_testclient): # Cannot test race conditions, because sqlite only pretends to support full async # Claim the rest - response = fastapi_testclient.post('/tasks/claim?num_tasks=8', json=pipeline) + response = fastapi_testclient.post( + '/tasks/claim?num_tasks=8', + json=pipeline, + headers=headers4ptest_one + ) assert response.status_code == status.HTTP_200_OK tasks = response.json() assert len(tasks) == 7, 'Asked for eight, got seven' tasks_seen.extend([t['task_input_id'] for t in tasks]) - assert len(set(tasks_seen)) == 10, 'Ten unique tasks were claimed' - response = fastapi_testclient.post('/tasks/claim', json=pipeline) + response = fastapi_testclient.post( + '/tasks/claim', + json=pipeline, + headers=headers4ptest_one + ) assert response.status_code == status.HTTP_200_OK tasks = response.json() assert 
len(tasks) == 0, 'Tried to claim, did not get any tasks' From 1d75aed6fffd7e3c91e1027cd4f0a8b0f7ab9759 Mon Sep 17 00:00:00 2001 From: mgcam Date: Fri, 11 Mar 2022 17:04:58 +0000 Subject: [PATCH 14/37] Pipeline-aware token auth for task endpoints --- server/npg/porch/endpoints/tasks.py | 81 +++++++++++++++------------ server/npg/porch/models/permission.py | 24 ++++++++ server/tests/fixtures/deploy_db.py | 1 + server/tests/model_permission_test.py | 38 ++++++++++++- server/tests/task_route_test.py | 33 +++++++---- 5 files changed, 129 insertions(+), 48 deletions(-) diff --git a/server/npg/porch/endpoints/tasks.py b/server/npg/porch/endpoints/tasks.py index e6c7ceb..e85727a 100644 --- a/server/npg/porch/endpoints/tasks.py +++ b/server/npg/porch/endpoints/tasks.py @@ -18,6 +18,7 @@ # You should have received a copy of the GNU General Public License along with # this program. If not, see . +import logging from fastapi import APIRouter, HTTPException, Depends from pydantic import PositiveInt from typing import List @@ -27,10 +28,27 @@ from npg.porch.models.pipeline import Pipeline from npg.porch.models.task import Task +from npg.porch.models.permission import PermissionValidationException from npg.porchdb.connection import get_DbAccessor from npg.porch.auth.token import validate +def _validate_request(permission, pipeline): + + try: + permission.validate_pipeline(pipeline) + except PermissionValidationException as e: + logger = logging.getLogger(__name__) + logger.warning(str(e)) + raise HTTPException( + status_code=403, + detail=("Given credentials cannot be used for" + f" pipeline '{pipeline.name}'") + ) + + pass + + router = APIRouter( prefix="/tasks", tags=["tasks"], @@ -45,11 +63,13 @@ "/", response_model=List[Task], summary="Returns all tasks.", - description="Return all tasks. A filter will be applied if used in the query." + description=''' + Return all tasks. A filter will be applied if used in the query. + The filter feature is not yet implemented.''' ) async def get_tasks( db_accessor=Depends(get_DbAccessor), - permissions=Depends(validate) + permission=Depends(validate) ) -> List[Task]: return await db_accessor.get_tasks() @@ -61,7 +81,6 @@ async def get_tasks( status_code=status.HTTP_201_CREATED, responses={ status.HTTP_201_CREATED: {"description": "Task creation was successful"}, - status.HTTP_404_NOT_FOUND: {"description": "The pipeline for this task is invalid"}, status.HTTP_409_CONFLICT: {"description": "A task with the same signature already exists"} }, summary="Creates one task record.", @@ -76,13 +95,14 @@ async def get_tasks( async def create_task( task: Task, db_accessor=Depends(get_DbAccessor), - permissions=Depends(validate) + permission=Depends(validate) ) -> Task: + _validate_request(permission, task.pipeline) created_task = None try: created_task = await db_accessor.create_task( - token_id=1, + token_id=permission.requestor_id, task=task ) except IntegrityError: @@ -101,29 +121,25 @@ async def create_task( response_model=Task, responses={ status.HTTP_200_OK: {"description": "Task was modified"}, - status.HTTP_404_NOT_FOUND: { - "description": "The pipeline or task in the request is invalid" - }, }, summary="Update one task.", description=''' Given a Task object, updates the status of the task in the database to the value of the status in this Task object. - The pipeline specified by the `pipeline` attribute of the Task object - should exist. 
If it does not exist, return status 404 'Not found' and - an error.''' + If the task does not exist, status 404 'Not found' is returned.''' ) async def update_task( task: Task, db_accessor=Depends(get_DbAccessor), - permissions=Depends(validate) + permission=Depends(validate) ) -> Task: + _validate_request(permission, task.pipeline) changed_task = None try: changed_task = await db_accessor.update_task( - token_id=1, + token_id=permission.requestor_id, task=task ) except NoResultFound as e: @@ -136,44 +152,35 @@ async def update_task( "/claim", response_model=List[Task], responses={ - status.HTTP_200_OK: {"description": "Receive a list of tasks that have been claimed"}, - status.HTTP_404_NOT_FOUND: { - "description": "Cannot find the pipeline submitted with the claim" - } + status.HTTP_200_OK: {"description": "Receive a list of tasks that have been claimed"} }, summary="Claim tasks for a particular pipeline.", description=''' Arguments - the Pipeline object and the maximum number of tasks to retrieve and claim, the latter defaults to 1 if not given. - Return an error and status 404 'Not Found' if the pipeline with the - given name does not exist. - - It is possible that no tasks that satisfy the given criteria and - are unclaimed are found. Return status 200 and an empty array. + If no tasks that satisfy the given criteria and are unclaimed + are found, returns status 200 and an empty array. - If any tasks are claimed, return an array of these Task objects and - status 200. + If any tasks are claimed, return an array of these Task objects + and status 200. - The pipeline object returned within the Task should be consistent - with the pipeline object in the payload, but, typically, will have - more attributes defined (uri, the specific version).''' + The pipeline object returned within each of the tasks is consistent + with the pipeline object in the payload, but has all possible + attributes defined (uri, version).''' ) async def claim_task( pipeline: Pipeline, num_tasks: PositiveInt = 1, db_accessor=Depends(get_DbAccessor), - permissions=Depends(validate) + permission=Depends(validate) ) -> List[Task]: - tasks = None - try: - tasks = await db_accessor.claim_tasks( - token_id=1, - pipeline=pipeline, - claim_limit=num_tasks - ) - except NoResultFound as e: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=e.value) + _validate_request(permission, pipeline) + tasks = await db_accessor.claim_tasks( + token_id=permission.requestor_id, + pipeline=pipeline, + claim_limit=num_tasks + ) return tasks diff --git a/server/npg/porch/models/permission.py b/server/npg/porch/models/permission.py index 21cfab4..a89d856 100644 --- a/server/npg/porch/models/permission.py +++ b/server/npg/porch/models/permission.py @@ -24,11 +24,18 @@ from npg.porch.models.pipeline import Pipeline + +class PermissionValidationException(Exception): + def __init__(self, message): + super().__init__(message) + class RolesEnum(str, Enum): POWER_USER = 'power_user' REGULAR_USER = 'regular_user' + class Permission(BaseModel): + pipeline: Optional[Pipeline] = Field( None, title = 'An optional pipeline object', @@ -44,7 +51,24 @@ class Permission(BaseModel): @validator('role') def no_pipeline4special_users(cls, v, values): + if (v == RolesEnum.POWER_USER and ('pipeline' in values and values['pipeline'] is not None)): raise ValueError('Power user cannot be associated with a pipeline') + return v + + def validate_pipeline(self, pipeline: Pipeline): + + if self.role != RolesEnum.REGULAR_USER: + raise 
PermissionValidationException( + f"Operation is not valid for role {self.role}") + if not self.pipeline: + raise PermissionValidationException("No associated pipeline object") + + if pipeline.name != self.pipeline.name: + raise PermissionValidationException( + "Token-request pipeline mismatch: " + + f"'{self.pipeline.name}' and '{pipeline.name}'") + + pass diff --git a/server/tests/fixtures/deploy_db.py b/server/tests/fixtures/deploy_db.py index f44e820..09c10a7 100644 --- a/server/tests/fixtures/deploy_db.py +++ b/server/tests/fixtures/deploy_db.py @@ -77,6 +77,7 @@ def lots_of_tasks(): version='0.3.14' ) job_finder_token = Token( + token='ba53eaf7073d4c2b95ca47aeed41086c', pipeline=pipeline, description='OpenStack host, job finder' ) diff --git a/server/tests/model_permission_test.py b/server/tests/model_permission_test.py index ff103e5..aa85e4f 100644 --- a/server/tests/model_permission_test.py +++ b/server/tests/model_permission_test.py @@ -1,9 +1,10 @@ import pytest from npg.porch.models.pipeline import Pipeline -from npg.porch.models.permission import Permission +from npg.porch.models.permission import Permission, PermissionValidationException from pydantic.error_wrappers import ValidationError + def test_model_create(): '''' Test objects can be created. @@ -18,6 +19,7 @@ def test_model_create(): ) assert type(p) is Permission + def test_xvalidation_role_pipeline(): ''' Test cross validation for the role and pipeline fields. @@ -32,6 +34,7 @@ def test_xvalidation_role_pipeline(): pipeline = Pipeline(name='number one') ) + def test_error_with_insufficient_args(): with pytest.raises(ValidationError, match=r'requestor_id\s+field required'): @@ -44,3 +47,36 @@ def test_error_with_insufficient_args(): requestor_id = 1, pipeline = Pipeline(name='number one') ) + + +def test_pipeline_validation(): + + pipeline = Pipeline(name='number one') + permission = Permission( + requestor_id = 3, + role = 'power_user') + with pytest.raises(PermissionValidationException, + match=r'Operation is not valid for role power_user'): + permission.validate_pipeline(pipeline) + + permission = Permission( + requestor_id = 3, + role = 'regular_user') + with pytest.raises(PermissionValidationException, + match=r'No associated pipeline object'): + permission.validate_pipeline(pipeline) + + permission = Permission( + requestor_id = 3, + role = 'regular_user', + pipeline = Pipeline(name='number two')) + with pytest.raises( + PermissionValidationException, match + =r"Token-request pipeline mismatch: 'number two' and 'number one'"): + permission.validate_pipeline(pipeline) + + permission = Permission( + requestor_id = 3, + role = 'regular_user', + pipeline = pipeline) + assert (permission.validate_pipeline(pipeline) is None), 'no error' diff --git a/server/tests/task_route_test.py b/server/tests/task_route_test.py index aeeddee..50437a9 100644 --- a/server/tests/task_route_test.py +++ b/server/tests/task_route_test.py @@ -8,6 +8,10 @@ 'Authorization': 'Bearer cac0533d5599489d9a3d998028a79fe8', 'accept': 'application/json' } +headers4ptest_some = { + 'Authorization': 'Bearer ba53eaf7073d4c2b95ca47aeed41086c', + 'accept': 'application/json' +} def test_task_creation(async_minimum, fastapi_testclient): @@ -47,14 +51,15 @@ def test_task_creation(async_minimum, fastapi_testclient): 'number': 1 } ) - + # The token is valid, but for a different pipeline. It is impossible + # to have a valid token for a pipeline that does not exist. 
response = fastapi_testclient.post( 'tasks', json=task_two.dict(), allow_redirects=True, headers=headers4ptest_one ) - assert response.status_code == status.HTTP_404_NOT_FOUND + assert response.status_code == status.HTTP_403_FORBIDDEN def test_task_update(async_minimum, fastapi_testclient): @@ -87,7 +92,9 @@ def test_task_update(async_minimum, fastapi_testclient): assert response.status_code == status.HTTP_404_NOT_FOUND assert response.json() == {'detail': 'Task to be modified could not be found'} - # And change the reference pipeline to something wrong + # And change the reference pipeline to something wrong. + # This token is valid, but for a different pipeline. It is impossible + # to have a valid token for a pipeline that does not exist. modified_task.pipeline = { 'name': 'ptest one thousand' } @@ -97,25 +104,31 @@ def test_task_update(async_minimum, fastapi_testclient): allow_redirects=True, headers=headers4ptest_one ) - assert response.status_code == status.HTTP_404_NOT_FOUND - assert response.json() == {'detail': 'Pipeline not found'} + assert response.status_code == status.HTTP_403_FORBIDDEN def test_task_claim(async_minimum, async_tasks, fastapi_testclient): response = fastapi_testclient.get( '/pipelines/ptest some', headers=headers4ptest_one) - assert response.status_code == status.HTTP_200_OK pipeline = response.json() tasks_seen = [] + # Cannot claim with a token issued for a different pipeline. response = fastapi_testclient.post( '/tasks/claim', json=pipeline, headers=headers4ptest_one ) + assert response.status_code == status.HTTP_403_FORBIDDEN + + response = fastapi_testclient.post( + '/tasks/claim', + json=pipeline, + headers=headers4ptest_some + ) assert response.status_code == status.HTTP_200_OK tasks = response.json() assert len(tasks) == 1, 'Defaults to one task claimed' @@ -127,14 +140,14 @@ def test_task_claim(async_minimum, async_tasks, fastapi_testclient): response = fastapi_testclient.post( '/tasks/claim?num_tasks=0', json=pipeline, - headers=headers4ptest_one + headers=headers4ptest_some ) assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY, 'Not allowed to use invalid numbers of tasks' # noqa: E501 response = fastapi_testclient.post( '/tasks/claim?num_tasks=2', json=pipeline, - headers=headers4ptest_one + headers=headers4ptest_some ) assert response.status_code == status.HTTP_200_OK tasks = response.json() @@ -146,7 +159,7 @@ def test_task_claim(async_minimum, async_tasks, fastapi_testclient): response = fastapi_testclient.post( '/tasks/claim?num_tasks=8', json=pipeline, - headers=headers4ptest_one + headers=headers4ptest_some ) assert response.status_code == status.HTTP_200_OK tasks = response.json() @@ -157,7 +170,7 @@ def test_task_claim(async_minimum, async_tasks, fastapi_testclient): response = fastapi_testclient.post( '/tasks/claim', json=pipeline, - headers=headers4ptest_one + headers=headers4ptest_some ) assert response.status_code == status.HTTP_200_OK tasks = response.json() From 18a69b7c1e790bab45567f8d141b2108674bda22 Mon Sep 17 00:00:00 2001 From: mgcam Date: Mon, 14 Mar 2022 12:42:22 +0000 Subject: [PATCH 15/37] Fix typo --- server/npg/porch/endpoints/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/npg/porch/endpoints/tasks.py b/server/npg/porch/endpoints/tasks.py index e85727a..49a71d9 100644 --- a/server/npg/porch/endpoints/tasks.py +++ b/server/npg/porch/endpoints/tasks.py @@ -87,7 +87,7 @@ async def get_tasks( description=''' Given a Task object, creates a database record for it and returns the 
same object, the response HTTP status is 201 'Created'. The - new task is assigned pending status, ie becomes awailable for claiming. + new task is assigned pending status, ie becomes available for claiming. The pipeline specified by the `pipeline` attribute of the Task object should exist. If it does not exist, return status 404 'Not found'.''' From 4f36cb6df4582308cb6ce12108fdb8b057226749 Mon Sep 17 00:00:00 2001 From: mgcam Date: Mon, 14 Mar 2022 12:44:07 +0000 Subject: [PATCH 16/37] Simplified class definitions for custom exceptions. --- server/npg/porch/models/permission.py | 4 ++-- server/npg/porchdb/auth.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/server/npg/porch/models/permission.py b/server/npg/porch/models/permission.py index a89d856..09f1ab5 100644 --- a/server/npg/porch/models/permission.py +++ b/server/npg/porch/models/permission.py @@ -26,8 +26,8 @@ class PermissionValidationException(Exception): - def __init__(self, message): - super().__init__(message) + pass + class RolesEnum(str, Enum): POWER_USER = 'power_user' diff --git a/server/npg/porchdb/auth.py b/server/npg/porchdb/auth.py index 7b797fb..3d30ef5 100644 --- a/server/npg/porchdb/auth.py +++ b/server/npg/porchdb/auth.py @@ -31,8 +31,7 @@ r'\A[0-9A-F]+\Z', flags = re.ASCII | re.IGNORECASE) class CredentialsValidationException(Exception): - def __init__(self, message): - super().__init__(message) + pass class Validator: From 03e0da102a6faea67fe09759b69e58f428a7ac01 Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Tue, 15 Mar 2022 10:49:50 +0000 Subject: [PATCH 17/37] Update the user guide to describe token use. --- docs/user_guide.md | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/docs/user_guide.md b/docs/user_guide.md index e6ab199..ec3a3f1 100644 --- a/docs/user_guide.md +++ b/docs/user_guide.md @@ -20,6 +20,14 @@ We have tried to make interactions with npg_porch as atomic as possible, so the Security is necessary in order to prevent accidental misuse of npg_porch. An authorisation token can be provided to you by the maintainers, which you will then use to enable each request. Not implemented yet! +A note on HTTPS: Client libraries like `requests`, certain GUIs and Firefox will try to verify the server certificate authority. System-administered software are already configured correctly. Others may need to be told where this certificate is, e.g. `/usr/share/ca-certificates/` + +### Step 0 - get issued security tokens + +Access to the service is loosely controlled with authorisation tokens. You will be issued with an admin token that enables you to register pipelines, and further tokens for pipeline-specific communication. Please do not share the tokens around and use them for purposes besides the specific pipeline. This will help us to monitor pipeline reliability and quality of service. 
Authorisation is achieved by HTTP Bearer Token: + +`curl -L -H "Authorization: Bearer $TOKEN" https://$SERVER:$PORT` + ### Step 1 - register your pipeline with npg_porch *Schema: npg.porch.model.pipeline* @@ -38,7 +46,7 @@ You can name your pipeline however you like, but the name must be unique, and be } ``` -`url='npg_porch_server.sanger.ac.uk/pipelines'; curl -L -XPOST ${url} -H "content-type: application/json" -w " %{http_code}" -d @pipeline-def.json` +`url='$SERVER:$PORT/pipelines'; curl -L -XPOST ${url} -H "content-type: application/json" -H "Authorization: Bearer $ADMIN_TOKEN" -w " %{http_code}" -d @pipeline-def.json` Keep this pipeline definition with your data, as you will need it to tell npg_porch which pipeline you are acting on. @@ -110,9 +118,9 @@ Note that it is possible to run the same `task_input` with a different `pipeline Now you want the pipeline to run once per specification, and so register the documents with npg_porch. ```bash -url='npg_porch_server.sanger.ac.uk/tasks' +url='$SERVER:$PORT/tasks' for DOC in *.json; do - response=$(curl -w '%{http_code}' -L -XPOST ${url} -H "content-type: application/json" -d @${DOC}`) + response=$(curl -w '%{http_code}' -L -XPOST ${url} -H "content-type: application/json" -H "Authorization: Bearer $TOKEN" -d @${DOC}`) # parsing the response is left as an exercise for the reader... if [[ "$response_code" ne 201]]; then @@ -128,7 +136,7 @@ use HTTP::Request; use LWP::UserAgent; my $ua = LWP::UserAgent->new; -my $request = HTTP::Request->new(POST => 'npg_porch_server.sanger.ac.uk/tasks'); +my $request = HTTP::Request->new(POST => '$SERVER:$PORT/tasks'); $request->content_type('application/json'); $request->header(Accept => 'application/json'); $request->content($DOC); @@ -180,8 +188,8 @@ Supposing there are new tasks created every 24 hours, we then also need a client Using the "claim" interface, you can ask npg_porch to earmark tasks that you intend to run. Others will remain unclaimed until this script or another claims them. Generally speaking, tasks are first-in first-out, so the first task you get if you claim one is the first unclaimed task npg_porch was told about. 
```bash
-url='npg_porch_server.sanger.ac.uk/tasks/claim'
-response=$(curl -L -I -XPOST ${url} -H "content-type: application/json" -d @pipeline-def.json)
+url='$SERVER:$PORT/tasks/claim'
+response=$(curl -L -I -XPOST ${url} -H "content-type: application/json" -H "Authorization: Bearer $TOKEN" -d @pipeline-def.json)
 ```

 Response body:
@@ -216,9 +224,10 @@ or
 use JSON qw/decode_json/;

 my $ua = LWP::UserAgent->new;
-my $request = HTTP::Request->new(POST => 'npg_porch_server.sanger.ac.uk/tasks/claim');
+my $request = HTTP::Request->new(POST => '$SERVER:$PORT/tasks/claim');
 $request->content_type('application/json');
 $request->header(Accept => 'application/json');
+$request->header(Authorization => "Bearer $TOKEN")

 my $response = $ua->request($request);
 if ($response->is_success) {

From 10e518f248bb0579fdd6a8d41e56427ddaf2655b Mon Sep 17 00:00:00 2001
From: Kieron Taylor
Date: Tue, 15 Mar 2022 14:02:11 +0000
Subject: [PATCH 18/37] Addendum about proxies

---
 docs/user_guide.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/user_guide.md b/docs/user_guide.md
index ec3a3f1..f77f3cb 100644
--- a/docs/user_guide.md
+++ b/docs/user_guide.md
@@ -18,9 +18,9 @@ Bash tools like `jq` and `jo` can be useful in working with the server, as all m

 We have tried to make interactions with npg_porch as atomic as possible, so the data you send and the data you receive follow the same schema.

-Security is necessary in order to prevent accidental misuse of npg_porch. An authorisation token can be provided to you by the maintainers, which you will then use to enable each request. Not implemented yet!
+Security is necessary in order to prevent accidental misuse of npg_porch. An authorisation token can be provided to you by the maintainers, which you will then use to enable each request.

-A note on HTTPS: Client libraries like `requests`, certain GUIs and Firefox will try to verify the server certificate authority. System-administered software are already configured correctly. Others may need to be told where this certificate is, e.g. `/usr/share/ca-certificates/`
+A note on HTTPS: Client libraries like `requests`, certain GUIs and Firefox will try to verify the server certificate authority. System-administered software is already configured correctly, but other packages installed by conda or pip may need to be told how the client can verify the server certificate, e.g. using the CA certificates contained in `/usr/share/ca-certificates/`. It may also be useful to unset `https_proxy` and `HTTPS_PROXY` in your environment.
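+
+For example, a Python client using the `requests` package might pass the token and point at a CA bundle explicitly. This is an illustrative sketch only; the host, port, token and certificate path below are placeholders rather than values issued by npg_porch:
+
+```python
+import requests
+
+token = 'REPLACE_WITH_YOUR_ISSUED_TOKEN'
+response = requests.get(
+    'https://npg-porch.example.com:8080/pipelines',   # placeholder server and port
+    headers={'Authorization': f'Bearer {token}'},
+    verify='/usr/share/ca-certificates/your-site-ca.crt',  # path to a CA bundle; adjust for your system
+)
+response.raise_for_status()
+print(response.json())
+```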
### Step 0 - get issued security tokens From 75e1b15898e0b0e744693990c030186642bd1ebe Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Tue, 15 Mar 2022 14:28:08 +0000 Subject: [PATCH 19/37] Mask some tool baggage --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 9c5ed95..a2db668 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,6 @@ __pycache__ *.egg-info .vscode -.eggs \ No newline at end of file +.eggs +build +.pytest_cache From 97f05d608dd6ed551efb5e2046a83a0c001494b6 Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Thu, 17 Mar 2022 15:22:01 +0000 Subject: [PATCH 20/37] Big refactor to make package install and be testable in develop and regular modes --- README.md | 2 +- pyproject.toml | 3 + {server => scripts}/deploy_schema.py | 0 scripts/issue_token.py | 75 +++++++++++++++++++ server/{tests => }/__init__.py | 0 server/{ => npg}/main.py | 0 setup.py | 8 +- {server/tests/fixtures => tests}/__init__.py | 0 {server/tests => tests}/conftest.py | 0 {server/tests => tests}/data_access_test.py | 0 {server/tests => tests}/db_auth_test.py | 0 {server/tests => tests}/db_task_test.py | 0 {server/tests => tests}/db_token_test.py | 0 tests/fixtures/__init__.py | 0 {server/tests => tests}/fixtures/deploy_db.py | 2 +- .../tests => tests}/fixtures/orm_session.py | 0 {server/tests => tests}/init_test.py | 0 .../tests => tests}/model_permission_test.py | 0 .../tests => tests}/pipeline_route_test.py | 0 {server/tests => tests}/task_route_test.py | 0 20 files changed, 84 insertions(+), 6 deletions(-) create mode 100644 pyproject.toml rename {server => scripts}/deploy_schema.py (100%) create mode 100755 scripts/issue_token.py rename server/{tests => }/__init__.py (100%) rename server/{ => npg}/main.py (100%) rename {server/tests/fixtures => tests}/__init__.py (100%) rename {server/tests => tests}/conftest.py (100%) rename {server/tests => tests}/data_access_test.py (100%) rename {server/tests => tests}/db_auth_test.py (100%) rename {server/tests => tests}/db_task_test.py (100%) rename {server/tests => tests}/db_token_test.py (100%) create mode 100644 tests/fixtures/__init__.py rename {server/tests => tests}/fixtures/deploy_db.py (99%) rename {server/tests => tests}/fixtures/orm_session.py (100%) rename {server/tests => tests}/init_test.py (100%) rename {server/tests => tests}/model_permission_test.py (100%) rename {server/tests => tests}/pipeline_route_test.py (100%) rename {server/tests => tests}/task_route_test.py (100%) diff --git a/README.md b/README.md index 3aa8642..158e738 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ cd server mkdir -p logs export DB_URL=postgresql+asyncpg://npg_rw:$PASS@npg_porch_db:$PORT/$DATABASE export DB_SCHEMA='non_default' -uvicorn main:app --host 0.0.0.0 --port 8080 --reload --log-config logging.json +uvicorn npg.main:app --host 0.0.0.0 --port 8080 --reload --log-config logging.json ``` and open your browser at `http://localhost:8080` to see links to the docs. 
diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..6402e6c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools>=60"] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/server/deploy_schema.py b/scripts/deploy_schema.py similarity index 100% rename from server/deploy_schema.py rename to scripts/deploy_schema.py diff --git a/scripts/issue_token.py b/scripts/issue_token.py new file mode 100755 index 0000000..cf05de2 --- /dev/null +++ b/scripts/issue_token.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python + +import argparse +from sqlalchemy import create_engine, select +from sqlalchemy.orm import sessionmaker +from sqlalchemy.orm.exc import NoResultFound + +from npg.porchdb.models import Token, Pipeline + +parser = argparse.ArgumentParser( + description='Creates a token in the backend DB and returns it' +) + +parser.add_argument( + '-h', '--host', help='Postgres host', required=True +) +parser.add_argument( + '-d', '--database', help='Postgres database', default='npg_porch' +) +parser.add_argument( + '-s', '--schema', help='Postgres schema', default='npg_porch' +) +parser.add_argument( + '-u', '--user', help='Postgres rw user', required=True +) +parser.add_argument( + '-p', '--pass', help='Postgres rw password', required=True +) +parser.add_argument( + '-P', '--port', help='Postgres port', required=True +) +parser.add_argument( + '-n', '--pipeline', help='Pipeline name. If given, create ' +) +parser.add_argument( + '-D', '--description', help='Description of token purpose', required=True +) + +args = parser.parse_args() + + +db_url = f'postgresql+psycopg2://{args["user"]}:{args["pass"]}@{args["host"]}:{args["port"]}/{args["database"]}' + +engine = create_engine(db_url) +SessionFactory = sessionmaker(bind=engine) +session = SessionFactory() + +token = None +pipeline = None + +if args["pipeline"]: + try: + pipeline = session.execute( + select(Pipeline) + .where(Pipeline.name == args["pipeline"]) + ).scalar_one() + except NoResultFound: + raise Exception( + 'Pipeline with name {} not found in database'.format(args["pipeline"]) + ) + + token = Token( + pipeline=pipeline, + description=args["description"] + ) +else: + token = Token(description=args["description"]) + +session.add(token) +session.commit() + +print(token.token) + +session.close() +engine.dispose() diff --git a/server/tests/__init__.py b/server/__init__.py similarity index 100% rename from server/tests/__init__.py rename to server/__init__.py diff --git a/server/main.py b/server/npg/main.py similarity index 100% rename from server/main.py rename to server/npg/main.py diff --git a/setup.py b/setup.py index 4247863..a245010 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -from setuptools import setup +from setuptools import find_packages, setup from distutils.util import convert_path version_path = convert_path('server/npg/version.py') @@ -7,12 +7,12 @@ exec(ver_file.read(), namespace) setup( - name='npg-porch', + name='npg_porch', version=namespace['__version__'], package_dir={ - 'npg': './server' + '': 'server', }, - packages=['npg'], + packages=find_packages('server'), license='GNU General Public License v3.0', author='Wellcome Sanger Institute', author_email='npg@sanger.ac.uk', diff --git a/server/tests/fixtures/__init__.py b/tests/__init__.py similarity index 100% rename from server/tests/fixtures/__init__.py rename to tests/__init__.py diff --git a/server/tests/conftest.py b/tests/conftest.py similarity index 100% rename from 
server/tests/conftest.py rename to tests/conftest.py diff --git a/server/tests/data_access_test.py b/tests/data_access_test.py similarity index 100% rename from server/tests/data_access_test.py rename to tests/data_access_test.py diff --git a/server/tests/db_auth_test.py b/tests/db_auth_test.py similarity index 100% rename from server/tests/db_auth_test.py rename to tests/db_auth_test.py diff --git a/server/tests/db_task_test.py b/tests/db_task_test.py similarity index 100% rename from server/tests/db_task_test.py rename to tests/db_task_test.py diff --git a/server/tests/db_token_test.py b/tests/db_token_test.py similarity index 100% rename from server/tests/db_token_test.py rename to tests/db_token_test.py diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/server/tests/fixtures/deploy_db.py b/tests/fixtures/deploy_db.py similarity index 99% rename from server/tests/fixtures/deploy_db.py rename to tests/fixtures/deploy_db.py index 09c10a7..1a0d966 100644 --- a/server/tests/fixtures/deploy_db.py +++ b/tests/fixtures/deploy_db.py @@ -8,7 +8,7 @@ ) from npg.porchdb.data_access import AsyncDbAccessor from npg.porch.models import Task as ModelledTask, TaskStateEnum -from main import app +from npg.main import app @pytest.fixture def minimum_data(): diff --git a/server/tests/fixtures/orm_session.py b/tests/fixtures/orm_session.py similarity index 100% rename from server/tests/fixtures/orm_session.py rename to tests/fixtures/orm_session.py diff --git a/server/tests/init_test.py b/tests/init_test.py similarity index 100% rename from server/tests/init_test.py rename to tests/init_test.py diff --git a/server/tests/model_permission_test.py b/tests/model_permission_test.py similarity index 100% rename from server/tests/model_permission_test.py rename to tests/model_permission_test.py diff --git a/server/tests/pipeline_route_test.py b/tests/pipeline_route_test.py similarity index 100% rename from server/tests/pipeline_route_test.py rename to tests/pipeline_route_test.py diff --git a/server/tests/task_route_test.py b/tests/task_route_test.py similarity index 100% rename from server/tests/task_route_test.py rename to tests/task_route_test.py From 253de7dd717ca0cbeaa2616016e57f596470c0dc Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Tue, 22 Mar 2022 17:31:07 +0000 Subject: [PATCH 21/37] Make scripts and server correctly use postgres schema --- .gitignore | 1 + README.md | 11 ++++++----- scripts/deploy_schema.py | 7 ++++++- scripts/issue_token.py | 18 +++++++++--------- server/npg/porchdb/connection.py | 9 ++++++++- 5 files changed, 30 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index a2db668..b77136a 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ __pycache__ .eggs build .pytest_cache +.vscode diff --git a/README.md b/README.md index 158e738..37de3cd 100644 --- a/README.md +++ b/README.md @@ -92,11 +92,15 @@ Create a schema on a postgres server: ```bash psql --host=npg_porch_db --port=$PORT --username=npg_admin --password -d postgres +``` -CREATE SCHEMA npg_porch +```sql +CREATE SCHEMA npg_porch; +SET search_path = npg_porch, public; +GRANT USAGE ON SCHEMA npg_porch TO npgtest_ro, npgtest_rw; ``` -Then run a script that deploys the ORM to this schema +The SET command ensures that the new schema is visible _for one session only_ in the `\d*` commands you might use in psql. 
Then run a script that deploys the ORM to this schema ```bash DB=npg_porch @@ -110,7 +114,6 @@ psql --host=npg_porch_db --port=$PORT --username=npg_admin --password -d $DB Permissions must be granted to the npg_rw and npg_ro users to the newly created schema ```sql -GRANT USAGE ON SCHEMA npg_porch TO npgtest_ro, npgtest_rw; GRANT USAGE ON ALL SEQUENCES IN SCHEMA npg_porch TO npgtest_rw; GRANT SELECT ON ALL TABLES IN SCHEMA npg_porch TO npgtest_ro; @@ -119,6 +122,4 @@ GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA npg_porch TO npgtes Note that granting usage on sequences is required to allow autoincrement columns to work during an insert. This is a trick of newer Postgres versions. -It may prove necessary to `GRANT` to specific named tables and sequences. Under specific circumstances the `ALL TABLES` qualifier doesn't work. - Until token support is implemented, a row will need to be inserted manually into the token table. Otherwise none of the event logging works. diff --git a/scripts/deploy_schema.py b/scripts/deploy_schema.py index 53d8eed..9f56d71 100644 --- a/scripts/deploy_schema.py +++ b/scripts/deploy_schema.py @@ -10,7 +10,12 @@ if schema_name is None: schema_name = 'npg_porch' -engine = sqlalchemy.create_engine(db_url) +print(f'Deploying npg_porch tables to schema {schema_name}') + +engine = sqlalchemy.create_engine( + db_url, + connect_args={'options': f'-csearch_path={schema_name}'} +) npg.porchdb.models.Base.metadata.schema = schema_name npg.porchdb.models.Base.metadata.create_all(engine) diff --git a/scripts/issue_token.py b/scripts/issue_token.py index cf05de2..b31e4db 100755 --- a/scripts/issue_token.py +++ b/scripts/issue_token.py @@ -12,7 +12,7 @@ ) parser.add_argument( - '-h', '--host', help='Postgres host', required=True + '-H', '--host', help='Postgres host', required=True ) parser.add_argument( '-d', '--database', help='Postgres database', default='npg_porch' @@ -24,7 +24,7 @@ '-u', '--user', help='Postgres rw user', required=True ) parser.add_argument( - '-p', '--pass', help='Postgres rw password', required=True + '-p', '--password', help='Postgres rw password', required=True ) parser.add_argument( '-P', '--port', help='Postgres port', required=True @@ -39,32 +39,32 @@ args = parser.parse_args() -db_url = f'postgresql+psycopg2://{args["user"]}:{args["pass"]}@{args["host"]}:{args["port"]}/{args["database"]}' +db_url = f'postgresql+psycopg2://{args.user}:{args.password}@{args.host}:{args.port}/{args.database}' -engine = create_engine(db_url) +engine = create_engine(db_url, connect_args={'options': f'-csearch_path={args.schema}'}) SessionFactory = sessionmaker(bind=engine) session = SessionFactory() token = None pipeline = None -if args["pipeline"]: +if args.pipeline: try: pipeline = session.execute( select(Pipeline) - .where(Pipeline.name == args["pipeline"]) + .where(Pipeline.name == args.pipeline) ).scalar_one() except NoResultFound: raise Exception( - 'Pipeline with name {} not found in database'.format(args["pipeline"]) + 'Pipeline with name {} not found in database'.format(args.pipeline) ) token = Token( pipeline=pipeline, - description=args["description"] + description=args.description ) else: - token = Token(description=args["description"]) + token = Token(description=args.description) session.add(token) session.commit() diff --git a/server/npg/porchdb/connection.py b/server/npg/porchdb/connection.py index 04e655c..5ac225c 100644 --- a/server/npg/porchdb/connection.py +++ b/server/npg/porchdb/connection.py @@ -41,8 +41,15 @@ "ENV['DB_URL'] must be 
set with a database URL, or NPG_PORCH_MODE must be set for testing" ) + +# asyncpg driver receives options differently to psycopg +# Embed them inside a server_settings dict engine = create_async_engine( - config['DB_URL'], future=True + config['DB_URL'], + future=True, + connect_args={ + 'server_settings': {'options': '-csearch_path={}'.format(config["DB_SCHEMA"])} + } ) Base.metadata.schema = config['DB_SCHEMA'] session_factory = sessionmaker( From 5282e51e0bae8c3b6fd8d962716bbd6aa446d411 Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Wed, 23 Mar 2022 16:23:24 +0000 Subject: [PATCH 22/37] Prevent tests from receiving postgres driver options --- server/npg/porchdb/connection.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/server/npg/porchdb/connection.py b/server/npg/porchdb/connection.py index 5ac225c..2c4b7a2 100644 --- a/server/npg/porchdb/connection.py +++ b/server/npg/porchdb/connection.py @@ -44,12 +44,17 @@ # asyncpg driver receives options differently to psycopg # Embed them inside a server_settings dict +connect_args = {} +if config['TEST'] is None: + connect_args = { + 'server_settings': { + 'options': '-csearch_path={}'.format(config["DB_SCHEMA"]) + } + } engine = create_async_engine( config['DB_URL'], future=True, - connect_args={ - 'server_settings': {'options': '-csearch_path={}'.format(config["DB_SCHEMA"])} - } + connect_args=connect_args ) Base.metadata.schema = config['DB_SCHEMA'] session_factory = sessionmaker( From 59453b652e3a9923acf90bf713fe3bdb47c44ae1 Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Thu, 24 Mar 2022 11:25:02 +0000 Subject: [PATCH 23/37] Make user guide refer to https wherever possible --- docs/user_guide.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/user_guide.md b/docs/user_guide.md index f77f3cb..73e50ba 100644 --- a/docs/user_guide.md +++ b/docs/user_guide.md @@ -46,7 +46,7 @@ You can name your pipeline however you like, but the name must be unique, and be } ``` -`url='$SERVER:$PORT/pipelines'; curl -L -XPOST ${url} -H "content-type: application/json" -H "Authorization: Bearer $ADMIN_TOKEN" -w " %{http_code}" -d @pipeline-def.json` +`url='https://$SERVER:$PORT/pipelines'; curl -L -XPOST ${url} -H "content-type: application/json" -H "Authorization: Bearer $ADMIN_TOKEN" -w " %{http_code}" -d @pipeline-def.json` Keep this pipeline definition with your data, as you will need it to tell npg_porch which pipeline you are acting on. @@ -118,7 +118,7 @@ Note that it is possible to run the same `task_input` with a different `pipeline Now you want the pipeline to run once per specification, and so register the documents with npg_porch. ```bash -url='$SERVER:$PORT/tasks' +url='https://$SERVER:$PORT/tasks' for DOC in *.json; do response=$(curl -w '%{http_code}' -L -XPOST ${url} -H "content-type: application/json" -H "Authorization: Bearer $TOKEN" -d @${DOC}`) @@ -136,7 +136,7 @@ use HTTP::Request; use LWP::UserAgent; my $ua = LWP::UserAgent->new; -my $request = HTTP::Request->new(POST => '$SERVER:$PORT/tasks'); +my $request = HTTP::Request->new(POST => 'https://$SERVER:$PORT/tasks'); $request->content_type('application/json'); $request->header(Accept => 'application/json'); $request->content($DOC); @@ -188,7 +188,7 @@ Supposing there are new tasks created every 24 hours, we then also need a client Using the "claim" interface, you can ask npg_porch to earmark tasks that you intend to run. Others will remain unclaimed until this script or another claims them. 
Generally speaking, tasks are first-in first-out, so the first task you get if you claim one is the first unclaimed task npg_porch was told about. ```bash -url='$SERVER:$PORT/tasks/claim' +url='https://$SERVER:$PORT/tasks/claim' response=$(curl -L -I -XPOST ${url} -H "content-type: application/json" -H "Authorization: Bearer $TOKEN" -d @pipeline-def.json) ``` @@ -224,7 +224,7 @@ or use JSON qw/decode_json/; my $ua = LWP::UserAgent->new; -my $request = HTTP::Request->new(POST => '$SERVER:$PORT/tasks/claim'); +my $request = HTTP::Request->new(POST => 'https://$SERVER:$PORT/tasks/claim'); $request->content_type('application/json'); $request->header(Accept => 'application/json'); $request->header(Authorization => "Bearer $TOKEN") From 9f4b71be1a25dd2a9ee4dffcc177725c51ffe65c Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Fri, 8 Apr 2022 18:05:32 +0100 Subject: [PATCH 24/37] Complete get_tasks coverage and allow filtering by pipeline name and task status --- server/npg/porch/endpoints/tasks.py | 16 +++++---- server/npg/porchdb/data_access.py | 23 ++++++++++--- tests/data_access_test.py | 53 +++++++++++++++++++++++++++++ tests/task_route_test.py | 39 +++++++++++++++++++++ 4 files changed, 119 insertions(+), 12 deletions(-) diff --git a/server/npg/porch/endpoints/tasks.py b/server/npg/porch/endpoints/tasks.py index 49a71d9..4f7e0b9 100644 --- a/server/npg/porch/endpoints/tasks.py +++ b/server/npg/porch/endpoints/tasks.py @@ -21,13 +21,13 @@ import logging from fastapi import APIRouter, HTTPException, Depends from pydantic import PositiveInt -from typing import List +from typing import List, Optional from sqlalchemy.exc import IntegrityError from sqlalchemy.orm.exc import NoResultFound from starlette import status from npg.porch.models.pipeline import Pipeline -from npg.porch.models.task import Task +from npg.porch.models.task import Task, TaskStateEnum from npg.porch.models.permission import PermissionValidationException from npg.porchdb.connection import get_DbAccessor from npg.porch.auth.token import validate @@ -62,17 +62,19 @@ def _validate_request(permission, pipeline): @router.get( "/", response_model=List[Task], - summary="Returns all tasks.", + summary="Returns all tasks, and can be filtered to task status or pipeline name", description=''' - Return all tasks. A filter will be applied if used in the query. - The filter feature is not yet implemented.''' + Return all tasks. The list of tasks can be filtered by supplying a pipeline + name and/or task status''' ) async def get_tasks( + pipeline_name: Optional[str] = None, + status: Optional[TaskStateEnum] = None, db_accessor=Depends(get_DbAccessor), permission=Depends(validate) ) -> List[Task]: - - return await db_accessor.get_tasks() + print(pipeline_name, status) + return await db_accessor.get_tasks(pipeline_name=pipeline_name, task_status=status) @router.post( diff --git a/server/npg/porchdb/data_access.py b/server/npg/porchdb/data_access.py index 49bb7b3..d36e2d9 100644 --- a/server/npg/porchdb/data_access.py +++ b/server/npg/porchdb/data_access.py @@ -194,14 +194,27 @@ async def update_task(self, token_id: int, task: Task) -> Task: return og_task.convert_to_model() - async def get_tasks(self) -> List[Task]: + async def get_tasks( + self, + pipeline_name: Optional[str] = None, + task_status: Optional[TaskStateEnum] = None + ) -> List[Task]: ''' - Gets all the tasks. Going to be problematic without filtering + Gets all the tasks. + + Can filter tasks by pipeline name and task status in order to be more useful. 
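+
+        Illustrative call (the accessor variable and argument values here
+        are examples only, assumed for documentation purposes):
+            tasks = await db_accessor.get_tasks(
+                pipeline_name='ptest one', task_status=TaskStateEnum.PENDING)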
''' - task_result = await self.session.execute( - select(DbTask) + query = select(DbTask)\ + .join(DbTask.pipeline)\ .options(joinedload(DbTask.pipeline)) - ) + + if pipeline_name: + query = query.where(DbPipeline.name == pipeline_name) + + if task_status: + query = query.filter(DbTask.state == task_status) + + task_result = await self.session.execute(query) tasks = task_result.scalars().all() return [t.convert_to_model() for t in tasks] diff --git a/tests/data_access_test.py b/tests/data_access_test.py index 5a23c37..c463b0c 100644 --- a/tests/data_access_test.py +++ b/tests/data_access_test.py @@ -227,3 +227,56 @@ async def test_update_tasks(db_accessor): with pytest.raises(Exception) as exception: await db_accessor.update_task(1, saved_task) assert exception.value == 'Cannot change task definition. Submit a new task instead' + + +@pytest.mark.asyncio +async def test_get_tasks(db_accessor): + all_tasks = await db_accessor.get_tasks() + + assert len(all_tasks) == 2, 'All tasks currently includes two for one pipeline from the fixture' + + tasks = await db_accessor.get_tasks(pipeline_name = 'ptest one') + + assert tasks == all_tasks, 'Filtering by pipeline name gives the same result' + + tasks = await db_accessor.get_tasks(task_status = TaskStateEnum.FAILED) + assert len(tasks) == 0, 'No failed tasks yet' + + # Create an additional pipeline and tasks + + pipeline = await store_me_a_pipeline(db_accessor, 2) + + for i in range(3): + await db_accessor.create_task( + token_id=1, + task=Task( + task_input={'number': i + 1}, + pipeline=pipeline + ) + ) + + all_tasks = await db_accessor.get_tasks() + assert len(all_tasks) == 5, 'Now we have five tasks in two pipelines' + + tasks = await db_accessor.get_tasks(pipeline_name = 'ptest one') + assert len(tasks) == 2, 'New tasks filtered out by pipeline name' + assert tasks[0].pipeline.name == 'ptest one' + + # Change one task to another status + await db_accessor.update_task( + token_id = 1, + task=Task( + task_input={'number': 3}, + pipeline=pipeline, + status=TaskStateEnum.DONE + ) + ) + + tasks = await db_accessor.get_tasks(task_status = TaskStateEnum.DONE) + assert len(tasks) == 1, 'Not done tasks are filtered' + assert tasks[0].task_input == {'number': 3}, 'Leaving only one' + + # Check interaction of both constraints + + tasks = await db_accessor.get_tasks(pipeline_name='ptest one', task_status=TaskStateEnum.DONE) + assert len(tasks) == 0, 'Pipeline "ptest one" has no DONE tasks' diff --git a/tests/task_route_test.py b/tests/task_route_test.py index 50437a9..54fccd1 100644 --- a/tests/task_route_test.py +++ b/tests/task_route_test.py @@ -175,3 +175,42 @@ def test_task_claim(async_minimum, async_tasks, fastapi_testclient): assert response.status_code == status.HTTP_200_OK tasks = response.json() assert len(tasks) == 0, 'Tried to claim, did not get any tasks' + +def test_get_tasks(async_minimum, async_tasks, fastapi_testclient): + response = fastapi_testclient.get('/tasks') + assert response.status_code == status.HTTP_403_FORBIDDEN, 'Need a token to see any tasks' + + response = fastapi_testclient.get('/tasks', headers=headers4ptest_one) + assert response.status_code == status.HTTP_200_OK, 'Authorised task fetching' + tasks = response.json() + + unique_pipelines = {t['pipeline']['name'] for t in tasks} + + assert 'ptest one' in unique_pipelines, 'Tasks for pipeline present with relevant token' + assert 'ptest some' in unique_pipelines, 'Tasks for other pipelines also present with token' + + response = fastapi_testclient.get( + 
'/tasks?pipeline_name=ptest one', + headers=headers4ptest_one + ) + assert response.status_code == status.HTTP_200_OK, 'One optional argument works' + tasks = response.json() + assert len(tasks) == 2, 'Most tasks now filtered' + assert {t['pipeline']['name'] for t in tasks} == {'ptest one'}, 'All tasks belong to pipeline' + + response = fastapi_testclient.get( + '/tasks?status=PENDING', + headers=headers4ptest_one + ) + assert response.status_code == status.HTTP_200_OK, 'Other optional argument works' + tasks = response.json() + assert len(tasks) == 10, 'Ten pending tasks selected' + + response = fastapi_testclient.get( + '/tasks?pipeline_name="ptest one"&status=PENDING', + headers=headers4ptest_one + ) + assert response.status_code == status.HTTP_200_OK, 'Both arguments together work' + print(response.text) + tasks = response.json() + assert len(tasks) == 0, 'but no tasks are returned that match status and pipeline' From 90dc184fe74ba5510499b8357bca260477be0d7b Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Tue, 13 Dec 2022 15:16:46 +0000 Subject: [PATCH 25/37] Newer starlette releases require httpx, and redirect handling has changed --- setup.py | 4 +++- tests/pipeline_route_test.py | 10 +++++----- tests/task_route_test.py | 12 ++++++------ 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/setup.py b/setup.py index a245010..1aa60ed 100644 --- a/setup.py +++ b/setup.py @@ -24,6 +24,7 @@ 'pydantic', 'pysqlite3', 'psycopg2-binary', + 'starlette>=0.22.0', 'sqlalchemy>=1.4.29', 'ujson', 'uvicorn', @@ -34,7 +35,8 @@ 'pytest', 'pytest-asyncio', 'requests', - 'flake8' + 'flake8', + 'httpx' ] } ) diff --git a/tests/pipeline_route_test.py b/tests/pipeline_route_test.py index 702ad27..7d3502e 100644 --- a/tests/pipeline_route_test.py +++ b/tests/pipeline_route_test.py @@ -16,18 +16,18 @@ def http_create_pipeline(fastapi_testclient, pipeline): response = fastapi_testclient.post( - '/pipelines', json=pipeline.dict(), allow_redirects=True + '/pipelines', json=pipeline.dict(), follow_redirects=True ) assert response.status_code == status.HTTP_403_FORBIDDEN response = fastapi_testclient.post( - '/pipelines', json=pipeline.dict(), allow_redirects=True, + '/pipelines', json=pipeline.dict(), follow_redirects=True, headers=headers ) assert response.status_code == status.HTTP_403_FORBIDDEN response = fastapi_testclient.post( - '/pipelines', json=pipeline.dict(), allow_redirects=True, + '/pipelines', json=pipeline.dict(), follow_redirects=True, headers=headers4power_user ) assert response.status_code == status.HTTP_201_CREATED @@ -116,7 +116,7 @@ def test_create_pipeline(async_minimum, fastapi_testclient): response = fastapi_testclient.post( '/pipelines', json=desired_pipeline.dict(), - allow_redirects=True, + follow_redirects=True, headers=headers4power_user ) assert response.status_code == status.HTTP_409_CONFLICT, 'ptest two already in DB' @@ -148,7 +148,7 @@ def test_create_pipeline(async_minimum, fastapi_testclient): response = fastapi_testclient.post( '/pipelines', json=third_desired_pipeline.dict(), - allow_redirects=True, + follow_redirects=True, headers=headers4power_user ) assert response.status_code == status.HTTP_400_BAD_REQUEST diff --git a/tests/task_route_test.py b/tests/task_route_test.py index 54fccd1..b33cf97 100644 --- a/tests/task_route_test.py +++ b/tests/task_route_test.py @@ -28,7 +28,7 @@ def test_task_creation(async_minimum, fastapi_testclient): response = fastapi_testclient.post( 'tasks', json=task_one.dict(), - allow_redirects=True, + follow_redirects=True, 
headers=headers4ptest_one ) assert response.status_code == status.HTTP_201_CREATED @@ -38,7 +38,7 @@ def test_task_creation(async_minimum, fastapi_testclient): response = fastapi_testclient.post( 'tasks', json=task_one.dict(), - allow_redirects=True, + follow_redirects=True, headers=headers4ptest_one ) assert response.status_code == status.HTTP_409_CONFLICT @@ -56,7 +56,7 @@ def test_task_creation(async_minimum, fastapi_testclient): response = fastapi_testclient.post( 'tasks', json=task_two.dict(), - allow_redirects=True, + follow_redirects=True, headers=headers4ptest_one ) assert response.status_code == status.HTTP_403_FORBIDDEN @@ -71,7 +71,7 @@ def test_task_update(async_minimum, fastapi_testclient): response = fastapi_testclient.put( '/tasks', json=task, - allow_redirects=True, + follow_redirects=True, headers=headers4ptest_one ) assert response.status_code == status.HTTP_200_OK @@ -86,7 +86,7 @@ def test_task_update(async_minimum, fastapi_testclient): response = fastapi_testclient.put( '/tasks', json=modified_task.dict(), - allow_redirects=True, + follow_redirects=True, headers=headers4ptest_one ) assert response.status_code == status.HTTP_404_NOT_FOUND @@ -101,7 +101,7 @@ def test_task_update(async_minimum, fastapi_testclient): response = fastapi_testclient.put( '/tasks', json=modified_task.dict(), - allow_redirects=True, + follow_redirects=True, headers=headers4ptest_one ) assert response.status_code == status.HTTP_403_FORBIDDEN From cfab67a458ab65ef004ae169a66ea2e2348a3f12 Mon Sep 17 00:00:00 2001 From: Keith James <47220353+kjsanger@users.noreply.github.com> Date: Fri, 16 Dec 2022 12:42:38 +0000 Subject: [PATCH 26/37] Move from checkout@v2 to @v3 --- .github/workflows/python-app.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 98485d9..041de38 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python 3.10 uses: actions/setup-python@v2 with: From 6b36e5cc23a6fca26b770ef93c7b882b36db4451 Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Tue, 15 Aug 2023 14:01:01 +0000 Subject: [PATCH 27/37] Prevent sqlalchemy 2, pydantic 2 from installing --- setup.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index a245010..9ad37d1 100644 --- a/setup.py +++ b/setup.py @@ -21,10 +21,11 @@ 'aiosqlite', 'asyncpg', 'fastapi', - 'pydantic', + 'httpx', # missing dep for another dep + 'pydantic<2', 'pysqlite3', 'psycopg2-binary', - 'sqlalchemy>=1.4.29', + 'sqlalchemy>=1.4.29,<2', 'ujson', 'uvicorn', 'uuid' From dc3861947f31b86ed70ce77ca2f511af49489c2d Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Wed, 26 Jul 2023 16:51:37 +0100 Subject: [PATCH 28/37] Switch to pyproject file for package setup --- pyproject.toml | 36 ++++++++++++++++++++++++++++++++++-- setup.py | 33 +-------------------------------- 2 files changed, 35 insertions(+), 34 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6402e6c..7ca3ffc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,35 @@ [build-system] -requires = ["setuptools>=60"] -build-backend = "setuptools.build_meta" \ No newline at end of file +requires = ["setuptools>=61"] +build-backend = "setuptools.build_meta" + +[project] +name = "npg_porch" +requires-python = ">=3.8" +authors = [{name="Marina Gourtovaia", email="mg8@sanger.ac.uk"}, 
{name="Kieron Taylor", email="kt19@sanger.ac.uk"}] +description = "API server for tracking unique workflow executions" +readme = "README.md" +license = {file = "LICENSE.md"} +dependencies = [ + "aiosqlite", + "asyncpg", + "fastapi", + "pydantic < 2.0.0", + "pysqlite3", + "psycopg2-binary", + "sqlalchemy >= 1.4.29, <2", + "ujson", + "uvicorn", + "uuid" +] +dynamic = ["version"] + +[tools.setuptools] +packages = "find:" + +[project.optional-dependencies] +test = [ + "pytest", + "pytest-asyncio", + "requests", + "flake8" +] diff --git a/setup.py b/setup.py index addb276..88f955a 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -from setuptools import find_packages, setup +from setuptools import setup from distutils.util import convert_path version_path = convert_path('server/npg/version.py') @@ -7,36 +7,5 @@ exec(ver_file.read(), namespace) setup( - name='npg_porch', version=namespace['__version__'], - package_dir={ - '': 'server', - }, - packages=find_packages('server'), - license='GNU General Public License v3.0', - author='Wellcome Sanger Institute', - author_email='npg@sanger.ac.uk', - description='Work allocation and tracking for portable pipelines', - install_requires=[ - 'aiosqlite', - 'asyncpg', - 'fastapi', - 'pydantic<2', - 'pysqlite3', - 'psycopg2-binary', - 'sqlalchemy>=1.4.29,<2', - 'starlette>=0.22.0', - 'ujson', - 'uvicorn', - 'uuid' - ], - extras_require={ - 'test': [ - 'pytest', - 'pytest-asyncio', - 'requests', - 'flake8', - 'httpx' - ] - } ) From 36a996abc795429332f3633112b72cdf4a7e42b6 Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Tue, 22 Aug 2023 10:31:12 +0000 Subject: [PATCH 29/37] Use setup.cfg to find modules, remove redundant setup.py --- pyproject.toml | 3 --- server/npg/version.py | 1 - setup.cfg | 11 +++++++++++ setup.py | 11 ----------- 4 files changed, 11 insertions(+), 15 deletions(-) delete mode 100644 server/npg/version.py create mode 100644 setup.cfg delete mode 100644 setup.py diff --git a/pyproject.toml b/pyproject.toml index 7ca3ffc..d58b821 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,9 +23,6 @@ dependencies = [ ] dynamic = ["version"] -[tools.setuptools] -packages = "find:" - [project.optional-dependencies] test = [ "pytest", diff --git a/server/npg/version.py b/server/npg/version.py deleted file mode 100644 index b794fd4..0000000 --- a/server/npg/version.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = '0.1.0' diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..ee151de --- /dev/null +++ b/setup.cfg @@ -0,0 +1,11 @@ +[metadata] +name = npg_porch +version = 0.1.0 + +[options] +package_dir = + =server +packages = find: + +[options.packages.find] +where=server \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index 88f955a..0000000 --- a/setup.py +++ /dev/null @@ -1,11 +0,0 @@ -from setuptools import setup -from distutils.util import convert_path - -version_path = convert_path('server/npg/version.py') -namespace = {} -with open(version_path) as ver_file: - exec(ver_file.read(), namespace) - -setup( - version=namespace['__version__'], -) From c4ee8223e6c9b9376f7e8201d6c5dc6f8703fa22 Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Tue, 22 Aug 2023 10:38:01 +0000 Subject: [PATCH 30/37] Annoying httpx dependency absence --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d58b821..b94bb6f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,5 +28,6 @@ test = [ "pytest", "pytest-asyncio", "requests", - "flake8" + 
"flake8", + "httpx" ] From 497acdd1f5a46ecc5de65587c9ea83061e69bc46 Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Tue, 22 Aug 2023 10:51:33 +0000 Subject: [PATCH 31/37] Push to python 3.10 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b94bb6f..eade512 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "npg_porch" -requires-python = ">=3.8" +requires-python = ">=3.10" authors = [{name="Marina Gourtovaia", email="mg8@sanger.ac.uk"}, {name="Kieron Taylor", email="kt19@sanger.ac.uk"}] description = "API server for tracking unique workflow executions" readme = "README.md" From 010b0c04fb79b21086a71fa79f47c485d052f0b6 Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Tue, 22 Aug 2023 11:08:24 +0000 Subject: [PATCH 32/37] Compatibility upgrade to use sqlachemy 2 --- pyproject.toml | 2 +- server/npg/porchdb/connection.py | 1 - server/npg/porchdb/models/base.py | 2 +- tests/fixtures/orm_session.py | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index eade512..ba03a6f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ dependencies = [ "pydantic < 2.0.0", "pysqlite3", "psycopg2-binary", - "sqlalchemy >= 1.4.29, <2", + "sqlalchemy >2", "ujson", "uvicorn", "uuid" diff --git a/server/npg/porchdb/connection.py b/server/npg/porchdb/connection.py index 2c4b7a2..93fc692 100644 --- a/server/npg/porchdb/connection.py +++ b/server/npg/porchdb/connection.py @@ -53,7 +53,6 @@ } engine = create_async_engine( config['DB_URL'], - future=True, connect_args=connect_args ) Base.metadata.schema = config['DB_SCHEMA'] diff --git a/server/npg/porchdb/models/base.py b/server/npg/porchdb/models/base.py index 7d796e0..db40e83 100644 --- a/server/npg/porchdb/models/base.py +++ b/server/npg/porchdb/models/base.py @@ -18,6 +18,6 @@ # You should have received a copy of the GNU General Public License along with # this program. If not, see . -from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import declarative_base Base = declarative_base() diff --git a/tests/fixtures/orm_session.py b/tests/fixtures/orm_session.py index fb6dd10..7cdbb09 100644 --- a/tests/fixtures/orm_session.py +++ b/tests/fixtures/orm_session.py @@ -16,7 +16,7 @@ def sync_session(): ''' sqlite_url = 'sqlite+pysqlite:///:memory:' - engine = sqlalchemy.create_engine(sqlite_url, future=True) + engine = sqlalchemy.create_engine(sqlite_url) Base.metadata.create_all(engine) SessionFactory = sqlalchemy.orm.sessionmaker(bind=engine) sess = sqlalchemy.orm.scoped_session(SessionFactory) From f870e4888da8f4b27212cb19390ef9fe23c9e0dd Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Thu, 24 Aug 2023 14:33:55 +0000 Subject: [PATCH 33/37] Adjust pydantic use until tests pass. 
--- pyproject.toml | 2 +- server/npg/porch/endpoints/tasks.py | 4 ++-- server/npg/porch/models/permission.py | 11 ++++++----- server/npg/porch/models/task.py | 4 ++-- tests/model_permission_test.py | 6 +++--- tests/pipeline_route_test.py | 4 ++-- 6 files changed, 16 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index eade512..a95e9e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ dependencies = [ "aiosqlite", "asyncpg", "fastapi", - "pydantic < 2.0.0", + "pydantic > 2.0.0", "pysqlite3", "psycopg2-binary", "sqlalchemy >= 1.4.29, <2", diff --git a/server/npg/porch/endpoints/tasks.py b/server/npg/porch/endpoints/tasks.py index 4f7e0b9..02d7e8c 100644 --- a/server/npg/porch/endpoints/tasks.py +++ b/server/npg/porch/endpoints/tasks.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021, 2022 Genome Research Ltd. +# Copyright (C) 2021, 2022, 2023 Genome Research Ltd. # # Author: Kieron Taylor kt19@sanger.ac.uk # Author: Marina Gourtovaia mg8@sanger.ac.uk @@ -173,7 +173,7 @@ async def update_task( ) async def claim_task( pipeline: Pipeline, - num_tasks: PositiveInt = 1, + num_tasks: PositiveInt | None = 1, db_accessor=Depends(get_DbAccessor), permission=Depends(validate) ) -> List[Task]: diff --git a/server/npg/porch/models/permission.py b/server/npg/porch/models/permission.py index 09f1ab5..df5b62c 100644 --- a/server/npg/porch/models/permission.py +++ b/server/npg/porch/models/permission.py @@ -1,4 +1,4 @@ -# Copyright (C) 2022 Genome Research Ltd. +# Copyright (C) 2022. 2023 Genome Research Ltd. # # Author: Kieron Taylor kt19@sanger.ac.uk # Author: Marina Gourtovaia mg8@sanger.ac.uk @@ -19,7 +19,7 @@ # this program. If not, see . from enum import Enum -from pydantic import BaseModel, Field, validator +from pydantic import BaseModel, Field, field_validator, FieldValidationInfo from typing import Optional from npg.porch.models.pipeline import Pipeline @@ -49,11 +49,12 @@ class Permission(BaseModel): title = 'A role associated with the presented credentials', ) - @validator('role') - def no_pipeline4special_users(cls, v, values): + @field_validator('role') + @classmethod + def no_pipeline4special_users(cls, v: str, info: FieldValidationInfo): if (v == RolesEnum.POWER_USER - and ('pipeline' in values and values['pipeline'] is not None)): + and ('pipeline' in info.data and info.data['pipeline'] is not None)): raise ValueError('Power user cannot be associated with a pipeline') return v diff --git a/server/npg/porch/models/task.py b/server/npg/porch/models/task.py index 42bd167..b73dfb7 100644 --- a/server/npg/porch/models/task.py +++ b/server/npg/porch/models/task.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021, 2022 Genome Research Ltd. +# Copyright (C) 2021, 2022, 2023 Genome Research Ltd. 
# # Author: Kieron Taylor kt19@sanger.ac.uk # Author: Marina Gourtovaia mg8@sanger.ac.uk @@ -46,7 +46,7 @@ class Task(BaseModel): title='Task Input', description='A structured parameter set that uniquely identifies a piece of work, and enables an iteration of a pipeline' # noqa: E501 ) - status: Optional[TaskStateEnum] + status: Optional[TaskStateEnum] = None def generate_task_id(self): return hashlib.sha256(ujson.dumps(self.task_input, sort_keys=True).encode()).hexdigest() diff --git a/tests/model_permission_test.py b/tests/model_permission_test.py index aa85e4f..23d3716 100644 --- a/tests/model_permission_test.py +++ b/tests/model_permission_test.py @@ -2,7 +2,7 @@ from npg.porch.models.pipeline import Pipeline from npg.porch.models.permission import Permission, PermissionValidationException -from pydantic.error_wrappers import ValidationError +from pydantic import ValidationError def test_model_create(): @@ -37,12 +37,12 @@ def test_xvalidation_role_pipeline(): def test_error_with_insufficient_args(): - with pytest.raises(ValidationError, match=r'requestor_id\s+field required'): + with pytest.raises(ValidationError, match=r'requestor_id\s+Field required'): Permission( role = 'regular_user', pipeline = Pipeline(name='number one') ) - with pytest.raises(ValidationError, match=r'role\s+field required'): + with pytest.raises(ValidationError, match=r'role\s+Field required'): Permission( requestor_id = 1, pipeline = Pipeline(name='number one') diff --git a/tests/pipeline_route_test.py b/tests/pipeline_route_test.py index 7d3502e..0413ece 100644 --- a/tests/pipeline_route_test.py +++ b/tests/pipeline_route_test.py @@ -77,7 +77,7 @@ def test_pipeline_filtered_get(async_minimum, fastapi_testclient): assert response.status_code == status.HTTP_200_OK pipes = response.json() assert len(pipes) == 1, 'Only one pipeline matches the uri' - assert pipes[0] == second_pipeline + assert pipes[0] == second_pipeline.dict() def test_get_known_pipeline(async_minimum, fastapi_testclient): @@ -136,7 +136,7 @@ def test_create_pipeline(async_minimum, fastapi_testclient): '/pipelines', headers=headers ) assert response.status_code == status.HTTP_200_OK - assert response.json()[1:] == [desired_pipeline, second_desired_pipeline] + assert response.json()[1:] == [desired_pipeline.dict(), second_desired_pipeline.dict()] # Create a very poorly provenanced pipeline third_desired_pipeline = Pipeline( From 9c3e56d66f8aea7a96ad1f87f9050f7e20090f62 Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Thu, 24 Aug 2023 15:55:03 +0000 Subject: [PATCH 34/37] Satisfy pydantic deprecation warnings --- server/npg/porch/models/task.py | 6 +++--- tests/pipeline_route_test.py | 20 ++++++++++---------- tests/task_route_test.py | 18 +++++++++--------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/server/npg/porch/models/task.py b/server/npg/porch/models/task.py index b73dfb7..7233812 100644 --- a/server/npg/porch/models/task.py +++ b/server/npg/porch/models/task.py @@ -65,13 +65,13 @@ def __eq__(self, other): ''' if not isinstance(other, Task): if isinstance(other, dict): - other = Task.parse_obj(other) + other = Task.model_validate(other) else: return False truths = [] - for k, v in self.dict().items(): - other_d = other.dict() + for k, v in self.model_dump().items(): + other_d = other.model_dump() if k == 'pipeline': truths.append(v['name'] == other_d[k]['name']) elif k == 'task_input_id': diff --git a/tests/pipeline_route_test.py b/tests/pipeline_route_test.py index 0413ece..bf0f5d8 100644 --- 
a/tests/pipeline_route_test.py +++ b/tests/pipeline_route_test.py @@ -16,18 +16,18 @@ def http_create_pipeline(fastapi_testclient, pipeline): response = fastapi_testclient.post( - '/pipelines', json=pipeline.dict(), follow_redirects=True + '/pipelines', json=pipeline.model_dump(), follow_redirects=True ) assert response.status_code == status.HTTP_403_FORBIDDEN response = fastapi_testclient.post( - '/pipelines', json=pipeline.dict(), follow_redirects=True, + '/pipelines', json=pipeline.model_dump(), follow_redirects=True, headers=headers ) assert response.status_code == status.HTTP_403_FORBIDDEN response = fastapi_testclient.post( - '/pipelines', json=pipeline.dict(), follow_redirects=True, + '/pipelines', json=pipeline.model_dump(), follow_redirects=True, headers=headers4power_user ) assert response.status_code == status.HTTP_201_CREATED @@ -41,7 +41,7 @@ def test_pipeline_get(async_minimum, fastapi_testclient): assert response.status_code == status.HTTP_403_FORBIDDEN response = fastapi_testclient.get('/pipelines', headers=headers) assert response.status_code == status.HTTP_200_OK - pipeline = Pipeline.parse_obj(response.json()[0]) + pipeline = Pipeline.model_validate(response.json()[0]) assert pipeline, 'Response fits into the over-the-wire model' assert pipeline.name == 'ptest one' assert pipeline.version == '0.3.14' @@ -77,7 +77,7 @@ def test_pipeline_filtered_get(async_minimum, fastapi_testclient): assert response.status_code == status.HTTP_200_OK pipes = response.json() assert len(pipes) == 1, 'Only one pipeline matches the uri' - assert pipes[0] == second_pipeline.dict() + assert pipes[0] == second_pipeline.model_dump() def test_get_known_pipeline(async_minimum, fastapi_testclient): @@ -87,7 +87,7 @@ def test_get_known_pipeline(async_minimum, fastapi_testclient): ) assert response.status_code == status.HTTP_200_OK - pipeline = Pipeline.parse_obj(response.json()) + pipeline = Pipeline.model_validate(response.json()) assert pipeline, 'Response fits into the over-the-wire model' assert pipeline.name == 'ptest one' assert pipeline.version == '0.3.14' @@ -109,13 +109,13 @@ def test_create_pipeline(async_minimum, fastapi_testclient): ) response = http_create_pipeline(fastapi_testclient, desired_pipeline) - pipeline = Pipeline.parse_obj(response) + pipeline = Pipeline.model_validate(response) assert pipeline == desired_pipeline, 'Got back what we put in' # Create the same pipeline response = fastapi_testclient.post( '/pipelines', - json=desired_pipeline.dict(), + json=desired_pipeline.model_dump(), follow_redirects=True, headers=headers4power_user ) @@ -136,7 +136,7 @@ def test_create_pipeline(async_minimum, fastapi_testclient): '/pipelines', headers=headers ) assert response.status_code == status.HTTP_200_OK - assert response.json()[1:] == [desired_pipeline.dict(), second_desired_pipeline.dict()] + assert response.json()[1:] == [desired_pipeline.model_dump(), second_desired_pipeline.model_dump()] # Create a very poorly provenanced pipeline third_desired_pipeline = Pipeline( @@ -147,7 +147,7 @@ def test_create_pipeline(async_minimum, fastapi_testclient): response = fastapi_testclient.post( '/pipelines', - json=third_desired_pipeline.dict(), + json=third_desired_pipeline.model_dump(), follow_redirects=True, headers=headers4power_user ) diff --git a/tests/task_route_test.py b/tests/task_route_test.py index b33cf97..c24bd6a 100644 --- a/tests/task_route_test.py +++ b/tests/task_route_test.py @@ -1,6 +1,6 @@ from starlette import status -from npg.porch.models import Task, TaskStateEnum +from 
npg.porch.models import Task, TaskStateEnum, Pipeline # Not testing get-all-tasks as this method will ultimately go @@ -27,7 +27,7 @@ def test_task_creation(async_minimum, fastapi_testclient): response = fastapi_testclient.post( 'tasks', - json=task_one.dict(), + json=task_one.model_dump(), follow_redirects=True, headers=headers4ptest_one ) @@ -37,7 +37,7 @@ def test_task_creation(async_minimum, fastapi_testclient): # Try again and expect to fail response = fastapi_testclient.post( 'tasks', - json=task_one.dict(), + json=task_one.model_dump(), follow_redirects=True, headers=headers4ptest_one ) @@ -55,7 +55,7 @@ def test_task_creation(async_minimum, fastapi_testclient): # to have a valid token for a pipeline that does not exist. response = fastapi_testclient.post( 'tasks', - json=task_two.dict(), + json=task_two.model_dump(), follow_redirects=True, headers=headers4ptest_one ) @@ -76,7 +76,7 @@ def test_task_update(async_minimum, fastapi_testclient): ) assert response.status_code == status.HTTP_200_OK - modified_task = Task.parse_obj(response.json()) + modified_task = Task.model_validate(response.json()) assert modified_task == task # Now invalidate the task by changing the signature @@ -85,7 +85,7 @@ def test_task_update(async_minimum, fastapi_testclient): } response = fastapi_testclient.put( '/tasks', - json=modified_task.dict(), + json=modified_task.model_dump(), follow_redirects=True, headers=headers4ptest_one ) @@ -95,12 +95,12 @@ def test_task_update(async_minimum, fastapi_testclient): # And change the reference pipeline to something wrong. # This token is valid, but for a different pipeline. It is impossible # to have a valid token for a pipeline that does not exist. - modified_task.pipeline = { + modified_task.pipeline = Pipeline.model_validate({ 'name': 'ptest one thousand' - } + }) response = fastapi_testclient.put( '/tasks', - json=modified_task.dict(), + json=modified_task.model_dump(), follow_redirects=True, headers=headers4ptest_one ) From 7432a8ef44bddedfb7a873788040a48a5a9974b7 Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Wed, 11 Oct 2023 15:36:45 +0000 Subject: [PATCH 35/37] Use best/better practices for type hints and pydantic constraints. --- server/npg/porch/endpoints/pipelines.py | 9 ++++---- server/npg/porch/endpoints/tasks.py | 29 ++++++++++++------------- server/npg/porch/models/pipeline.py | 5 ++--- server/npg/porch/models/task.py | 7 +++--- server/npg/porchdb/data_access.py | 27 +++++++++++------------ 5 files changed, 36 insertions(+), 41 deletions(-) diff --git a/server/npg/porch/endpoints/pipelines.py b/server/npg/porch/endpoints/pipelines.py index 2ee358e..d1afcd7 100644 --- a/server/npg/porch/endpoints/pipelines.py +++ b/server/npg/porch/endpoints/pipelines.py @@ -20,7 +20,6 @@ from fastapi import APIRouter, HTTPException, Depends import logging -from typing import List, Optional import re from sqlalchemy.exc import IntegrityError from sqlalchemy.orm.exc import NoResultFound @@ -44,7 +43,7 @@ @router.get( "/", - response_model=List[Pipeline], + response_model=list[Pipeline], summary="Get information about all pipelines.", description=''' Returns a list of pydantic Pipeline models. 
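The .dict() and parse_obj() replacements running through the tests above amount to the pydantic v2 round-trip sketched here; the model is reduced to the same field shape as the Pipeline model, and the values are illustrative only.

    from pydantic import BaseModel


    class Pipeline(BaseModel):
        name: str
        uri: str | None = None
        version: str | None = None


    p = Pipeline(name='ptest one', version='0.3.14')
    payload = p.model_dump()        # v2 replacement for the deprecated .dict()
    assert payload == {'name': 'ptest one', 'uri': None, 'version': '0.3.14'}
    # v2 replacement for Pipeline.parse_obj(payload)
    assert Pipeline.model_validate(payload) == p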
@@ -52,11 +51,11 @@ A valid token issued for any pipeline is required for authorisation.''' ) async def get_pipelines( - uri: Optional[str] = None, - version: Optional[str] = None, + uri: str | None = None, + version: str | None = None, db_accessor=Depends(get_DbAccessor), permissions=Depends(validate) -) -> List[Pipeline]: +) -> list[Pipeline]: return await db_accessor.get_all_pipelines(uri, version) diff --git a/server/npg/porch/endpoints/tasks.py b/server/npg/porch/endpoints/tasks.py index 02d7e8c..3a57d2a 100644 --- a/server/npg/porch/endpoints/tasks.py +++ b/server/npg/porch/endpoints/tasks.py @@ -19,18 +19,17 @@ # this program. If not, see . import logging -from fastapi import APIRouter, HTTPException, Depends -from pydantic import PositiveInt -from typing import List, Optional -from sqlalchemy.exc import IntegrityError -from sqlalchemy.orm.exc import NoResultFound -from starlette import status +from typing import Annotated +from fastapi import APIRouter, Depends, HTTPException, Query +from npg.porch.auth.token import validate +from npg.porch.models.permission import PermissionValidationException from npg.porch.models.pipeline import Pipeline from npg.porch.models.task import Task, TaskStateEnum -from npg.porch.models.permission import PermissionValidationException from npg.porchdb.connection import get_DbAccessor -from npg.porch.auth.token import validate +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm.exc import NoResultFound +from starlette import status def _validate_request(permission, pipeline): @@ -61,18 +60,18 @@ def _validate_request(permission, pipeline): @router.get( "/", - response_model=List[Task], + response_model=list[Task], summary="Returns all tasks, and can be filtered to task status or pipeline name", description=''' Return all tasks. The list of tasks can be filtered by supplying a pipeline name and/or task status''' ) async def get_tasks( - pipeline_name: Optional[str] = None, - status: Optional[TaskStateEnum] = None, + pipeline_name: str | None = None, + status: TaskStateEnum | None = None, db_accessor=Depends(get_DbAccessor), permission=Depends(validate) -) -> List[Task]: +) -> list[Task]: print(pipeline_name, status) return await db_accessor.get_tasks(pipeline_name=pipeline_name, task_status=status) @@ -152,7 +151,7 @@ async def update_task( @router.post( "/claim", - response_model=List[Task], + response_model=list[Task], responses={ status.HTTP_200_OK: {"description": "Receive a list of tasks that have been claimed"} }, @@ -173,10 +172,10 @@ async def update_task( ) async def claim_task( pipeline: Pipeline, - num_tasks: PositiveInt | None = 1, + num_tasks: Annotated[int | None, Query(gt=0)] = 1, db_accessor=Depends(get_DbAccessor), permission=Depends(validate) -) -> List[Task]: +) -> list[Task]: _validate_request(permission, pipeline) tasks = await db_accessor.claim_tasks( diff --git a/server/npg/porch/models/pipeline.py b/server/npg/porch/models/pipeline.py index 2ffa1f1..ce212b8 100644 --- a/server/npg/porch/models/pipeline.py +++ b/server/npg/porch/models/pipeline.py @@ -19,7 +19,6 @@ # this program. If not, see . 
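The claim_task signature change above swaps PositiveInt for an Annotated query parameter; the standalone route below sketches that pattern (the app and path are stand-ins, not the npg_porch API).

    from typing import Annotated

    from fastapi import FastAPI, Query

    app = FastAPI()


    @app.post('/claim')
    async def claim(num_tasks: Annotated[int | None, Query(gt=0)] = 1) -> dict:
        # FastAPI rejects num_tasks <= 0 with a 422 before the body runs,
        # which is what PositiveInt previously enforced.
        return {'claimed': num_tasks}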
from pydantic import BaseModel, Field -from typing import Optional class Pipeline(BaseModel): name: str = Field( @@ -27,12 +26,12 @@ class Pipeline(BaseModel): title='Pipeline Name', description='A user-controlled name for the pipeline' ) - uri: Optional[str] = Field( + uri: str | None = Field( default = None, title='URI', description='URI to bootstrap the pipeline code' ) - version: Optional[str] = Field( + version: str | None = Field( default = None, title='Version', description='Pipeline version to use with URI' diff --git a/server/npg/porch/models/task.py b/server/npg/porch/models/task.py index 7233812..788518d 100644 --- a/server/npg/porch/models/task.py +++ b/server/npg/porch/models/task.py @@ -22,7 +22,6 @@ import hashlib import ujson from pydantic import BaseModel, Field -from typing import Optional, Dict from npg.porch.models.pipeline import Pipeline @@ -36,17 +35,17 @@ class TaskStateEnum(str, Enum): class Task(BaseModel): pipeline: Pipeline - task_input_id: Optional[str] = Field( + task_input_id: str | None = Field( None, title='Task Input ID', description='A stringified unique identifier for a piece of work. Set by the npg_porch server, not the client' # noqa: E501 ) - task_input: Dict = Field( + task_input: dict = Field( None, title='Task Input', description='A structured parameter set that uniquely identifies a piece of work, and enables an iteration of a pipeline' # noqa: E501 ) - status: Optional[TaskStateEnum] = None + status: TaskStateEnum | None = None def generate_task_id(self): return hashlib.sha256(ujson.dumps(self.task_input, sort_keys=True).encode()).hexdigest() diff --git a/server/npg/porchdb/data_access.py b/server/npg/porchdb/data_access.py index d36e2d9..effd0e4 100644 --- a/server/npg/porchdb/data_access.py +++ b/server/npg/porchdb/data_access.py @@ -23,7 +23,6 @@ from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import contains_eager, joinedload from sqlalchemy.orm.exc import NoResultFound -from typing import Optional, List from npg.porchdb.models import Pipeline as DbPipeline, Task as DbTask, Event from npg.porch.models import Task, Pipeline, TaskStateEnum @@ -55,10 +54,10 @@ async def _get_pipeline_db_object(self, name: str) -> Pipeline: async def _get_pipeline_db_objects( self, - name: Optional[str] = None, - version: Optional[str] = None, - uri: Optional[str] = None - ) -> List[Pipeline]: + name: str | None = None, + version: str | None = None, + uri: str | None = None + ) -> list[Pipeline]: query = select(DbPipeline) if name: query = query.filter_by(name=name) @@ -72,9 +71,9 @@ async def _get_pipeline_db_objects( async def get_all_pipelines( self, - uri: Optional[str] = None, - version: Optional[str] = None - ) -> List[Pipeline]: + uri: str | None = None, + version: str | None = None + ) -> list[Pipeline]: pipelines = [] pipelines = await self._get_pipeline_db_objects(uri=uri, version=version) return [pipe.convert_to_model() for pipe in pipelines] @@ -117,8 +116,8 @@ async def create_task(self, token_id: int, task: Task) -> Task: return t.convert_to_model() async def claim_tasks( - self, token_id: int, pipeline: Pipeline, claim_limit: Optional[int] = 1 - ) -> List[Task]: + self, token_id: int, pipeline: Pipeline, claim_limit: int | None = 1 + ) -> list[Task]: session = self.session try: @@ -196,9 +195,9 @@ async def update_task(self, token_id: int, task: Task) -> Task: async def get_tasks( self, - pipeline_name: Optional[str] = None, - task_status: Optional[TaskStateEnum] = None - ) -> List[Task]: + pipeline_name: str | None = None, + 
task_status: TaskStateEnum | None = None + ) -> list[Task]: ''' Gets all the tasks. @@ -229,7 +228,7 @@ def convert_task_to_db(task: Task, pipeline: DbPipeline) -> DbTask: state=task.status ) - async def get_events_for_task(self, task: Task) -> List[Event]: + async def get_events_for_task(self, task: Task) -> list[Event]: events = await self.session.execute( select(Event) .join(Event.task) From 3a0b662cb8ed3fb099af1d6074a3ed8f1844c493 Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Thu, 12 Oct 2023 14:20:03 +0000 Subject: [PATCH 36/37] Try to prevent dormant DB connection from causing 500 errors for the client. --- server/npg/porchdb/connection.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/npg/porchdb/connection.py b/server/npg/porchdb/connection.py index 93fc692..1374db3 100644 --- a/server/npg/porchdb/connection.py +++ b/server/npg/porchdb/connection.py @@ -53,7 +53,8 @@ } engine = create_async_engine( config['DB_URL'], - connect_args=connect_args + connect_args=connect_args, + pool_pre_ping=True ) Base.metadata.schema = config['DB_SCHEMA'] session_factory = sessionmaker( From 80ca8fc6e1bc2bd4cc1e06b32717a31ad8b40aac Mon Sep 17 00:00:00 2001 From: Kieron Taylor Date: Mon, 16 Oct 2023 14:16:30 +0000 Subject: [PATCH 37/37] Version update --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index ee151de..9eb1408 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = npg_porch -version = 0.1.0 +version = 1.0.0 [options] package_dir =
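The pool_pre_ping flag introduced in the connection patch above is a standard SQLAlchemy engine option; a minimal sketch follows, with a placeholder DSN rather than the deployment's real one.

    from sqlalchemy.ext.asyncio import create_async_engine

    engine = create_async_engine(
        'postgresql+asyncpg://user:secret@localhost/porch',   # placeholder URL
        # Test each pooled connection with a lightweight ping before reuse,
        # so connections dropped while dormant are recycled instead of
        # surfacing to the client as 500 errors.
        pool_pre_ping=True,
    )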