From 1c3f84883fd1a40acb23cf569ad22fda70f91320 Mon Sep 17 00:00:00 2001 From: Anna Pokorska Date: Tue, 3 Dec 2024 14:32:24 +0000 Subject: [PATCH 1/8] Config endpoint returns only orgs that correspond to allowed corpora --- app/api/api_v1/routers/lookups/config.py | 14 ++- app/repository/lookups.py | 6 +- app/repository/organisation.py | 9 +- tests/conftest.py | 26 ++++++ .../non_search/routers/lookups/test_config.py | 92 +++++++++++++++++-- tests/unit/app/core/test_organisation.py | 4 +- 6 files changed, 132 insertions(+), 19 deletions(-) diff --git a/app/api/api_v1/routers/lookups/config.py b/app/api/api_v1/routers/lookups/config.py index bf044f2e..71981ca8 100644 --- a/app/api/api_v1/routers/lookups/config.py +++ b/app/api/api_v1/routers/lookups/config.py @@ -1,12 +1,20 @@ -from fastapi import Depends, Request +from typing import Annotated + +from fastapi import Depends, Header, Request from app.api.api_v1.routers.lookups.router import lookups_router from app.clients.db.session import get_db from app.models.metadata import ApplicationConfig from app.repository.lookups import get_config +from app.service.custom_app import AppTokenFactory @lookups_router.get("/config", response_model=ApplicationConfig) -def lookup_config(request: Request, db=Depends(get_db)): +def lookup_config( + request: Request, app_token: Annotated[str, Header()], db=Depends(get_db) +): """Get the config for the metadata.""" - return get_config(db) + token = AppTokenFactory() + token.decode_and_validate(db, request, app_token) + + return get_config(db, token.allowed_corpora_ids) diff --git a/app/repository/lookups.py b/app/repository/lookups.py index 38a84c0a..38d0809f 100644 --- a/app/repository/lookups.py +++ b/app/repository/lookups.py @@ -8,20 +8,20 @@ from sqlalchemy.orm import Session from app.models.metadata import ApplicationConfig -from app.repository.organisation import get_all_organisations, get_organisation_config +from app.repository.organisation import get_organisation_config, get_organisations from app.service.pipeline import IMPORT_ID_MATCHER from app.service.util import tree_table_to_json _LOGGER = logging.getLogger(__name__) -def get_config(db: Session) -> ApplicationConfig: +def get_config(db: Session, allowed_corpora: list[str]) -> ApplicationConfig: # First get the CCLW stats return ApplicationConfig( geographies=tree_table_to_json(table=Geography, db=db), organisations={ cast(str, org.name): get_organisation_config(db, org) - for org in get_all_organisations(db) + for org in get_organisations(db, allowed_corpora) }, languages={lang.language_code: lang.name for lang in db.query(Language).all()}, document_variants=[ diff --git a/app/repository/organisation.py b/app/repository/organisation.py index 419d1a59..1a710821 100644 --- a/app/repository/organisation.py +++ b/app/repository/organisation.py @@ -87,5 +87,10 @@ def get_organisation_config(db: Session, org: Organisation) -> OrganisationConfi ) -def get_all_organisations(db: Session) -> list[Organisation]: - return db.query(Organisation).all() +def get_organisations(db: Session, allowed_corpora: list[str]) -> list[Organisation]: + return ( + db.query(Organisation) + .join(Corpus, Corpus.organisation_id == Organisation.id) + .filter(Corpus.import_id.in_(allowed_corpora)) + .all() + ) diff --git a/tests/conftest.py b/tests/conftest.py index a60cfa3f..cc51f1fd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -151,6 +151,32 @@ def mock_return(_, __, ___): return af.create_configuration_token(input_str) +@pytest.fixture +def app_token_factory(monkeypatch): + """Generate a valid config token using TOKEN_SECRET_KEY and given corpora ids. + + Need to generate the config token using the token secret key from + your local env file. For tests in CI, this will be the secret key in + the .env.example file, but for local development this secret key + might be different (e.g., the one for staging). This fixture works + around this. + """ + + def mock_return(_, __, ___): + return True + + def _app_token(allowed_corpora_ids): + subject = "CCLW" + audience = "localhost" + input_str = f"{allowed_corpora_ids};{subject};{audience}" + + af = AppTokenFactory() + monkeypatch.setattr(custom_app.AppTokenFactory, "validate", mock_return) + return af.create_configuration_token(input_str) + + return _app_token + + @pytest.fixture def create_test_db(): """Create a test database and use it for the whole test session.""" diff --git a/tests/non_search/routers/lookups/test_config.py b/tests/non_search/routers/lookups/test_config.py index 01e8c8c2..774157d1 100644 --- a/tests/non_search/routers/lookups/test_config.py +++ b/tests/non_search/routers/lookups/test_config.py @@ -9,7 +9,7 @@ FamilyCorpus, FamilyGeography, ) -from db_client.models.organisation import Corpus, Organisation +from db_client.models.organisation import Corpus, CorpusType, Organisation from app.clients.db.session import SessionLocal from app.service.util import tree_table_to_json @@ -71,15 +71,13 @@ def _add_family(test_db, import_id: str, cat: FamilyCategory, corpus_import_id): ) -def test_config_endpoint_content(data_client, data_db): +def test_config_endpoint_content(data_client, data_db, valid_token): """Tests whether we get the expected content when the /config endpoint is called.""" # TODO: this test is fragile, we should look into validation according to the # supporting data, rather than counts & fixed lists url_under_test = "/api/v1/config" - response = data_client.get( - url_under_test, - ) + response = data_client.get(url_under_test, headers={"app-token": valid_token}) response_json = response.json() @@ -165,7 +163,7 @@ def test_config_endpoint_content(data_client, data_db): ) -def test_config_endpoint_cclw_stats(data_client, data_db): +def test_config_endpoint_cclw_stats(data_client, data_db, valid_token): url_under_test = "/api/v1/config" cclw = ( @@ -191,9 +189,7 @@ def test_config_endpoint_cclw_stats(data_client, data_db): _add_family(data_db, "T.0.0.6", FamilyCategory.UNFCCC, unfccc.import_id) data_db.flush() - response = data_client.get( - url_under_test, - ) + response = data_client.get(url_under_test, headers={"app-token": valid_token}) response_json = response.json() @@ -211,6 +207,84 @@ def test_config_endpoint_cclw_stats(data_client, data_db): assert org_config["total"] == laws + policies + unfccc +@pytest.mark.parametrize( + "allowed_corpora_ids, expected_organisation, other_organisation", + [ + ("UNFCCC.corpus.i00000001.n0000", "UNFCCC", "CCLW"), + ("CCLW.corpus.i00000001.n0000", "CCLW", "UNFCCC"), + ], +) +def test_config_endpoint_returns_stats_for_allowed_corpora_only( + allowed_corpora_ids, + expected_organisation, + other_organisation, + app_token_factory, + data_client, + data_db, +): + app_token = app_token_factory(allowed_corpora_ids) + url_under_test = "/api/v1/config" + + other_corpus = ( + data_db.query(Corpus) + .join(Organisation, Organisation.id == Corpus.organisation_id) + .filter(Organisation.name == other_organisation) + .one() + ) + expected_corpus = ( + data_db.query(Corpus) + .join(Organisation, Organisation.id == Corpus.organisation_id) + .filter(Organisation.name == expected_organisation) + .one() + ) + expected_corpus_type = ( + data_db.query(CorpusType) + .join(Corpus, Corpus.corpus_type_name == CorpusType.name) + .filter(CorpusType.name == expected_corpus.corpus_type_name) + .one() + ) + + _add_family(data_db, "T.0.0.1", FamilyCategory.EXECUTIVE, other_corpus.import_id) + _add_family( + data_db, "T.0.0.2", FamilyCategory.LEGISLATIVE, expected_corpus.import_id + ) + data_db.flush() + + response = data_client.get(url_under_test, headers={"app-token": app_token}) + + response_json = response.json() + + org_config = response_json["organisations"] + expected_org_config = { + expected_organisation: { + "corpora": [ + { + "corpus_import_id": expected_corpus.import_id, + "title": expected_corpus.title, + "description": expected_corpus.description, + "corpus_type": expected_corpus.corpus_type_name, + "corpus_type_description": expected_corpus_type.description, + "taxonomy": expected_corpus_type.valid_metadata, + "text": expected_corpus.corpus_text, + "image_url": ( + f"https://cdn.climatepolicyradar.org/{expected_corpus.corpus_image_url}" + if expected_corpus.corpus_image_url + else "" + ), + } + ], + "total": 1, + "count_by_category": { + "Executive": 0, + "Legislative": 1, + "MCF": 0, + "UNFCCC": 0, + }, + } + } + assert org_config == expected_org_config + + class _MockColumn: def __init__(self, name): self.name = name diff --git a/tests/unit/app/core/test_organisation.py b/tests/unit/app/core/test_organisation.py index 2d9e765e..fd71ba7a 100644 --- a/tests/unit/app/core/test_organisation.py +++ b/tests/unit/app/core/test_organisation.py @@ -3,7 +3,7 @@ import pytest from sqlalchemy.orm import Session -from app.repository.organisation import get_all_organisations, get_corpora_for_org +from app.repository.organisation import get_corpora_for_org, get_organisations from tests.non_search.setup_helpers import setup_new_corpus, setup_with_docs CCLW_EXPECTED_NUM_CORPORA = 1 @@ -34,7 +34,7 @@ def test_expected_organisations_present(data_db: Session): - orgs = get_all_organisations(data_db) + orgs = get_organisations(data_db) assert len(orgs) == EXPECTED_NUM_ORGS org_names = set([cast(str, org.name) for org in orgs]) From 871d39c76a1c4bc31025d097a3915ab6ad091507 Mon Sep 17 00:00:00 2001 From: Anna Pokorska Date: Tue, 3 Dec 2024 14:33:41 +0000 Subject: [PATCH 2/8] Bump patch version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b5593f7f..f1fcda7a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "navigator_backend" -version = "1.19.16" +version = "1.19.17" description = "" authors = ["CPR-dev-team "] packages = [{ include = "app" }, { include = "tests" }] From e1d038ce2292a3b567cfacd792bcdee7efc66bb5 Mon Sep 17 00:00:00 2001 From: Anna Pokorska Date: Tue, 3 Dec 2024 14:41:34 +0000 Subject: [PATCH 3/8] Bump patch version again --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f1fcda7a..523c4ba8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "navigator_backend" -version = "1.19.17" +version = "1.19.18" description = "" authors = ["CPR-dev-team "] packages = [{ include = "app" }, { include = "tests" }] From 0950014402b8a3d7e9c535963e616eaa0ad87499 Mon Sep 17 00:00:00 2001 From: Anna Pokorska Date: Tue, 3 Dec 2024 14:46:32 +0000 Subject: [PATCH 4/8] Add missing param --- tests/unit/app/core/test_organisation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/unit/app/core/test_organisation.py b/tests/unit/app/core/test_organisation.py index fd71ba7a..c025c4eb 100644 --- a/tests/unit/app/core/test_organisation.py +++ b/tests/unit/app/core/test_organisation.py @@ -34,7 +34,9 @@ def test_expected_organisations_present(data_db: Session): - orgs = get_organisations(data_db) + orgs = get_organisations( + data_db, ["UNFCCC.corpus.i00000001.n0000,CCLW.corpus.i00000001.n0000"] + ) assert len(orgs) == EXPECTED_NUM_ORGS org_names = set([cast(str, org.name) for org in orgs]) From 9f81d6897953ec2b599968563becad0a5319556b Mon Sep 17 00:00:00 2001 From: Anna Pokorska Date: Tue, 3 Dec 2024 14:54:45 +0000 Subject: [PATCH 5/8] Fix unit test --- tests/conftest.py | 2 +- tests/non_search/routers/lookups/test_config.py | 4 ++-- tests/unit/app/core/test_organisation.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index cc51f1fd..9780caf8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -165,7 +165,7 @@ def app_token_factory(monkeypatch): def mock_return(_, __, ___): return True - def _app_token(allowed_corpora_ids): + def _app_token(allowed_corpora_ids: list[str]): subject = "CCLW" audience = "localhost" input_str = f"{allowed_corpora_ids};{subject};{audience}" diff --git a/tests/non_search/routers/lookups/test_config.py b/tests/non_search/routers/lookups/test_config.py index 774157d1..40578901 100644 --- a/tests/non_search/routers/lookups/test_config.py +++ b/tests/non_search/routers/lookups/test_config.py @@ -210,8 +210,8 @@ def test_config_endpoint_cclw_stats(data_client, data_db, valid_token): @pytest.mark.parametrize( "allowed_corpora_ids, expected_organisation, other_organisation", [ - ("UNFCCC.corpus.i00000001.n0000", "UNFCCC", "CCLW"), - ("CCLW.corpus.i00000001.n0000", "CCLW", "UNFCCC"), + (["UNFCCC.corpus.i00000001.n0000"], "UNFCCC", "CCLW"), + (["CCLW.corpus.i00000001.n0000"], "CCLW", "UNFCCC"), ], ) def test_config_endpoint_returns_stats_for_allowed_corpora_only( diff --git a/tests/unit/app/core/test_organisation.py b/tests/unit/app/core/test_organisation.py index c025c4eb..2c6c7718 100644 --- a/tests/unit/app/core/test_organisation.py +++ b/tests/unit/app/core/test_organisation.py @@ -35,7 +35,7 @@ def test_expected_organisations_present(data_db: Session): orgs = get_organisations( - data_db, ["UNFCCC.corpus.i00000001.n0000,CCLW.corpus.i00000001.n0000"] + data_db, ["UNFCCC.corpus.i00000001.n0000", "CCLW.corpus.i00000001.n0000"] ) assert len(orgs) == EXPECTED_NUM_ORGS From 6433fa86a2653d685f6c4c8f1978ce4d44b77736 Mon Sep 17 00:00:00 2001 From: Anna Pokorska Date: Tue, 3 Dec 2024 15:42:21 +0000 Subject: [PATCH 6/8] Undo --- tests/conftest.py | 2 +- tests/non_search/routers/lookups/test_config.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 9780caf8..cc51f1fd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -165,7 +165,7 @@ def app_token_factory(monkeypatch): def mock_return(_, __, ___): return True - def _app_token(allowed_corpora_ids: list[str]): + def _app_token(allowed_corpora_ids): subject = "CCLW" audience = "localhost" input_str = f"{allowed_corpora_ids};{subject};{audience}" diff --git a/tests/non_search/routers/lookups/test_config.py b/tests/non_search/routers/lookups/test_config.py index 40578901..774157d1 100644 --- a/tests/non_search/routers/lookups/test_config.py +++ b/tests/non_search/routers/lookups/test_config.py @@ -210,8 +210,8 @@ def test_config_endpoint_cclw_stats(data_client, data_db, valid_token): @pytest.mark.parametrize( "allowed_corpora_ids, expected_organisation, other_organisation", [ - (["UNFCCC.corpus.i00000001.n0000"], "UNFCCC", "CCLW"), - (["CCLW.corpus.i00000001.n0000"], "CCLW", "UNFCCC"), + ("UNFCCC.corpus.i00000001.n0000", "UNFCCC", "CCLW"), + ("CCLW.corpus.i00000001.n0000", "CCLW", "UNFCCC"), ], ) def test_config_endpoint_returns_stats_for_allowed_corpora_only( From aa535d25a9d8416b7565654af7a05936ffef3de9 Mon Sep 17 00:00:00 2001 From: Anna Pokorska Date: Tue, 3 Dec 2024 17:04:57 +0000 Subject: [PATCH 7/8] Return all orgs if allowed corpora list is empty --- app/repository/organisation.py | 10 +-- app/service/custom_app.py | 1 + .../non_search/routers/lookups/test_config.py | 64 +++++++++++++++++++ 3 files changed, 70 insertions(+), 5 deletions(-) diff --git a/app/repository/organisation.py b/app/repository/organisation.py index 1a710821..f2947066 100644 --- a/app/repository/organisation.py +++ b/app/repository/organisation.py @@ -88,9 +88,9 @@ def get_organisation_config(db: Session, org: Organisation) -> OrganisationConfi def get_organisations(db: Session, allowed_corpora: list[str]) -> list[Organisation]: - return ( - db.query(Organisation) - .join(Corpus, Corpus.organisation_id == Organisation.id) - .filter(Corpus.import_id.in_(allowed_corpora)) - .all() + query = db.query(Organisation).join( + Corpus, Corpus.organisation_id == Organisation.id ) + if allowed_corpora != []: + query = query.filter(Corpus.import_id.in_(allowed_corpora)) + return query.all() diff --git a/app/service/custom_app.py b/app/service/custom_app.py index 6e89dd63..82cb791c 100644 --- a/app/service/custom_app.py +++ b/app/service/custom_app.py @@ -260,4 +260,5 @@ def decode_and_validate( # First corpora validation is app token against DB. At least one of the app token # corpora IDs must be present in the DB to continue the search request. + any_exist = False if not self.allowed_corpora_ids else True self.validate(db, any_exist) diff --git a/tests/non_search/routers/lookups/test_config.py b/tests/non_search/routers/lookups/test_config.py index 774157d1..734bc835 100644 --- a/tests/non_search/routers/lookups/test_config.py +++ b/tests/non_search/routers/lookups/test_config.py @@ -1,8 +1,12 @@ +import os +from datetime import datetime from http.client import OK from typing import Any from unittest.mock import MagicMock +import jwt import pytest +from dateutil.relativedelta import relativedelta from db_client.models.dfce.family import ( Family, FamilyCategory, @@ -12,6 +16,7 @@ from db_client.models.organisation import Corpus, CorpusType, Organisation from app.clients.db.session import SessionLocal +from app.service import security from app.service.util import tree_table_to_json LEN_ORG_CONFIG = 3 @@ -285,6 +290,65 @@ def test_config_endpoint_returns_stats_for_allowed_corpora_only( assert org_config == expected_org_config +def test_config_endpoint_returns_stats_for_all_orgs_if_no_allowed_corpora_in_app_token( + data_client, + data_db, +): + issued_at = datetime.utcnow() + to_encode = { + "allowed_corpora_ids": [], + "exp": issued_at + relativedelta(years=10), + "iat": int(datetime.timestamp(issued_at.replace(microsecond=0))), + "iss": "Climate Policy Radar", + "sub": "CPR", + "aud": "localhost", + } + app_token = jwt.encode( + to_encode, os.environ["TOKEN_SECRET_KEY"], algorithm=security.ALGORITHM + ) + # app_token = app_token_factory(",") + url_under_test = "/api/v1/config" + + cclw_corpus = ( + data_db.query(Corpus) + .join(Organisation, Organisation.id == Corpus.organisation_id) + .filter(Organisation.name == "CCLW") + .one() + ) + + unfccc_corpus = ( + data_db.query(Corpus) + .join(Organisation, Organisation.id == Corpus.organisation_id) + .filter(Organisation.name == "UNFCCC") + .one() + ) + + _add_family(data_db, "T.0.0.1", FamilyCategory.EXECUTIVE, cclw_corpus.import_id) + _add_family(data_db, "T.0.0.2", FamilyCategory.LEGISLATIVE, unfccc_corpus.import_id) + data_db.flush() + + response = data_client.get(url_under_test, headers={"app-token": app_token}) + + response_json = response.json() + org_config = response_json["organisations"] + + assert list(org_config.keys()) == ["CCLW", "UNFCCC"] + assert org_config["CCLW"]["total"] == 1 + assert org_config["UNFCCC"]["total"] == 1 + assert org_config["UNFCCC"]["count_by_category"] == { + "Executive": 0, + "Legislative": 1, + "MCF": 0, + "UNFCCC": 0, + } + assert org_config["CCLW"]["count_by_category"] == { + "Executive": 1, + "Legislative": 0, + "MCF": 0, + "UNFCCC": 0, + } + + class _MockColumn: def __init__(self, name): self.name = name From 2afbfb98c644c8617a02789545ab890503d151f7 Mon Sep 17 00:00:00 2001 From: Anna Pokorska Date: Tue, 3 Dec 2024 17:15:20 +0000 Subject: [PATCH 8/8] Remove commented out code --- tests/non_search/routers/lookups/test_config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/non_search/routers/lookups/test_config.py b/tests/non_search/routers/lookups/test_config.py index 734bc835..65fc56ce 100644 --- a/tests/non_search/routers/lookups/test_config.py +++ b/tests/non_search/routers/lookups/test_config.py @@ -306,7 +306,6 @@ def test_config_endpoint_returns_stats_for_all_orgs_if_no_allowed_corpora_in_app app_token = jwt.encode( to_encode, os.environ["TOKEN_SECRET_KEY"], algorithm=security.ALGORITHM ) - # app_token = app_token_factory(",") url_under_test = "/api/v1/config" cclw_corpus = (