From 01c686c1120040c3b62c4ff5a4f2777bd2ef16e6 Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Tue, 17 Sep 2024 12:45:40 +0100 Subject: [PATCH] Move vespa ids search tests into a separate file (#333) * Move vespa IDs search tests into separate file for readability * Bump to 1.14.19 * Remove valid_token references from working branch --- pyproject.toml | 2 +- tests/search/test_vespa_ids_search.py | 141 +++++++++++++++++++++++++ tests/search/test_vespasearch.py | 145 +------------------------- 3 files changed, 143 insertions(+), 145 deletions(-) create mode 100644 tests/search/test_vespa_ids_search.py diff --git a/pyproject.toml b/pyproject.toml index 832ee81b..13a3f60e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "navigator_backend" -version = "1.14.18" +version = "1.14.19" description = "" authors = ["CPR-dev-team "] packages = [{ include = "app" }, { include = "tests" }] diff --git a/tests/search/test_vespa_ids_search.py b/tests/search/test_vespa_ids_search.py new file mode 100644 index 00000000..f0db2828 --- /dev/null +++ b/tests/search/test_vespa_ids_search.py @@ -0,0 +1,141 @@ +from typing import Mapping + +import pytest +from db_client.models.dfce import Slug +from db_client.models.dfce.family import FamilyDocument +from sqlalchemy.orm import Session + +from app.api.api_v1.routers import search +from tests.search.setup_search_tests import _populate_db_families + +SEARCH_ENDPOINT = "/api/v1/searches" + + +def _make_search_request(client, params: Mapping[str, str]): + response = client.post(SEARCH_ENDPOINT, json=params) + assert response.status_code == 200, response.text + return response.json() + + +def _doc_ids_from_response(test_db: Session, response: dict) -> list[str]: + """The response doesnt know about ids, so we look them up using the slug""" + document_ids = [] + for fam in response["families"]: + for doc in fam["family_documents"]: + family_document = ( + test_db.query(FamilyDocument) + .join(Slug, Slug.family_document_import_id == FamilyDocument.import_id) + .filter(Slug.name == doc["document_slug"]) + .one() + ) + document_ids.append(family_document.import_id) + + return document_ids + + +def _fam_ids_from_response(test_db, response) -> list[str]: + """The response doesnt know about ids, so we look them up using the slug""" + family_ids = [] + for fam in response["families"]: + family_document = ( + test_db.query(FamilyDocument) + .join(Slug, Slug.family_import_id == FamilyDocument.family_import_id) + .filter(Slug.name == fam["family_slug"]) + .one() + ) + family_ids.append(family_document.family_import_id) + return family_ids + + +@pytest.mark.parametrize( + "family_ids", + [ + ["CCLW.family.1385.0"], + ["CCLW.family.10246.0", "CCLW.family.8633.0"], + ["CCLW.family.10246.0", "CCLW.family.8633.0", "UNFCCC.family.1267.0"], + ], +) +@pytest.mark.search +def test_family_ids_search(test_vespa, data_db, monkeypatch, data_client, family_ids): + monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) + _populate_db_families(data_db) + + params = { + "query_string": "the", + "family_ids": family_ids, + } + + response = _make_search_request(data_client, params) + + got_family_ids = _fam_ids_from_response(data_db, response) + assert sorted(got_family_ids) == sorted(family_ids) + + +@pytest.mark.parametrize( + "document_ids", + [ + ["CCLW.executive.1385.5336"], + ["CCLW.executive.10246.4861", "UNFCCC.non-party.1267.0"], + [ + "CCLW.executive.8633.3052", + "UNFCCC.non-party.1267.0", + "CCLW.executive.10246.4861", + ], + ], +) +@pytest.mark.search +def test_document_ids_search( + test_vespa, data_db, monkeypatch, data_client, document_ids +): + monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) + _populate_db_families(data_db) + + params = { + "query_string": "the", + "document_ids": document_ids, + } + response = _make_search_request(data_client, params) + + got_document_ids = _doc_ids_from_response(data_db, response) + assert sorted(got_document_ids) == sorted(document_ids) + + +@pytest.mark.search +def test_document_ids_and_family_ids_search( + test_vespa, data_db, monkeypatch, data_client +): + monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) + _populate_db_families(data_db) + + # The doc doesnt belong to the family, so we should get no results + family_ids = ["UNFCCC.family.1267.0"] + document_ids = ["CCLW.executive.10246.4861"] + params = { + "query_string": "the", + "family_ids": family_ids, + "document_ids": document_ids, + } + + response = _make_search_request(data_client, params) + assert len(response["families"]) == 0 + + +@pytest.mark.search +def test_empty_ids_dont_limit_result(test_vespa, data_db, monkeypatch, data_client): + monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) + _populate_db_families(data_db) + + # We'd expect this to be interpreted as 'unlimited' + params = { + "query_string": "the", + "family_ids": [], + "document_ids": [], + } + + response = _make_search_request(data_client, params) + + got_document_ids = _doc_ids_from_response(data_db, response) + got_family_ids = _fam_ids_from_response(data_db, response) + + assert len(got_family_ids) > 1 + assert len(got_document_ids) > 1 diff --git a/tests/search/test_vespasearch.py b/tests/search/test_vespasearch.py index 8a9089e2..e9a2f565 100644 --- a/tests/search/test_vespasearch.py +++ b/tests/search/test_vespasearch.py @@ -2,10 +2,9 @@ from typing import Mapping import pytest -from db_client.models.dfce import Geography, Slug +from db_client.models.dfce import Geography from db_client.models.dfce.family import FamilyDocument from sqlalchemy import update -from sqlalchemy.orm import Session from app.api.api_v1.routers import search from app.core.lookups import get_country_slug_from_country_code @@ -27,36 +26,6 @@ def _make_search_request(client, params: Mapping[str, str]): return response.json() -def _doc_ids_from_response(test_db: Session, response: dict) -> list[str]: - """The response doesnt know about ids, so we look them up using the slug""" - document_ids = [] - for fam in response["families"]: - for doc in fam["family_documents"]: - family_document = ( - test_db.query(FamilyDocument) - .join(Slug, Slug.family_document_import_id == FamilyDocument.import_id) - .filter(Slug.name == doc["document_slug"]) - .one() - ) - document_ids.append(family_document.import_id) - - return document_ids - - -def _fam_ids_from_response(test_db, response) -> list[str]: - """The response doesnt know about ids, so we look them up using the slug""" - family_ids = [] - for fam in response["families"]: - family_document = ( - test_db.query(FamilyDocument) - .join(Slug, Slug.family_import_id == FamilyDocument.family_import_id) - .filter(Slug.name == fam["family_slug"]) - .one() - ) - family_ids.append(family_document.family_import_id) - return family_ids - - @pytest.mark.search def test_empty_search_term_performs_browse( test_vespa, data_client, data_db, mocker, monkeypatch @@ -654,115 +623,3 @@ def test_accents_ignored( request_time_ms = 1000 * (end - start) assert 0 < body["query_time_ms"] < body["total_time_ms"] < request_time_ms - - -@pytest.mark.parametrize( - "family_ids", - [ - ["CCLW.family.1385.0"], - ["CCLW.family.10246.0", "CCLW.family.8633.0"], - ["CCLW.family.10246.0", "CCLW.family.8633.0", "UNFCCC.family.1267.0"], - ], -) -@pytest.mark.search -def test_family_ids_search( - test_vespa, - data_db, - monkeypatch, - data_client, - family_ids, -): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - _populate_db_families(data_db) - - params = { - "query_string": "the", - "family_ids": family_ids, - } - - response = _make_search_request(data_client, params) - - got_family_ids = _fam_ids_from_response(data_db, response) - assert sorted(got_family_ids) == sorted(family_ids) - - -@pytest.mark.parametrize( - "document_ids", - [ - ["CCLW.executive.1385.5336"], - ["CCLW.executive.10246.4861", "UNFCCC.non-party.1267.0"], - [ - "CCLW.executive.8633.3052", - "UNFCCC.non-party.1267.0", - "CCLW.executive.10246.4861", - ], - ], -) -@pytest.mark.search -def test_document_ids_search( - test_vespa, - data_db, - monkeypatch, - data_client, - document_ids, -): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - _populate_db_families(data_db) - - params = { - "query_string": "the", - "document_ids": document_ids, - } - response = _make_search_request(data_client, params) - - got_document_ids = _doc_ids_from_response(data_db, response) - assert sorted(got_document_ids) == sorted(document_ids) - - -@pytest.mark.search -def test_document_ids_and_family_ids_search( - test_vespa, - data_db, - monkeypatch, - data_client, -): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - _populate_db_families(data_db) - - # The doc doesnt belong to the family, so we should get no results - family_ids = ["UNFCCC.family.1267.0"] - document_ids = ["CCLW.executive.10246.4861"] - params = { - "query_string": "the", - "family_ids": family_ids, - "document_ids": document_ids, - } - - response = _make_search_request(data_client, params) - assert len(response["families"]) == 0 - - -@pytest.mark.search -def test_empty_ids_dont_limit_result( - test_vespa, - data_db, - monkeypatch, - data_client, -): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - _populate_db_families(data_db) - - # We'd expect this to be interpreted as 'unlimited' - params = { - "query_string": "the", - "family_ids": [], - "document_ids": [], - } - - response = _make_search_request(data_client, params) - - got_document_ids = _doc_ids_from_response(data_db, response) - got_family_ids = _fam_ids_from_response(data_db, response) - - assert len(got_family_ids) > 1 - assert len(got_document_ids) > 1