From cceb8a546629d991cf99e2816d3018203992b2c8 Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Tue, 17 Sep 2024 13:09:51 +0100 Subject: [PATCH 1/5] Move vespa search tests & the search_fixtures they use under dedicated sub-folder (#334) * Move vespa search tests under dedicated vespa folder * Move /search_fixtures under vespa search folder & rename to fixtures * Bump to 1.14.20 --- .trunk/trunk.yaml | 2 +- makefile-docker.defs | 10 +++++----- pyproject.toml | 2 +- .../{ => vespa}/data_download/test_this_search.py | 2 +- .../{ => vespa}/data_download/test_whole_database.py | 2 +- .../fixtures}/vespa_document_passage.json | 0 .../fixtures}/vespa_family_document.json | 0 .../fixtures}/vespa_search_weights.json | 0 .../vespa_test_schema/query-profiles/default.xml | 0 .../vespa_test_schema/schemas/document_passage.sd | 0 .../vespa_test_schema/schemas/family_document.sd | 0 .../vespa_test_schema/schemas/search_weights.sd | 0 .../fixtures}/vespa_test_schema/services.xml | 0 tests/search/{ => vespa}/setup_search_tests.py | 2 +- tests/search/{ => vespa}/test_vespa_ids_search.py | 2 +- tests/search/{ => vespa}/test_vespasearch.py | 2 +- 16 files changed, 12 insertions(+), 12 deletions(-) rename tests/search/{ => vespa}/data_download/test_this_search.py (97%) rename tests/search/{ => vespa}/data_download/test_whole_database.py (92%) rename tests/search/{search_fixtures => vespa/fixtures}/vespa_document_passage.json (100%) rename tests/search/{search_fixtures => vespa/fixtures}/vespa_family_document.json (100%) rename tests/search/{search_fixtures => vespa/fixtures}/vespa_search_weights.json (100%) rename tests/search/{search_fixtures => vespa/fixtures}/vespa_test_schema/query-profiles/default.xml (100%) rename tests/search/{search_fixtures => vespa/fixtures}/vespa_test_schema/schemas/document_passage.sd (100%) rename tests/search/{search_fixtures => vespa/fixtures}/vespa_test_schema/schemas/family_document.sd (100%) rename tests/search/{search_fixtures => vespa/fixtures}/vespa_test_schema/schemas/search_weights.sd (100%) rename tests/search/{search_fixtures => vespa/fixtures}/vespa_test_schema/services.xml (100%) rename tests/search/{ => vespa}/setup_search_tests.py (99%) rename tests/search/{ => vespa}/test_vespa_ids_search.py (98%) rename tests/search/{ => vespa}/test_vespasearch.py (99%) diff --git a/.trunk/trunk.yaml b/.trunk/trunk.yaml index 2065f308..13b8a442 100644 --- a/.trunk/trunk.yaml +++ b/.trunk/trunk.yaml @@ -39,7 +39,7 @@ lint: paths: # Ignore test data JSON files - tests/data/**/*.json - - tests/search/search_fixtures/**/*.json + - tests/search/vespa/fixtures/**/*.json - scripts/** - linters: [markdownlint] paths: diff --git a/makefile-docker.defs b/makefile-docker.defs index bb92d124..09ac26f0 100644 --- a/makefile-docker.defs +++ b/makefile-docker.defs @@ -80,17 +80,17 @@ vespa_healthy: .ONESHELL: vespa_deploy_schema: vespa config set target local - @vespa deploy tests/search/search_fixtures/vespa_test_schema --wait 300 + @vespa deploy tests/search/vespa/fixtures/vespa_test_schema --wait 300 .ONESHELL: vespa_load_data: vespa config set target local - vespa feed --progress=3 tests/search/search_fixtures/vespa_search_weights.json - vespa feed --progress=3 tests/search/search_fixtures/vespa_family_document.json - vespa feed --progress=3 tests/search/search_fixtures/vespa_document_passage.json + vespa feed --progress=3 tests/search/vespa/fixtures/vespa_search_weights.json + vespa feed --progress=3 tests/search/vespa/fixtures/vespa_family_document.json + vespa feed --progress=3 tests/search/vespa/fixtures/vespa_document_passage.json vespa_setup: vespa_confirm_cli_installed vespa_healthy vespa_deploy_schema vespa_load_data - # Deploys a vespa application to a local vespa container and loads search_fixtures + # Deploys a vespa application to a local vespa container and loads search fixtures .ONESHELL: test_search: diff --git a/pyproject.toml b/pyproject.toml index 13a3f60e..11a21afc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "navigator_backend" -version = "1.14.19" +version = "1.14.20" description = "" authors = ["CPR-dev-team "] packages = [{ include = "app" }, { include = "tests" }] diff --git a/tests/search/data_download/test_this_search.py b/tests/search/vespa/data_download/test_this_search.py similarity index 97% rename from tests/search/data_download/test_this_search.py rename to tests/search/vespa/data_download/test_this_search.py index f9bdd713..8dad470d 100644 --- a/tests/search/data_download/test_this_search.py +++ b/tests/search/vespa/data_download/test_this_search.py @@ -6,7 +6,7 @@ import pytest from app.api.api_v1.routers import search -from tests.search.setup_search_tests import _populate_db_families +from tests.search.vespa.setup_search_tests import _populate_db_families SEARCH_ENDPOINT = "/api/v1/searches" CSV_DOWNLOAD_ENDPOINT = "/api/v1/searches/download-csv" diff --git a/tests/search/data_download/test_whole_database.py b/tests/search/vespa/data_download/test_whole_database.py similarity index 92% rename from tests/search/data_download/test_whole_database.py rename to tests/search/vespa/data_download/test_whole_database.py index 7be699e8..74896a58 100644 --- a/tests/search/data_download/test_whole_database.py +++ b/tests/search/vespa/data_download/test_whole_database.py @@ -2,7 +2,7 @@ import pytest -from tests.search.setup_search_tests import _populate_db_families +from tests.search.vespa.setup_search_tests import _populate_db_families ALL_DATA_DOWNLOAD_ENDPOINT = "/api/v1/searches/download-all-data" diff --git a/tests/search/search_fixtures/vespa_document_passage.json b/tests/search/vespa/fixtures/vespa_document_passage.json similarity index 100% rename from tests/search/search_fixtures/vespa_document_passage.json rename to tests/search/vespa/fixtures/vespa_document_passage.json diff --git a/tests/search/search_fixtures/vespa_family_document.json b/tests/search/vespa/fixtures/vespa_family_document.json similarity index 100% rename from tests/search/search_fixtures/vespa_family_document.json rename to tests/search/vespa/fixtures/vespa_family_document.json diff --git a/tests/search/search_fixtures/vespa_search_weights.json b/tests/search/vespa/fixtures/vespa_search_weights.json similarity index 100% rename from tests/search/search_fixtures/vespa_search_weights.json rename to tests/search/vespa/fixtures/vespa_search_weights.json diff --git a/tests/search/search_fixtures/vespa_test_schema/query-profiles/default.xml b/tests/search/vespa/fixtures/vespa_test_schema/query-profiles/default.xml similarity index 100% rename from tests/search/search_fixtures/vespa_test_schema/query-profiles/default.xml rename to tests/search/vespa/fixtures/vespa_test_schema/query-profiles/default.xml diff --git a/tests/search/search_fixtures/vespa_test_schema/schemas/document_passage.sd b/tests/search/vespa/fixtures/vespa_test_schema/schemas/document_passage.sd similarity index 100% rename from tests/search/search_fixtures/vespa_test_schema/schemas/document_passage.sd rename to tests/search/vespa/fixtures/vespa_test_schema/schemas/document_passage.sd diff --git a/tests/search/search_fixtures/vespa_test_schema/schemas/family_document.sd b/tests/search/vespa/fixtures/vespa_test_schema/schemas/family_document.sd similarity index 100% rename from tests/search/search_fixtures/vespa_test_schema/schemas/family_document.sd rename to tests/search/vespa/fixtures/vespa_test_schema/schemas/family_document.sd diff --git a/tests/search/search_fixtures/vespa_test_schema/schemas/search_weights.sd b/tests/search/vespa/fixtures/vespa_test_schema/schemas/search_weights.sd similarity index 100% rename from tests/search/search_fixtures/vespa_test_schema/schemas/search_weights.sd rename to tests/search/vespa/fixtures/vespa_test_schema/schemas/search_weights.sd diff --git a/tests/search/search_fixtures/vespa_test_schema/services.xml b/tests/search/vespa/fixtures/vespa_test_schema/services.xml similarity index 100% rename from tests/search/search_fixtures/vespa_test_schema/services.xml rename to tests/search/vespa/fixtures/vespa_test_schema/services.xml diff --git a/tests/search/setup_search_tests.py b/tests/search/vespa/setup_search_tests.py similarity index 99% rename from tests/search/setup_search_tests.py rename to tests/search/vespa/setup_search_tests.py index f4a6732c..a58d6f75 100644 --- a/tests/search/setup_search_tests.py +++ b/tests/search/vespa/setup_search_tests.py @@ -27,7 +27,7 @@ from sqlalchemy.orm import Session VESPA_FIXTURE_COUNT = 5 -FIXTURE_DIR = Path(__file__).parent / "search_fixtures" +FIXTURE_DIR = Path(__file__).parent / "fixtures" VESPA_FAMILY_PATH = FIXTURE_DIR / "vespa_family_document.json" VESPA_DOCUMENT_PATH = FIXTURE_DIR / "vespa_document_passage.json" diff --git a/tests/search/test_vespa_ids_search.py b/tests/search/vespa/test_vespa_ids_search.py similarity index 98% rename from tests/search/test_vespa_ids_search.py rename to tests/search/vespa/test_vespa_ids_search.py index f0db2828..203490da 100644 --- a/tests/search/test_vespa_ids_search.py +++ b/tests/search/vespa/test_vespa_ids_search.py @@ -6,7 +6,7 @@ from sqlalchemy.orm import Session from app.api.api_v1.routers import search -from tests.search.setup_search_tests import _populate_db_families +from tests.search.vespa.setup_search_tests import _populate_db_families SEARCH_ENDPOINT = "/api/v1/searches" diff --git a/tests/search/test_vespasearch.py b/tests/search/vespa/test_vespasearch.py similarity index 99% rename from tests/search/test_vespasearch.py rename to tests/search/vespa/test_vespasearch.py index e9a2f565..21031707 100644 --- a/tests/search/test_vespasearch.py +++ b/tests/search/vespa/test_vespasearch.py @@ -8,7 +8,7 @@ from app.api.api_v1.routers import search from app.core.lookups import get_country_slug_from_country_code -from tests.search.setup_search_tests import ( +from tests.search.vespa.setup_search_tests import ( VESPA_FIXTURE_COUNT, _create_document, _create_family, From ce6f48163416d186cda8ffbce510f86d3a9e5744 Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Tue, 17 Sep 2024 13:32:28 +0100 Subject: [PATCH 2/5] Move vespa search result order tests into a separate file (#335) * Move vespa search tests under dedicated vespa folder * Move /search_fixtures under vespa search folder & rename to fixtures * Bump to 1.14.20 * Move vespa search result order tests to separate file * Bump to 1.14.19 --- pyproject.toml | 2 +- .../vespa/test_vespa_search_result_order.py | 61 +++++++++++++++++++ tests/search/vespa/test_vespasearch.py | 44 ------------- 3 files changed, 62 insertions(+), 45 deletions(-) create mode 100644 tests/search/vespa/test_vespa_search_result_order.py diff --git a/pyproject.toml b/pyproject.toml index 11a21afc..13a3f60e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "navigator_backend" -version = "1.14.20" +version = "1.14.19" description = "" authors = ["CPR-dev-team "] packages = [{ include = "app" }, { include = "tests" }] diff --git a/tests/search/vespa/test_vespa_search_result_order.py b/tests/search/vespa/test_vespa_search_result_order.py new file mode 100644 index 00000000..b2412604 --- /dev/null +++ b/tests/search/vespa/test_vespa_search_result_order.py @@ -0,0 +1,61 @@ +from typing import Mapping + +import pytest + +from app.api.api_v1.routers import search +from tests.search.vespa.setup_search_tests import ( + VESPA_FIXTURE_COUNT, + _populate_db_families, +) + +SEARCH_ENDPOINT = "/api/v1/searches" + + +def _make_search_request(client, params: Mapping[str, str]): + response = client.post(SEARCH_ENDPOINT, json=params) + assert response.status_code == 200, response.text + return response.json() + + +@pytest.mark.search +@pytest.mark.parametrize("label, query", [("search", "the"), ("browse", "")]) +def test_result_order_score( + label, query, test_vespa, data_db, monkeypatch, data_client +): + monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) + _populate_db_families(data_db) + + params = { + "query_string": query, + "sort_field": "date", + "sort_order": "asc", + } + asc_date_body = _make_search_request(data_client, params) + asc_dates = [f["family_date"] for f in asc_date_body["families"]] + + params["sort_order"] = "desc" + desc_date_body = _make_search_request(data_client, params) + desc_dates = [f["family_date"] for f in desc_date_body["families"]] + + assert VESPA_FIXTURE_COUNT == len(asc_dates) == len(desc_dates) + assert asc_dates == list(reversed(desc_dates)) + assert asc_dates[0] < desc_dates[0] + assert asc_dates[-1] > desc_dates[-1] + + +@pytest.mark.search +@pytest.mark.parametrize("label, query", [("search", "the"), ("browse", "")]) +def test_result_order_title( + label, query, test_vespa, data_db, monkeypatch, data_client +): + monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) + _populate_db_families(data_db) + + params = { + "query_string": query, + "sort_field": "title", + "sort_order": "asc", + } + + # Scope of test is to confirm this does not cause a failure + _ = _make_search_request(data_client, params) diff --git a/tests/search/vespa/test_vespasearch.py b/tests/search/vespa/test_vespasearch.py index 21031707..cd8f8ddb 100644 --- a/tests/search/vespa/test_vespasearch.py +++ b/tests/search/vespa/test_vespasearch.py @@ -434,50 +434,6 @@ def test_multiple_filters(label, query, test_vespa, data_db, monkeypatch, data_c _ = _make_search_request(data_client, params) -@pytest.mark.search -@pytest.mark.parametrize("label, query", [("search", "the"), ("browse", "")]) -def test_result_order_score( - label, query, test_vespa, data_db, monkeypatch, data_client -): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - _populate_db_families(data_db) - - params = { - "query_string": query, - "sort_field": "date", - "sort_order": "asc", - } - asc_date_body = _make_search_request(data_client, params) - asc_dates = [f["family_date"] for f in asc_date_body["families"]] - - params["sort_order"] = "desc" - desc_date_body = _make_search_request(data_client, params) - desc_dates = [f["family_date"] for f in desc_date_body["families"]] - - assert VESPA_FIXTURE_COUNT == len(asc_dates) == len(desc_dates) - assert asc_dates == list(reversed(desc_dates)) - assert asc_dates[0] < desc_dates[0] - assert asc_dates[-1] > desc_dates[-1] - - -@pytest.mark.search -@pytest.mark.parametrize("label, query", [("search", "the"), ("browse", "")]) -def test_result_order_title( - label, query, test_vespa, data_db, monkeypatch, data_client -): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - _populate_db_families(data_db) - - params = { - "query_string": query, - "sort_field": "title", - "sort_order": "asc", - } - - # Scope of test is to confirm this does not cause a failure - _ = _make_search_request(data_client, params) - - @pytest.mark.search def test_continuation_token__families(test_vespa, data_db, monkeypatch, data_client): monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) From 6dc4c7f78c997232f08d3d697db4ede324e3115f Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Tue, 17 Sep 2024 13:48:14 +0100 Subject: [PATCH 3/5] Move continuation token vespa search tests to separate file (#336) * Move vespa search tests under dedicated vespa folder * Move /search_fixtures under vespa search folder & rename to fixtures * Bump to 1.14.20 * Move vespa search result order tests to separate file * Bump to 1.14.19 * Move vespa search continuation token tests to separate file * Group pagination and continuation token tests --- .../vespa/test_vespa_search_pagination.py | 163 ++++++++++++++++++ tests/search/vespa/test_vespasearch.py | 146 ---------------- 2 files changed, 163 insertions(+), 146 deletions(-) create mode 100644 tests/search/vespa/test_vespa_search_pagination.py diff --git a/tests/search/vespa/test_vespa_search_pagination.py b/tests/search/vespa/test_vespa_search_pagination.py new file mode 100644 index 00000000..7e9c6664 --- /dev/null +++ b/tests/search/vespa/test_vespa_search_pagination.py @@ -0,0 +1,163 @@ +from typing import Mapping + +import pytest + +from app.api.api_v1.routers import search +from tests.search.vespa.setup_search_tests import ( + VESPA_FIXTURE_COUNT, + _populate_db_families, +) + +SEARCH_ENDPOINT = "/api/v1/searches" + + +def _make_search_request(client, params: Mapping[str, str]): + response = client.post(SEARCH_ENDPOINT, json=params) + assert response.status_code == 200, response.text + return response.json() + + +@pytest.mark.search +def test_simple_pagination_families(test_vespa, data_client, data_db, monkeypatch): + monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) + _populate_db_families(data_db) + + PAGE_SIZE = 2 + + # Query one + params = { + "query_string": "and", + "page_size": PAGE_SIZE, + "offset": 0, + } + body_one = _make_search_request(data_client, params) + assert body_one["hits"] == VESPA_FIXTURE_COUNT + assert len(body_one["families"]) == PAGE_SIZE + assert ( + body_one["families"][0]["family_slug"] + == "agriculture-sector-plan-2015-2019_7999" + ) + assert ( + body_one["families"][1]["family_slug"] + == "national-environment-policy-of-guinea_f0df" + ) + + # Query two + params = { + "query_string": "and", + "page_size": PAGE_SIZE, + "offset": 2, + } + body_two = _make_search_request(data_client, params) + assert body_two["hits"] == VESPA_FIXTURE_COUNT + assert len(body_two["families"]) == PAGE_SIZE + assert ( + body_two["families"][0]["family_slug"] + == "national-energy-policy-and-energy-action-plan_9262" + ) + assert ( + body_two["families"][1]["family_slug"] + == "submission-to-the-unfccc-ahead-of-the-first-technical-dialogue_e760" + ) + + +@pytest.mark.search +def test_continuation_token__families(test_vespa, data_db, monkeypatch, data_client): + monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) + + _populate_db_families(data_db) + + params = {"query_string": "the", "limit": 2, "page_size": 1} + response = _make_search_request(data_client, params) + continuation = response["continuation_token"] + first_family_ids = [f["family_slug"] for f in response["families"]] + + # Confirm we have grabbed a subset of all results + assert len(response["families"]) < response["total_family_hits"] + + # Get next results set + params = {"query_string": "the", "continuation_tokens": [continuation]} + response = _make_search_request(data_client, params) + second_family_ids = [f["family_slug"] for f in response["families"]] + + # Confirm we actually got different results + assert sorted(first_family_ids) != sorted(second_family_ids) + + # Go back to prev and confirm its what we had initially + params = { + "query_string": "the", + "continuation_tokens": [response["prev_continuation_token"]], + "limit": 2, + "page_size": 1, + } + response = _make_search_request(data_client, params) + prev_family_ids = [f["family_slug"] for f in response["families"]] + + assert sorted(first_family_ids) == sorted(prev_family_ids) + + +@pytest.mark.search +def test_continuation_token__passages(test_vespa, data_db, monkeypatch, data_client): + monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) + + _populate_db_families(data_db) + + # Get second set of families + params = { + "query_string": "the", + "document_ids": ["CCLW.executive.10246.4861", "CCLW.executive.4934.1571"], + "limit": 1, + "page_size": 1, + } + first_family = _make_search_request(data_client, params) + params["continuation_tokens"] = [first_family["continuation_token"]] + second_family_first_passages = _make_search_request(data_client, params) + second_family_first_passages_ids = [ + h["text_block_id"] + for h in second_family_first_passages["families"][0]["family_documents"][0][ + "document_passage_matches" + ] + ] + + # Get next set of passages + this_family_continuation = second_family_first_passages["this_continuation_token"] + next_passages_continuation = second_family_first_passages["families"][0][ + "continuation_token" + ] + params["continuation_tokens"] = [ + this_family_continuation, + next_passages_continuation, + ] + second_family_second_passages = _make_search_request(data_client, params) + second_family_second_passages_ids = [ + h["text_block_id"] + for h in second_family_second_passages["families"][0]["family_documents"][0][ + "document_passage_matches" + ] + ] + + # Confirm we actually got different results + assert sorted(second_family_first_passages_ids) != sorted( + second_family_second_passages_ids + ) + + # Go to previous set and confirm its the same + prev_passages_continuation = second_family_second_passages["families"][0][ + "prev_continuation_token" + ] + + params["continuation_tokens"] = [ + this_family_continuation, + prev_passages_continuation, + ] + response = _make_search_request(data_client, params) + second_family_prev_passages_ids = [ + h["text_block_id"] + for h in response["families"][0]["family_documents"][0][ + "document_passage_matches" + ] + ] + + assert sorted(second_family_second_passages_ids) != sorted( + second_family_prev_passages_ids + ) diff --git a/tests/search/vespa/test_vespasearch.py b/tests/search/vespa/test_vespasearch.py index cd8f8ddb..4cae6e81 100644 --- a/tests/search/vespa/test_vespasearch.py +++ b/tests/search/vespa/test_vespasearch.py @@ -45,50 +45,6 @@ def test_empty_search_term_performs_browse( query_spy.assert_called_once() -@pytest.mark.search -def test_simple_pagination_families(test_vespa, data_client, data_db, monkeypatch): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - _populate_db_families(data_db) - - PAGE_SIZE = 2 - - # Query one - params = { - "query_string": "and", - "page_size": PAGE_SIZE, - "offset": 0, - } - body_one = _make_search_request(data_client, params) - assert body_one["hits"] == VESPA_FIXTURE_COUNT - assert len(body_one["families"]) == PAGE_SIZE - assert ( - body_one["families"][0]["family_slug"] - == "agriculture-sector-plan-2015-2019_7999" - ) - assert ( - body_one["families"][1]["family_slug"] - == "national-environment-policy-of-guinea_f0df" - ) - - # Query two - params = { - "query_string": "and", - "page_size": PAGE_SIZE, - "offset": 2, - } - body_two = _make_search_request(data_client, params) - assert body_two["hits"] == VESPA_FIXTURE_COUNT - assert len(body_two["families"]) == PAGE_SIZE - assert ( - body_two["families"][0]["family_slug"] - == "national-energy-policy-and-energy-action-plan_9262" - ) - assert ( - body_two["families"][1]["family_slug"] - == "submission-to-the-unfccc-ahead-of-the-first-technical-dialogue_e760" - ) - - @pytest.mark.search @pytest.mark.parametrize("exact_match", [True, False]) def test_search_body_valid(exact_match, test_vespa, data_client, data_db, monkeypatch): @@ -434,108 +390,6 @@ def test_multiple_filters(label, query, test_vespa, data_db, monkeypatch, data_c _ = _make_search_request(data_client, params) -@pytest.mark.search -def test_continuation_token__families(test_vespa, data_db, monkeypatch, data_client): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - - _populate_db_families(data_db) - - params = {"query_string": "the", "limit": 2, "page_size": 1} - response = _make_search_request(data_client, params) - continuation = response["continuation_token"] - first_family_ids = [f["family_slug"] for f in response["families"]] - - # Confirm we have grabbed a subset of all results - assert len(response["families"]) < response["total_family_hits"] - - # Get next results set - params = {"query_string": "the", "continuation_tokens": [continuation]} - response = _make_search_request(data_client, params) - second_family_ids = [f["family_slug"] for f in response["families"]] - - # Confirm we actually got different results - assert sorted(first_family_ids) != sorted(second_family_ids) - - # Go back to prev and confirm its what we had initially - params = { - "query_string": "the", - "continuation_tokens": [response["prev_continuation_token"]], - "limit": 2, - "page_size": 1, - } - response = _make_search_request(data_client, params) - prev_family_ids = [f["family_slug"] for f in response["families"]] - - assert sorted(first_family_ids) == sorted(prev_family_ids) - - -@pytest.mark.search -def test_continuation_token__passages(test_vespa, data_db, monkeypatch, data_client): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - - _populate_db_families(data_db) - - # Get second set of families - params = { - "query_string": "the", - "document_ids": ["CCLW.executive.10246.4861", "CCLW.executive.4934.1571"], - "limit": 1, - "page_size": 1, - } - first_family = _make_search_request(data_client, params) - params["continuation_tokens"] = [first_family["continuation_token"]] - second_family_first_passages = _make_search_request(data_client, params) - second_family_first_passages_ids = [ - h["text_block_id"] - for h in second_family_first_passages["families"][0]["family_documents"][0][ - "document_passage_matches" - ] - ] - - # Get next set of passages - this_family_continuation = second_family_first_passages["this_continuation_token"] - next_passages_continuation = second_family_first_passages["families"][0][ - "continuation_token" - ] - params["continuation_tokens"] = [ - this_family_continuation, - next_passages_continuation, - ] - second_family_second_passages = _make_search_request(data_client, params) - second_family_second_passages_ids = [ - h["text_block_id"] - for h in second_family_second_passages["families"][0]["family_documents"][0][ - "document_passage_matches" - ] - ] - - # Confirm we actually got different results - assert sorted(second_family_first_passages_ids) != sorted( - second_family_second_passages_ids - ) - - # Go to previous set and confirm its the same - prev_passages_continuation = second_family_second_passages["families"][0][ - "prev_continuation_token" - ] - - params["continuation_tokens"] = [ - this_family_continuation, - prev_passages_continuation, - ] - response = _make_search_request(data_client, params) - second_family_prev_passages_ids = [ - h["text_block_id"] - for h in response["families"][0]["family_documents"][0][ - "document_passage_matches" - ] - ] - - assert sorted(second_family_second_passages_ids) != sorted( - second_family_prev_passages_ids - ) - - @pytest.mark.search def test_case_insensitivity(test_vespa, data_db, monkeypatch, data_client): monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) From b16284bbf753a1f178d0369f86f53d1c07d4e5bf Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Tue, 17 Sep 2024 14:01:33 +0100 Subject: [PATCH 4/5] Move keyword and range vespa search tests into separate file (#337) * Move vespa search tests under dedicated vespa folder * Move /search_fixtures under vespa search folder & rename to fixtures * Bump to 1.14.20 * Move vespa search result order tests to separate file * Bump to 1.14.19 * Move vespa search continuation token tests to separate file * Move keyword and range vespa search tests into separate file * Delete test_vespa_search_cont_tokens.py * Move _make_search_request into vespa search setup --- tests/search/vespa/setup_search_tests.py | 9 + .../test_range_and_keyword_filters_search.py | 182 +++++++++++++++++ tests/search/vespa/test_vespa_ids_search.py | 15 +- .../vespa/test_vespa_search_pagination.py | 11 +- .../vespa/test_vespa_search_result_order.py | 11 +- tests/search/vespa/test_vespasearch.py | 185 +----------------- 6 files changed, 199 insertions(+), 214 deletions(-) create mode 100644 tests/search/vespa/test_range_and_keyword_filters_search.py diff --git a/tests/search/vespa/setup_search_tests.py b/tests/search/vespa/setup_search_tests.py index a58d6f75..93a308d4 100644 --- a/tests/search/vespa/setup_search_tests.py +++ b/tests/search/vespa/setup_search_tests.py @@ -26,6 +26,15 @@ from db_client.models.organisation.corpus import Corpus, CorpusType, Organisation from sqlalchemy.orm import Session +SEARCH_ENDPOINT = "/api/v1/searches" + + +def _make_search_request(client, params: Mapping[str, str]): + response = client.post(SEARCH_ENDPOINT, json=params) + assert response.status_code == 200, response.text + return response.json() + + VESPA_FIXTURE_COUNT = 5 FIXTURE_DIR = Path(__file__).parent / "fixtures" VESPA_FAMILY_PATH = FIXTURE_DIR / "vespa_family_document.json" diff --git a/tests/search/vespa/test_range_and_keyword_filters_search.py b/tests/search/vespa/test_range_and_keyword_filters_search.py new file mode 100644 index 00000000..6bf0520d --- /dev/null +++ b/tests/search/vespa/test_range_and_keyword_filters_search.py @@ -0,0 +1,182 @@ +import pytest +from db_client.models.dfce import Geography + +from app.api.api_v1.routers import search +from app.core.lookups import get_country_slug_from_country_code +from tests.search.vespa.setup_search_tests import ( + SEARCH_ENDPOINT, + VESPA_FIXTURE_COUNT, + _make_search_request, + _populate_db_families, +) + + +@pytest.mark.search +@pytest.mark.parametrize("label,query", [("search", "the"), ("browse", "")]) +def test_keyword_country_filters( + label, query, test_vespa, data_client, data_db, monkeypatch +): + monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) + _populate_db_families(data_db) + base_params = {"query_string": query} + + # Get all documents and iterate over their country codes to confirm that each are + # the specific one that is returned in the query (as they each have a unique + # country code) + all_body = _make_search_request(data_client, params=base_params) + families = [f for f in all_body["families"]] + assert len(families) == VESPA_FIXTURE_COUNT + + for family in families: + country_code = family["family_geography"] + + country_slug = get_country_slug_from_country_code(data_db, country_code) + + params = {**base_params, **{"keyword_filters": {"countries": [country_slug]}}} + body_with_filters = _make_search_request(data_client, params=params) + filtered_family_slugs = [ + f["family_slug"] for f in body_with_filters["families"] + ] + assert len(filtered_family_slugs) == 1 + assert family["family_slug"] in filtered_family_slugs + + +@pytest.mark.search +@pytest.mark.parametrize("label,query", [("search", "the"), ("browse", "")]) +def test_keyword_region_filters( + label, query, test_vespa, data_client, data_db, monkeypatch +): + monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) + _populate_db_families(data_db) + base_params = {"query_string": query} + + # Get regions of all documents and iterate over them + # to confirm the originals are returned when filtered on + all_body = _make_search_request(data_client, params=base_params) + families = [f for f in all_body["families"]] + assert len(families) == VESPA_FIXTURE_COUNT + + for family in families: + country_code = family["family_geography"] + + # Fixture for UNFCCC.non-party.1267.0 has a non geography (XAA) + if country_code == "Other": + return + + parent_id = ( + data_db.query(Geography) + .filter(Geography.value == country_code) + .first() + .parent_id + ) + region = data_db.query(Geography).filter(Geography.id == parent_id).first() + + params = {**base_params, **{"keyword_filters": {"regions": [region.slug]}}} + body_with_filters = _make_search_request(data_client, params=params) + filtered_family_slugs = [ + f["family_slug"] for f in body_with_filters["families"] + ] + assert family["family_slug"] in filtered_family_slugs + + +@pytest.mark.search +@pytest.mark.parametrize("label,query", [("search", "the"), ("browse", "")]) +def test_keyword_region_and_country_filters( + label, query, test_vespa, data_client, data_db, monkeypatch +): + monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) + _populate_db_families(data_db) + + # Filtering on one region and one country should return the one match + base_params = { + "query_string": query, + "keyword_filters": { + "regions": ["europe-central-asia"], + "countries": ["ITA"], + }, + } + + body = _make_search_request(data_client, params=base_params) + + assert len(body["families"]) == 1 + assert body["families"][0]["family_name"] == "National Energy Strategy" + + +@pytest.mark.search +@pytest.mark.parametrize("label,query", [("search", "the"), ("browse", "")]) +def test_invalid_keyword_filters( + label, query, test_vespa, data_db, monkeypatch, data_client +): + monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) + _populate_db_families(data_db) + + response = data_client.post( + SEARCH_ENDPOINT, + json={ + "query_string": query, + "keyword_filters": { + "geographies": ["kenya"], + "unknown_filter_no1": ["BOOM"], + }, + }, + ) + assert response.status_code == 422 + + +@pytest.mark.search +@pytest.mark.parametrize( + "year_range", [(None, None), (1900, None), (None, 2020), (1900, 2020)] +) +def test_year_range_filtered_in( + year_range, test_vespa, data_db, monkeypatch, data_client +): + monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) + _populate_db_families(data_db) + + # Search + params = {"query_string": "and", "year_range": year_range} + body = _make_search_request(data_client, params=params) + assert len(body["families"]) > 0 + + # Browse + params = {"query_string": "", "year_range": year_range} + body = _make_search_request(data_client, params=params) + assert len(body["families"]) > 0 + + +@pytest.mark.search +@pytest.mark.parametrize("year_range", [(None, 2010), (2024, None)]) +def test_year_range_filtered_out( + year_range, test_vespa, data_db, monkeypatch, data_client +): + monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) + _populate_db_families(data_db) + + # Search + params = {"query_string": "and", "year_range": year_range} + body = _make_search_request(data_client, params=params) + assert len(body["families"]) == 0 + + # Browse + params = {"query_string": "", "year_range": year_range} + body = _make_search_request(data_client, params=params) + assert len(body["families"]) == 0 + + +@pytest.mark.search +@pytest.mark.parametrize("label, query", [("search", "the"), ("browse", "")]) +def test_multiple_filters(label, query, test_vespa, data_db, monkeypatch, data_client): + monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) + _populate_db_families(data_db) + + params = { + "query_string": query, + "keyword_filters": { + "countries": ["south-korea"], + "sources": ["CCLW"], + "categories": ["Legislative"], + }, + "year_range": (1900, 2020), + } + + _ = _make_search_request(data_client, params) diff --git a/tests/search/vespa/test_vespa_ids_search.py b/tests/search/vespa/test_vespa_ids_search.py index 203490da..6f744dbc 100644 --- a/tests/search/vespa/test_vespa_ids_search.py +++ b/tests/search/vespa/test_vespa_ids_search.py @@ -1,20 +1,13 @@ -from typing import Mapping - import pytest from db_client.models.dfce import Slug from db_client.models.dfce.family import FamilyDocument from sqlalchemy.orm import Session from app.api.api_v1.routers import search -from tests.search.vespa.setup_search_tests import _populate_db_families - -SEARCH_ENDPOINT = "/api/v1/searches" - - -def _make_search_request(client, params: Mapping[str, str]): - response = client.post(SEARCH_ENDPOINT, json=params) - assert response.status_code == 200, response.text - return response.json() +from tests.search.vespa.setup_search_tests import ( + _make_search_request, + _populate_db_families, +) def _doc_ids_from_response(test_db: Session, response: dict) -> list[str]: diff --git a/tests/search/vespa/test_vespa_search_pagination.py b/tests/search/vespa/test_vespa_search_pagination.py index 7e9c6664..04890b28 100644 --- a/tests/search/vespa/test_vespa_search_pagination.py +++ b/tests/search/vespa/test_vespa_search_pagination.py @@ -1,21 +1,12 @@ -from typing import Mapping - import pytest from app.api.api_v1.routers import search from tests.search.vespa.setup_search_tests import ( VESPA_FIXTURE_COUNT, + _make_search_request, _populate_db_families, ) -SEARCH_ENDPOINT = "/api/v1/searches" - - -def _make_search_request(client, params: Mapping[str, str]): - response = client.post(SEARCH_ENDPOINT, json=params) - assert response.status_code == 200, response.text - return response.json() - @pytest.mark.search def test_simple_pagination_families(test_vespa, data_client, data_db, monkeypatch): diff --git a/tests/search/vespa/test_vespa_search_result_order.py b/tests/search/vespa/test_vespa_search_result_order.py index b2412604..8e8ad08d 100644 --- a/tests/search/vespa/test_vespa_search_result_order.py +++ b/tests/search/vespa/test_vespa_search_result_order.py @@ -1,21 +1,12 @@ -from typing import Mapping - import pytest from app.api.api_v1.routers import search from tests.search.vespa.setup_search_tests import ( VESPA_FIXTURE_COUNT, + _make_search_request, _populate_db_families, ) -SEARCH_ENDPOINT = "/api/v1/searches" - - -def _make_search_request(client, params: Mapping[str, str]): - response = client.post(SEARCH_ENDPOINT, json=params) - assert response.status_code == 200, response.text - return response.json() - @pytest.mark.search @pytest.mark.parametrize("label, query", [("search", "the"), ("browse", "")]) diff --git a/tests/search/vespa/test_vespasearch.py b/tests/search/vespa/test_vespasearch.py index 4cae6e81..15e897c0 100644 --- a/tests/search/vespa/test_vespasearch.py +++ b/tests/search/vespa/test_vespasearch.py @@ -1,30 +1,20 @@ import time -from typing import Mapping import pytest -from db_client.models.dfce import Geography from db_client.models.dfce.family import FamilyDocument from sqlalchemy import update from app.api.api_v1.routers import search -from app.core.lookups import get_country_slug_from_country_code from tests.search.vespa.setup_search_tests import ( - VESPA_FIXTURE_COUNT, + SEARCH_ENDPOINT, _create_document, _create_family, _create_family_event, _create_family_metadata, + _make_search_request, _populate_db_families, ) -SEARCH_ENDPOINT = "/api/v1/searches" - - -def _make_search_request(client, params: Mapping[str, str]): - response = client.post(SEARCH_ENDPOINT, json=params) - assert response.status_code == 200, response.text - return response.json() - @pytest.mark.search def test_empty_search_term_performs_browse( @@ -219,177 +209,6 @@ def test_search_with_deleted_docs(test_vespa, monkeypatch, data_client, data_db) assert len(all_deleted_body["families"]) == 0 -@pytest.mark.search -@pytest.mark.parametrize("label,query", [("search", "the"), ("browse", "")]) -def test_keyword_country_filters( - label, query, test_vespa, data_client, data_db, monkeypatch -): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - _populate_db_families(data_db) - base_params = {"query_string": query} - - # Get all documents and iterate over their country codes to confirm that each are - # the specific one that is returned in the query (as they each have a unique - # country code) - all_body = _make_search_request(data_client, params=base_params) - families = [f for f in all_body["families"]] - assert len(families) == VESPA_FIXTURE_COUNT - - for family in families: - country_code = family["family_geography"] - - country_slug = get_country_slug_from_country_code(data_db, country_code) - - params = {**base_params, **{"keyword_filters": {"countries": [country_slug]}}} - body_with_filters = _make_search_request(data_client, params=params) - filtered_family_slugs = [ - f["family_slug"] for f in body_with_filters["families"] - ] - assert len(filtered_family_slugs) == 1 - assert family["family_slug"] in filtered_family_slugs - - -@pytest.mark.search -@pytest.mark.parametrize("label,query", [("search", "the"), ("browse", "")]) -def test_keyword_region_filters( - label, query, test_vespa, data_client, data_db, monkeypatch -): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - _populate_db_families(data_db) - base_params = {"query_string": query} - - # Get regions of all documents and iterate over them - # to confirm the originals are returned when filtered on - all_body = _make_search_request(data_client, params=base_params) - families = [f for f in all_body["families"]] - assert len(families) == VESPA_FIXTURE_COUNT - - for family in families: - country_code = family["family_geography"] - - # Fixture for UNFCCC.non-party.1267.0 has a non geography (XAA) - if country_code == "Other": - return - - parent_id = ( - data_db.query(Geography) - .filter(Geography.value == country_code) - .first() - .parent_id - ) - region = data_db.query(Geography).filter(Geography.id == parent_id).first() - - params = {**base_params, **{"keyword_filters": {"regions": [region.slug]}}} - body_with_filters = _make_search_request(data_client, params=params) - filtered_family_slugs = [ - f["family_slug"] for f in body_with_filters["families"] - ] - assert family["family_slug"] in filtered_family_slugs - - -@pytest.mark.search -@pytest.mark.parametrize("label,query", [("search", "the"), ("browse", "")]) -def test_keyword_region_and_country_filters( - label, query, test_vespa, data_client, data_db, monkeypatch -): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - _populate_db_families(data_db) - - # Filtering on one region and one country should return the one match - base_params = { - "query_string": query, - "keyword_filters": { - "regions": ["europe-central-asia"], - "countries": ["ITA"], - }, - } - - body = _make_search_request(data_client, params=base_params) - - assert len(body["families"]) == 1 - assert body["families"][0]["family_name"] == "National Energy Strategy" - - -@pytest.mark.search -@pytest.mark.parametrize("label,query", [("search", "the"), ("browse", "")]) -def test_invalid_keyword_filters( - label, query, test_vespa, data_db, monkeypatch, data_client -): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - _populate_db_families(data_db) - - response = data_client.post( - SEARCH_ENDPOINT, - json={ - "query_string": query, - "keyword_filters": { - "geographies": ["kenya"], - "unknown_filter_no1": ["BOOM"], - }, - }, - ) - assert response.status_code == 422 - - -@pytest.mark.search -@pytest.mark.parametrize( - "year_range", [(None, None), (1900, None), (None, 2020), (1900, 2020)] -) -def test_year_range_filterered_in( - year_range, test_vespa, data_db, monkeypatch, data_client -): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - _populate_db_families(data_db) - - # Search - params = {"query_string": "and", "year_range": year_range} - body = _make_search_request(data_client, params=params) - assert len(body["families"]) > 0 - - # Browse - params = {"query_string": "", "year_range": year_range} - body = _make_search_request(data_client, params=params) - assert len(body["families"]) > 0 - - -@pytest.mark.search -@pytest.mark.parametrize("year_range", [(None, 2010), (2024, None)]) -def test_year_range_filterered_out( - year_range, test_vespa, data_db, monkeypatch, data_client -): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - _populate_db_families(data_db) - - # Search - params = {"query_string": "and", "year_range": year_range} - body = _make_search_request(data_client, params=params) - assert len(body["families"]) == 0 - - # Browse - params = {"query_string": "", "year_range": year_range} - body = _make_search_request(data_client, params=params) - assert len(body["families"]) == 0 - - -@pytest.mark.search -@pytest.mark.parametrize("label, query", [("search", "the"), ("browse", "")]) -def test_multiple_filters(label, query, test_vespa, data_db, monkeypatch, data_client): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - _populate_db_families(data_db) - - params = { - "query_string": query, - "keyword_filters": { - "countries": ["south-korea"], - "sources": ["CCLW"], - "categories": ["Legislative"], - }, - "year_range": (1900, 2020), - } - - _ = _make_search_request(data_client, params) - - @pytest.mark.search def test_case_insensitivity(test_vespa, data_db, monkeypatch, data_client): monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) From 77a49102117431341f0ef03e8dcad882595a0d83 Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Tue, 17 Sep 2024 14:19:42 +0100 Subject: [PATCH 5/5] Move vespa search tests for ignoring special chars & case to separate file (#338) * Move data download tests into parent folder * Move query insensitivity & special chars ignoring tests out * Rename from test_vespasearch * Bump to 1.14.20 --- pyproject.toml | 2 +- ....py => test_this_vespa_search_download.py} | 0 .../search/vespa/test_vespa_query_ignores.py | 54 +++++++++++++++++++ ...st_vespasearch.py => test_vespa_search.py} | 47 ---------------- ...ase.py => test_whole_database_download.py} | 0 5 files changed, 55 insertions(+), 48 deletions(-) rename tests/search/vespa/{data_download/test_this_search.py => test_this_vespa_search_download.py} (100%) create mode 100644 tests/search/vespa/test_vespa_query_ignores.py rename tests/search/vespa/{test_vespasearch.py => test_vespa_search.py} (81%) rename tests/search/vespa/{data_download/test_whole_database.py => test_whole_database_download.py} (100%) diff --git a/pyproject.toml b/pyproject.toml index 13a3f60e..11a21afc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "navigator_backend" -version = "1.14.19" +version = "1.14.20" description = "" authors = ["CPR-dev-team "] packages = [{ include = "app" }, { include = "tests" }] diff --git a/tests/search/vespa/data_download/test_this_search.py b/tests/search/vespa/test_this_vespa_search_download.py similarity index 100% rename from tests/search/vespa/data_download/test_this_search.py rename to tests/search/vespa/test_this_vespa_search_download.py diff --git a/tests/search/vespa/test_vespa_query_ignores.py b/tests/search/vespa/test_vespa_query_ignores.py new file mode 100644 index 00000000..c7fc9f69 --- /dev/null +++ b/tests/search/vespa/test_vespa_query_ignores.py @@ -0,0 +1,54 @@ +import time + +import pytest + +from app.api.api_v1.routers import search +from tests.search.vespa.setup_search_tests import ( + _make_search_request, + _populate_db_families, +) + + +@pytest.mark.search +def test_case_insensitivity(test_vespa, data_db, monkeypatch, data_client): + monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) + _populate_db_families(data_db) + + lower_body = _make_search_request(data_client, {"query_string": "the"}) + upper_body = _make_search_request(data_client, {"query_string": "THE"}) + + assert lower_body["families"] == upper_body["families"] + + +@pytest.mark.search +def test_punctuation_ignored(test_vespa, data_db, monkeypatch, data_client): + monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) + _populate_db_families(data_db) + + regular_body = _make_search_request(data_client, {"query_string": "the"}) + punc_body = _make_search_request(data_client, {"query_string": ", the."}) + accent_body = _make_search_request(data_client, {"query_string": "thë"}) + + assert ( + sorted([f["family_slug"] for f in punc_body["families"]]) + == sorted([f["family_slug"] for f in regular_body["families"]]) + == sorted([f["family_slug"] for f in accent_body["families"]]) + ) + + +@pytest.mark.search +def test_accents_ignored( + test_vespa, + data_db, + monkeypatch, + data_client, +): + monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) + _populate_db_families(data_db) + + start = time.time() + body = _make_search_request(data_client, {"query_string": "the"}) + end = time.time() + + request_time_ms = 1000 * (end - start) + assert 0 < body["query_time_ms"] < body["total_time_ms"] < request_time_ms diff --git a/tests/search/vespa/test_vespasearch.py b/tests/search/vespa/test_vespa_search.py similarity index 81% rename from tests/search/vespa/test_vespasearch.py rename to tests/search/vespa/test_vespa_search.py index 15e897c0..e34fbbd1 100644 --- a/tests/search/vespa/test_vespasearch.py +++ b/tests/search/vespa/test_vespa_search.py @@ -1,5 +1,3 @@ -import time - import pytest from db_client.models.dfce.family import FamilyDocument from sqlalchemy import update @@ -207,48 +205,3 @@ def test_search_with_deleted_docs(test_vespa, monkeypatch, data_client, data_db) all_deleted_count = len(all_deleted_body["families"]) assert start_family_count > one_deleted_count > all_deleted_count assert len(all_deleted_body["families"]) == 0 - - -@pytest.mark.search -def test_case_insensitivity(test_vespa, data_db, monkeypatch, data_client): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - _populate_db_families(data_db) - - lower_body = _make_search_request(data_client, {"query_string": "the"}) - upper_body = _make_search_request(data_client, {"query_string": "THE"}) - - assert lower_body["families"] == upper_body["families"] - - -@pytest.mark.search -def test_punctuation_ignored(test_vespa, data_db, monkeypatch, data_client): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - _populate_db_families(data_db) - - regular_body = _make_search_request(data_client, {"query_string": "the"}) - punc_body = _make_search_request(data_client, {"query_string": ", the."}) - accent_body = _make_search_request(data_client, {"query_string": "thë"}) - - assert ( - sorted([f["family_slug"] for f in punc_body["families"]]) - == sorted([f["family_slug"] for f in regular_body["families"]]) - == sorted([f["family_slug"] for f in accent_body["families"]]) - ) - - -@pytest.mark.search -def test_accents_ignored( - test_vespa, - data_db, - monkeypatch, - data_client, -): - monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa) - _populate_db_families(data_db) - - start = time.time() - body = _make_search_request(data_client, {"query_string": "the"}) - end = time.time() - - request_time_ms = 1000 * (end - start) - assert 0 < body["query_time_ms"] < body["total_time_ms"] < request_time_ms diff --git a/tests/search/vespa/data_download/test_whole_database.py b/tests/search/vespa/test_whole_database_download.py similarity index 100% rename from tests/search/vespa/data_download/test_whole_database.py rename to tests/search/vespa/test_whole_database_download.py