From cceb8a546629d991cf99e2816d3018203992b2c8 Mon Sep 17 00:00:00 2001
From: Katy Baulch <46493669+katybaulch@users.noreply.github.com>
Date: Tue, 17 Sep 2024 13:09:51 +0100
Subject: [PATCH 1/5] Move vespa search tests & the search_fixtures they use
 under dedicated sub-folder  (#334)

* Move vespa search tests under dedicated vespa folder

* Move /search_fixtures under vespa search folder & rename to fixtures

* Bump to 1.14.20
---
 .trunk/trunk.yaml                                      |  2 +-
 makefile-docker.defs                                   | 10 +++++-----
 pyproject.toml                                         |  2 +-
 .../{ => vespa}/data_download/test_this_search.py      |  2 +-
 .../{ => vespa}/data_download/test_whole_database.py   |  2 +-
 .../fixtures}/vespa_document_passage.json              |  0
 .../fixtures}/vespa_family_document.json               |  0
 .../fixtures}/vespa_search_weights.json                |  0
 .../vespa_test_schema/query-profiles/default.xml       |  0
 .../vespa_test_schema/schemas/document_passage.sd      |  0
 .../vespa_test_schema/schemas/family_document.sd       |  0
 .../vespa_test_schema/schemas/search_weights.sd        |  0
 .../fixtures}/vespa_test_schema/services.xml           |  0
 tests/search/{ => vespa}/setup_search_tests.py         |  2 +-
 tests/search/{ => vespa}/test_vespa_ids_search.py      |  2 +-
 tests/search/{ => vespa}/test_vespasearch.py           |  2 +-
 16 files changed, 12 insertions(+), 12 deletions(-)
 rename tests/search/{ => vespa}/data_download/test_this_search.py (97%)
 rename tests/search/{ => vespa}/data_download/test_whole_database.py (92%)
 rename tests/search/{search_fixtures => vespa/fixtures}/vespa_document_passage.json (100%)
 rename tests/search/{search_fixtures => vespa/fixtures}/vespa_family_document.json (100%)
 rename tests/search/{search_fixtures => vespa/fixtures}/vespa_search_weights.json (100%)
 rename tests/search/{search_fixtures => vespa/fixtures}/vespa_test_schema/query-profiles/default.xml (100%)
 rename tests/search/{search_fixtures => vespa/fixtures}/vespa_test_schema/schemas/document_passage.sd (100%)
 rename tests/search/{search_fixtures => vespa/fixtures}/vespa_test_schema/schemas/family_document.sd (100%)
 rename tests/search/{search_fixtures => vespa/fixtures}/vespa_test_schema/schemas/search_weights.sd (100%)
 rename tests/search/{search_fixtures => vespa/fixtures}/vespa_test_schema/services.xml (100%)
 rename tests/search/{ => vespa}/setup_search_tests.py (99%)
 rename tests/search/{ => vespa}/test_vespa_ids_search.py (98%)
 rename tests/search/{ => vespa}/test_vespasearch.py (99%)

diff --git a/.trunk/trunk.yaml b/.trunk/trunk.yaml
index 2065f308..13b8a442 100644
--- a/.trunk/trunk.yaml
+++ b/.trunk/trunk.yaml
@@ -39,7 +39,7 @@ lint:
       paths:
         # Ignore test data JSON files
         - tests/data/**/*.json
-        - tests/search/search_fixtures/**/*.json
+        - tests/search/vespa/fixtures/**/*.json
         - scripts/**
     - linters: [markdownlint]
       paths:
diff --git a/makefile-docker.defs b/makefile-docker.defs
index bb92d124..09ac26f0 100644
--- a/makefile-docker.defs
+++ b/makefile-docker.defs
@@ -80,17 +80,17 @@ vespa_healthy:
 .ONESHELL:
 vespa_deploy_schema:
 	vespa config set target local
-	@vespa deploy tests/search/search_fixtures/vespa_test_schema --wait 300
+	@vespa deploy tests/search/vespa/fixtures/vespa_test_schema --wait 300
 
 .ONESHELL:
 vespa_load_data:
 	vespa config set target local
-	vespa feed --progress=3 tests/search/search_fixtures/vespa_search_weights.json
-	vespa feed --progress=3 tests/search/search_fixtures/vespa_family_document.json
-	vespa feed --progress=3 tests/search/search_fixtures/vespa_document_passage.json
+	vespa feed --progress=3 tests/search/vespa/fixtures/vespa_search_weights.json
+	vespa feed --progress=3 tests/search/vespa/fixtures/vespa_family_document.json
+	vespa feed --progress=3 tests/search/vespa/fixtures/vespa_document_passage.json
 
 vespa_setup: vespa_confirm_cli_installed vespa_healthy vespa_deploy_schema vespa_load_data
-	# Deploys a vespa application to a local vespa container and loads search_fixtures
+	# Deploys a vespa application to a local vespa container and loads search fixtures
 
 .ONESHELL:
 test_search:
diff --git a/pyproject.toml b/pyproject.toml
index 13a3f60e..11a21afc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "navigator_backend"
-version = "1.14.19"
+version = "1.14.20"
 description = ""
 authors = ["CPR-dev-team <tech@climatepolicyradar.org>"]
 packages = [{ include = "app" }, { include = "tests" }]
diff --git a/tests/search/data_download/test_this_search.py b/tests/search/vespa/data_download/test_this_search.py
similarity index 97%
rename from tests/search/data_download/test_this_search.py
rename to tests/search/vespa/data_download/test_this_search.py
index f9bdd713..8dad470d 100644
--- a/tests/search/data_download/test_this_search.py
+++ b/tests/search/vespa/data_download/test_this_search.py
@@ -6,7 +6,7 @@
 import pytest
 
 from app.api.api_v1.routers import search
-from tests.search.setup_search_tests import _populate_db_families
+from tests.search.vespa.setup_search_tests import _populate_db_families
 
 SEARCH_ENDPOINT = "/api/v1/searches"
 CSV_DOWNLOAD_ENDPOINT = "/api/v1/searches/download-csv"
diff --git a/tests/search/data_download/test_whole_database.py b/tests/search/vespa/data_download/test_whole_database.py
similarity index 92%
rename from tests/search/data_download/test_whole_database.py
rename to tests/search/vespa/data_download/test_whole_database.py
index 7be699e8..74896a58 100644
--- a/tests/search/data_download/test_whole_database.py
+++ b/tests/search/vespa/data_download/test_whole_database.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-from tests.search.setup_search_tests import _populate_db_families
+from tests.search.vespa.setup_search_tests import _populate_db_families
 
 ALL_DATA_DOWNLOAD_ENDPOINT = "/api/v1/searches/download-all-data"
 
diff --git a/tests/search/search_fixtures/vespa_document_passage.json b/tests/search/vespa/fixtures/vespa_document_passage.json
similarity index 100%
rename from tests/search/search_fixtures/vespa_document_passage.json
rename to tests/search/vespa/fixtures/vespa_document_passage.json
diff --git a/tests/search/search_fixtures/vespa_family_document.json b/tests/search/vespa/fixtures/vespa_family_document.json
similarity index 100%
rename from tests/search/search_fixtures/vespa_family_document.json
rename to tests/search/vespa/fixtures/vespa_family_document.json
diff --git a/tests/search/search_fixtures/vespa_search_weights.json b/tests/search/vespa/fixtures/vespa_search_weights.json
similarity index 100%
rename from tests/search/search_fixtures/vespa_search_weights.json
rename to tests/search/vespa/fixtures/vespa_search_weights.json
diff --git a/tests/search/search_fixtures/vespa_test_schema/query-profiles/default.xml b/tests/search/vespa/fixtures/vespa_test_schema/query-profiles/default.xml
similarity index 100%
rename from tests/search/search_fixtures/vespa_test_schema/query-profiles/default.xml
rename to tests/search/vespa/fixtures/vespa_test_schema/query-profiles/default.xml
diff --git a/tests/search/search_fixtures/vespa_test_schema/schemas/document_passage.sd b/tests/search/vespa/fixtures/vespa_test_schema/schemas/document_passage.sd
similarity index 100%
rename from tests/search/search_fixtures/vespa_test_schema/schemas/document_passage.sd
rename to tests/search/vespa/fixtures/vespa_test_schema/schemas/document_passage.sd
diff --git a/tests/search/search_fixtures/vespa_test_schema/schemas/family_document.sd b/tests/search/vespa/fixtures/vespa_test_schema/schemas/family_document.sd
similarity index 100%
rename from tests/search/search_fixtures/vespa_test_schema/schemas/family_document.sd
rename to tests/search/vespa/fixtures/vespa_test_schema/schemas/family_document.sd
diff --git a/tests/search/search_fixtures/vespa_test_schema/schemas/search_weights.sd b/tests/search/vespa/fixtures/vespa_test_schema/schemas/search_weights.sd
similarity index 100%
rename from tests/search/search_fixtures/vespa_test_schema/schemas/search_weights.sd
rename to tests/search/vespa/fixtures/vespa_test_schema/schemas/search_weights.sd
diff --git a/tests/search/search_fixtures/vespa_test_schema/services.xml b/tests/search/vespa/fixtures/vespa_test_schema/services.xml
similarity index 100%
rename from tests/search/search_fixtures/vespa_test_schema/services.xml
rename to tests/search/vespa/fixtures/vespa_test_schema/services.xml
diff --git a/tests/search/setup_search_tests.py b/tests/search/vespa/setup_search_tests.py
similarity index 99%
rename from tests/search/setup_search_tests.py
rename to tests/search/vespa/setup_search_tests.py
index f4a6732c..a58d6f75 100644
--- a/tests/search/setup_search_tests.py
+++ b/tests/search/vespa/setup_search_tests.py
@@ -27,7 +27,7 @@
 from sqlalchemy.orm import Session
 
 VESPA_FIXTURE_COUNT = 5
-FIXTURE_DIR = Path(__file__).parent / "search_fixtures"
+FIXTURE_DIR = Path(__file__).parent / "fixtures"
 VESPA_FAMILY_PATH = FIXTURE_DIR / "vespa_family_document.json"
 VESPA_DOCUMENT_PATH = FIXTURE_DIR / "vespa_document_passage.json"
 
diff --git a/tests/search/test_vespa_ids_search.py b/tests/search/vespa/test_vespa_ids_search.py
similarity index 98%
rename from tests/search/test_vespa_ids_search.py
rename to tests/search/vespa/test_vespa_ids_search.py
index f0db2828..203490da 100644
--- a/tests/search/test_vespa_ids_search.py
+++ b/tests/search/vespa/test_vespa_ids_search.py
@@ -6,7 +6,7 @@
 from sqlalchemy.orm import Session
 
 from app.api.api_v1.routers import search
-from tests.search.setup_search_tests import _populate_db_families
+from tests.search.vespa.setup_search_tests import _populate_db_families
 
 SEARCH_ENDPOINT = "/api/v1/searches"
 
diff --git a/tests/search/test_vespasearch.py b/tests/search/vespa/test_vespasearch.py
similarity index 99%
rename from tests/search/test_vespasearch.py
rename to tests/search/vespa/test_vespasearch.py
index e9a2f565..21031707 100644
--- a/tests/search/test_vespasearch.py
+++ b/tests/search/vespa/test_vespasearch.py
@@ -8,7 +8,7 @@
 
 from app.api.api_v1.routers import search
 from app.core.lookups import get_country_slug_from_country_code
-from tests.search.setup_search_tests import (
+from tests.search.vespa.setup_search_tests import (
     VESPA_FIXTURE_COUNT,
     _create_document,
     _create_family,

From ce6f48163416d186cda8ffbce510f86d3a9e5744 Mon Sep 17 00:00:00 2001
From: Katy Baulch <46493669+katybaulch@users.noreply.github.com>
Date: Tue, 17 Sep 2024 13:32:28 +0100
Subject: [PATCH 2/5] Move vespa search result order tests into a separate file
 (#335)

* Move vespa search tests under dedicated vespa folder

* Move /search_fixtures under vespa search folder & rename to fixtures

* Bump to 1.14.20

* Move vespa search result order tests to separate file

* Bump to 1.14.19
---
 pyproject.toml                                |  2 +-
 .../vespa/test_vespa_search_result_order.py   | 61 +++++++++++++++++++
 tests/search/vespa/test_vespasearch.py        | 44 -------------
 3 files changed, 62 insertions(+), 45 deletions(-)
 create mode 100644 tests/search/vespa/test_vespa_search_result_order.py

diff --git a/pyproject.toml b/pyproject.toml
index 11a21afc..13a3f60e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "navigator_backend"
-version = "1.14.20"
+version = "1.14.19"
 description = ""
 authors = ["CPR-dev-team <tech@climatepolicyradar.org>"]
 packages = [{ include = "app" }, { include = "tests" }]
diff --git a/tests/search/vespa/test_vespa_search_result_order.py b/tests/search/vespa/test_vespa_search_result_order.py
new file mode 100644
index 00000000..b2412604
--- /dev/null
+++ b/tests/search/vespa/test_vespa_search_result_order.py
@@ -0,0 +1,61 @@
+from typing import Mapping
+
+import pytest
+
+from app.api.api_v1.routers import search
+from tests.search.vespa.setup_search_tests import (
+    VESPA_FIXTURE_COUNT,
+    _populate_db_families,
+)
+
+SEARCH_ENDPOINT = "/api/v1/searches"
+
+
+def _make_search_request(client, params: Mapping[str, str]):
+    response = client.post(SEARCH_ENDPOINT, json=params)
+    assert response.status_code == 200, response.text
+    return response.json()
+
+
+@pytest.mark.search
+@pytest.mark.parametrize("label, query", [("search", "the"), ("browse", "")])
+def test_result_order_score(
+    label, query, test_vespa, data_db, monkeypatch, data_client
+):
+    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
+    _populate_db_families(data_db)
+
+    params = {
+        "query_string": query,
+        "sort_field": "date",
+        "sort_order": "asc",
+    }
+    asc_date_body = _make_search_request(data_client, params)
+    asc_dates = [f["family_date"] for f in asc_date_body["families"]]
+
+    params["sort_order"] = "desc"
+    desc_date_body = _make_search_request(data_client, params)
+    desc_dates = [f["family_date"] for f in desc_date_body["families"]]
+
+    assert VESPA_FIXTURE_COUNT == len(asc_dates) == len(desc_dates)
+    assert asc_dates == list(reversed(desc_dates))
+    assert asc_dates[0] < desc_dates[0]
+    assert asc_dates[-1] > desc_dates[-1]
+
+
+@pytest.mark.search
+@pytest.mark.parametrize("label, query", [("search", "the"), ("browse", "")])
+def test_result_order_title(
+    label, query, test_vespa, data_db, monkeypatch, data_client
+):
+    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
+    _populate_db_families(data_db)
+
+    params = {
+        "query_string": query,
+        "sort_field": "title",
+        "sort_order": "asc",
+    }
+
+    # Scope of test is to confirm this does not cause a failure
+    _ = _make_search_request(data_client, params)
diff --git a/tests/search/vespa/test_vespasearch.py b/tests/search/vespa/test_vespasearch.py
index 21031707..cd8f8ddb 100644
--- a/tests/search/vespa/test_vespasearch.py
+++ b/tests/search/vespa/test_vespasearch.py
@@ -434,50 +434,6 @@ def test_multiple_filters(label, query, test_vespa, data_db, monkeypatch, data_c
     _ = _make_search_request(data_client, params)
 
 
-@pytest.mark.search
-@pytest.mark.parametrize("label, query", [("search", "the"), ("browse", "")])
-def test_result_order_score(
-    label, query, test_vespa, data_db, monkeypatch, data_client
-):
-    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
-    _populate_db_families(data_db)
-
-    params = {
-        "query_string": query,
-        "sort_field": "date",
-        "sort_order": "asc",
-    }
-    asc_date_body = _make_search_request(data_client, params)
-    asc_dates = [f["family_date"] for f in asc_date_body["families"]]
-
-    params["sort_order"] = "desc"
-    desc_date_body = _make_search_request(data_client, params)
-    desc_dates = [f["family_date"] for f in desc_date_body["families"]]
-
-    assert VESPA_FIXTURE_COUNT == len(asc_dates) == len(desc_dates)
-    assert asc_dates == list(reversed(desc_dates))
-    assert asc_dates[0] < desc_dates[0]
-    assert asc_dates[-1] > desc_dates[-1]
-
-
-@pytest.mark.search
-@pytest.mark.parametrize("label, query", [("search", "the"), ("browse", "")])
-def test_result_order_title(
-    label, query, test_vespa, data_db, monkeypatch, data_client
-):
-    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
-    _populate_db_families(data_db)
-
-    params = {
-        "query_string": query,
-        "sort_field": "title",
-        "sort_order": "asc",
-    }
-
-    # Scope of test is to confirm this does not cause a failure
-    _ = _make_search_request(data_client, params)
-
-
 @pytest.mark.search
 def test_continuation_token__families(test_vespa, data_db, monkeypatch, data_client):
     monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)

From 6dc4c7f78c997232f08d3d697db4ede324e3115f Mon Sep 17 00:00:00 2001
From: Katy Baulch <46493669+katybaulch@users.noreply.github.com>
Date: Tue, 17 Sep 2024 13:48:14 +0100
Subject: [PATCH 3/5] Move continuation token vespa search tests to separate
 file (#336)

* Move vespa search tests under dedicated vespa folder

* Move /search_fixtures under vespa search folder & rename to fixtures

* Bump to 1.14.20

* Move vespa search result order tests to separate file

* Bump to 1.14.19

* Move vespa search continuation token tests to separate file

* Group pagination and continuation token tests
---
 .../vespa/test_vespa_search_pagination.py     | 163 ++++++++++++++++++
 tests/search/vespa/test_vespasearch.py        | 146 ----------------
 2 files changed, 163 insertions(+), 146 deletions(-)
 create mode 100644 tests/search/vespa/test_vespa_search_pagination.py

diff --git a/tests/search/vespa/test_vespa_search_pagination.py b/tests/search/vespa/test_vespa_search_pagination.py
new file mode 100644
index 00000000..7e9c6664
--- /dev/null
+++ b/tests/search/vespa/test_vespa_search_pagination.py
@@ -0,0 +1,163 @@
+from typing import Mapping
+
+import pytest
+
+from app.api.api_v1.routers import search
+from tests.search.vespa.setup_search_tests import (
+    VESPA_FIXTURE_COUNT,
+    _populate_db_families,
+)
+
+SEARCH_ENDPOINT = "/api/v1/searches"
+
+
+def _make_search_request(client, params: Mapping[str, str]):
+    response = client.post(SEARCH_ENDPOINT, json=params)
+    assert response.status_code == 200, response.text
+    return response.json()
+
+
+@pytest.mark.search
+def test_simple_pagination_families(test_vespa, data_client, data_db, monkeypatch):
+    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
+    _populate_db_families(data_db)
+
+    PAGE_SIZE = 2
+
+    # Query one
+    params = {
+        "query_string": "and",
+        "page_size": PAGE_SIZE,
+        "offset": 0,
+    }
+    body_one = _make_search_request(data_client, params)
+    assert body_one["hits"] == VESPA_FIXTURE_COUNT
+    assert len(body_one["families"]) == PAGE_SIZE
+    assert (
+        body_one["families"][0]["family_slug"]
+        == "agriculture-sector-plan-2015-2019_7999"
+    )
+    assert (
+        body_one["families"][1]["family_slug"]
+        == "national-environment-policy-of-guinea_f0df"
+    )
+
+    # Query two
+    params = {
+        "query_string": "and",
+        "page_size": PAGE_SIZE,
+        "offset": 2,
+    }
+    body_two = _make_search_request(data_client, params)
+    assert body_two["hits"] == VESPA_FIXTURE_COUNT
+    assert len(body_two["families"]) == PAGE_SIZE
+    assert (
+        body_two["families"][0]["family_slug"]
+        == "national-energy-policy-and-energy-action-plan_9262"
+    )
+    assert (
+        body_two["families"][1]["family_slug"]
+        == "submission-to-the-unfccc-ahead-of-the-first-technical-dialogue_e760"
+    )
+
+
+@pytest.mark.search
+def test_continuation_token__families(test_vespa, data_db, monkeypatch, data_client):
+    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
+
+    _populate_db_families(data_db)
+
+    params = {"query_string": "the", "limit": 2, "page_size": 1}
+    response = _make_search_request(data_client, params)
+    continuation = response["continuation_token"]
+    first_family_ids = [f["family_slug"] for f in response["families"]]
+
+    # Confirm we have grabbed a subset of all results
+    assert len(response["families"]) < response["total_family_hits"]
+
+    # Get next results set
+    params = {"query_string": "the", "continuation_tokens": [continuation]}
+    response = _make_search_request(data_client, params)
+    second_family_ids = [f["family_slug"] for f in response["families"]]
+
+    # Confirm we actually got different results
+    assert sorted(first_family_ids) != sorted(second_family_ids)
+
+    # Go back to prev and confirm it's what we had initially
+    params = {
+        "query_string": "the",
+        "continuation_tokens": [response["prev_continuation_token"]],
+        "limit": 2,
+        "page_size": 1,
+    }
+    response = _make_search_request(data_client, params)
+    prev_family_ids = [f["family_slug"] for f in response["families"]]
+
+    assert sorted(first_family_ids) == sorted(prev_family_ids)
+
+
+@pytest.mark.search
+def test_continuation_token__passages(test_vespa, data_db, monkeypatch, data_client):
+    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
+
+    _populate_db_families(data_db)
+
+    # Get second set of families
+    params = {
+        "query_string": "the",
+        "document_ids": ["CCLW.executive.10246.4861", "CCLW.executive.4934.1571"],
+        "limit": 1,
+        "page_size": 1,
+    }
+    first_family = _make_search_request(data_client, params)
+    params["continuation_tokens"] = [first_family["continuation_token"]]
+    second_family_first_passages = _make_search_request(data_client, params)
+    second_family_first_passages_ids = [
+        h["text_block_id"]
+        for h in second_family_first_passages["families"][0]["family_documents"][0][
+            "document_passage_matches"
+        ]
+    ]
+
+    # Get next set of passages
+    this_family_continuation = second_family_first_passages["this_continuation_token"]
+    next_passages_continuation = second_family_first_passages["families"][0][
+        "continuation_token"
+    ]
+    params["continuation_tokens"] = [
+        this_family_continuation,
+        next_passages_continuation,
+    ]
+    second_family_second_passages = _make_search_request(data_client, params)
+    second_family_second_passages_ids = [
+        h["text_block_id"]
+        for h in second_family_second_passages["families"][0]["family_documents"][0][
+            "document_passage_matches"
+        ]
+    ]
+
+    # Confirm we actually got different results
+    assert sorted(second_family_first_passages_ids) != sorted(
+        second_family_second_passages_ids
+    )
+
+    # Go to previous set and confirm it differs from the second set
+    prev_passages_continuation = second_family_second_passages["families"][0][
+        "prev_continuation_token"
+    ]
+
+    params["continuation_tokens"] = [
+        this_family_continuation,
+        prev_passages_continuation,
+    ]
+    response = _make_search_request(data_client, params)
+    second_family_prev_passages_ids = [
+        h["text_block_id"]
+        for h in response["families"][0]["family_documents"][0][
+            "document_passage_matches"
+        ]
+    ]
+
+    assert sorted(second_family_second_passages_ids) != sorted(
+        second_family_prev_passages_ids
+    )
diff --git a/tests/search/vespa/test_vespasearch.py b/tests/search/vespa/test_vespasearch.py
index cd8f8ddb..4cae6e81 100644
--- a/tests/search/vespa/test_vespasearch.py
+++ b/tests/search/vespa/test_vespasearch.py
@@ -45,50 +45,6 @@ def test_empty_search_term_performs_browse(
     query_spy.assert_called_once()
 
 
-@pytest.mark.search
-def test_simple_pagination_families(test_vespa, data_client, data_db, monkeypatch):
-    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
-    _populate_db_families(data_db)
-
-    PAGE_SIZE = 2
-
-    # Query one
-    params = {
-        "query_string": "and",
-        "page_size": PAGE_SIZE,
-        "offset": 0,
-    }
-    body_one = _make_search_request(data_client, params)
-    assert body_one["hits"] == VESPA_FIXTURE_COUNT
-    assert len(body_one["families"]) == PAGE_SIZE
-    assert (
-        body_one["families"][0]["family_slug"]
-        == "agriculture-sector-plan-2015-2019_7999"
-    )
-    assert (
-        body_one["families"][1]["family_slug"]
-        == "national-environment-policy-of-guinea_f0df"
-    )
-
-    # Query two
-    params = {
-        "query_string": "and",
-        "page_size": PAGE_SIZE,
-        "offset": 2,
-    }
-    body_two = _make_search_request(data_client, params)
-    assert body_two["hits"] == VESPA_FIXTURE_COUNT
-    assert len(body_two["families"]) == PAGE_SIZE
-    assert (
-        body_two["families"][0]["family_slug"]
-        == "national-energy-policy-and-energy-action-plan_9262"
-    )
-    assert (
-        body_two["families"][1]["family_slug"]
-        == "submission-to-the-unfccc-ahead-of-the-first-technical-dialogue_e760"
-    )
-
-
 @pytest.mark.search
 @pytest.mark.parametrize("exact_match", [True, False])
 def test_search_body_valid(exact_match, test_vespa, data_client, data_db, monkeypatch):
@@ -434,108 +390,6 @@ def test_multiple_filters(label, query, test_vespa, data_db, monkeypatch, data_c
     _ = _make_search_request(data_client, params)
 
 
-@pytest.mark.search
-def test_continuation_token__families(test_vespa, data_db, monkeypatch, data_client):
-    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
-
-    _populate_db_families(data_db)
-
-    params = {"query_string": "the", "limit": 2, "page_size": 1}
-    response = _make_search_request(data_client, params)
-    continuation = response["continuation_token"]
-    first_family_ids = [f["family_slug"] for f in response["families"]]
-
-    # Confirm we have grabbed a subset of all results
-    assert len(response["families"]) < response["total_family_hits"]
-
-    # Get next results set
-    params = {"query_string": "the", "continuation_tokens": [continuation]}
-    response = _make_search_request(data_client, params)
-    second_family_ids = [f["family_slug"] for f in response["families"]]
-
-    # Confirm we actually got different results
-    assert sorted(first_family_ids) != sorted(second_family_ids)
-
-    # Go back to prev and confirm its what we had initially
-    params = {
-        "query_string": "the",
-        "continuation_tokens": [response["prev_continuation_token"]],
-        "limit": 2,
-        "page_size": 1,
-    }
-    response = _make_search_request(data_client, params)
-    prev_family_ids = [f["family_slug"] for f in response["families"]]
-
-    assert sorted(first_family_ids) == sorted(prev_family_ids)
-
-
-@pytest.mark.search
-def test_continuation_token__passages(test_vespa, data_db, monkeypatch, data_client):
-    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
-
-    _populate_db_families(data_db)
-
-    # Get second set of families
-    params = {
-        "query_string": "the",
-        "document_ids": ["CCLW.executive.10246.4861", "CCLW.executive.4934.1571"],
-        "limit": 1,
-        "page_size": 1,
-    }
-    first_family = _make_search_request(data_client, params)
-    params["continuation_tokens"] = [first_family["continuation_token"]]
-    second_family_first_passages = _make_search_request(data_client, params)
-    second_family_first_passages_ids = [
-        h["text_block_id"]
-        for h in second_family_first_passages["families"][0]["family_documents"][0][
-            "document_passage_matches"
-        ]
-    ]
-
-    # Get next set of passages
-    this_family_continuation = second_family_first_passages["this_continuation_token"]
-    next_passages_continuation = second_family_first_passages["families"][0][
-        "continuation_token"
-    ]
-    params["continuation_tokens"] = [
-        this_family_continuation,
-        next_passages_continuation,
-    ]
-    second_family_second_passages = _make_search_request(data_client, params)
-    second_family_second_passages_ids = [
-        h["text_block_id"]
-        for h in second_family_second_passages["families"][0]["family_documents"][0][
-            "document_passage_matches"
-        ]
-    ]
-
-    # Confirm we actually got different results
-    assert sorted(second_family_first_passages_ids) != sorted(
-        second_family_second_passages_ids
-    )
-
-    # Go to previous set and confirm its the same
-    prev_passages_continuation = second_family_second_passages["families"][0][
-        "prev_continuation_token"
-    ]
-
-    params["continuation_tokens"] = [
-        this_family_continuation,
-        prev_passages_continuation,
-    ]
-    response = _make_search_request(data_client, params)
-    second_family_prev_passages_ids = [
-        h["text_block_id"]
-        for h in response["families"][0]["family_documents"][0][
-            "document_passage_matches"
-        ]
-    ]
-
-    assert sorted(second_family_second_passages_ids) != sorted(
-        second_family_prev_passages_ids
-    )
-
-
 @pytest.mark.search
 def test_case_insensitivity(test_vespa, data_db, monkeypatch, data_client):
     monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)

From b16284bbf753a1f178d0369f86f53d1c07d4e5bf Mon Sep 17 00:00:00 2001
From: Katy Baulch <46493669+katybaulch@users.noreply.github.com>
Date: Tue, 17 Sep 2024 14:01:33 +0100
Subject: [PATCH 4/5] Move keyword and range vespa search tests into separate
 file (#337)

* Move vespa search tests under dedicated vespa folder

* Move /search_fixtures under vespa search folder & rename to fixtures

* Bump to 1.14.20

* Move vespa search result order tests to separate file

* Bump to 1.14.19

* Move vespa search continuation token tests to separate file

* Move keyword and range vespa search tests into separate file

* Delete test_vespa_search_cont_tokens.py

* Move _make_search_request into vespa search setup
---
 tests/search/vespa/setup_search_tests.py      |   9 +
 .../test_range_and_keyword_filters_search.py  | 182 +++++++++++++++++
 tests/search/vespa/test_vespa_ids_search.py   |  15 +-
 .../vespa/test_vespa_search_pagination.py     |  11 +-
 .../vespa/test_vespa_search_result_order.py   |  11 +-
 tests/search/vespa/test_vespasearch.py        | 185 +-----------------
 6 files changed, 199 insertions(+), 214 deletions(-)
 create mode 100644 tests/search/vespa/test_range_and_keyword_filters_search.py

diff --git a/tests/search/vespa/setup_search_tests.py b/tests/search/vespa/setup_search_tests.py
index a58d6f75..93a308d4 100644
--- a/tests/search/vespa/setup_search_tests.py
+++ b/tests/search/vespa/setup_search_tests.py
@@ -26,6 +26,15 @@
 from db_client.models.organisation.corpus import Corpus, CorpusType, Organisation
 from sqlalchemy.orm import Session
 
+SEARCH_ENDPOINT = "/api/v1/searches"
+
+
+def _make_search_request(client, params: Mapping[str, str]):
+    response = client.post(SEARCH_ENDPOINT, json=params)
+    assert response.status_code == 200, response.text
+    return response.json()
+
+
 VESPA_FIXTURE_COUNT = 5
 FIXTURE_DIR = Path(__file__).parent / "fixtures"
 VESPA_FAMILY_PATH = FIXTURE_DIR / "vespa_family_document.json"
diff --git a/tests/search/vespa/test_range_and_keyword_filters_search.py b/tests/search/vespa/test_range_and_keyword_filters_search.py
new file mode 100644
index 00000000..6bf0520d
--- /dev/null
+++ b/tests/search/vespa/test_range_and_keyword_filters_search.py
@@ -0,0 +1,182 @@
+import pytest
+from db_client.models.dfce import Geography
+
+from app.api.api_v1.routers import search
+from app.core.lookups import get_country_slug_from_country_code
+from tests.search.vespa.setup_search_tests import (
+    SEARCH_ENDPOINT,
+    VESPA_FIXTURE_COUNT,
+    _make_search_request,
+    _populate_db_families,
+)
+
+
+@pytest.mark.search
+@pytest.mark.parametrize("label,query", [("search", "the"), ("browse", "")])
+def test_keyword_country_filters(
+    label, query, test_vespa, data_client, data_db, monkeypatch
+):
+    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
+    _populate_db_families(data_db)
+    base_params = {"query_string": query}
+
+    # Get all documents, then filter on each one's country in turn and confirm that
+    # only that specific document is returned (as they each have a unique
+    # country code)
+    all_body = _make_search_request(data_client, params=base_params)
+    families = [f for f in all_body["families"]]
+    assert len(families) == VESPA_FIXTURE_COUNT
+
+    for family in families:
+        country_code = family["family_geography"]
+
+        country_slug = get_country_slug_from_country_code(data_db, country_code)
+
+        params = {**base_params, **{"keyword_filters": {"countries": [country_slug]}}}
+        body_with_filters = _make_search_request(data_client, params=params)
+        filtered_family_slugs = [
+            f["family_slug"] for f in body_with_filters["families"]
+        ]
+        assert len(filtered_family_slugs) == 1
+        assert family["family_slug"] in filtered_family_slugs
+
+
+@pytest.mark.search
+@pytest.mark.parametrize("label,query", [("search", "the"), ("browse", "")])
+def test_keyword_region_filters(
+    label, query, test_vespa, data_client, data_db, monkeypatch
+):
+    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
+    _populate_db_families(data_db)
+    base_params = {"query_string": query}
+
+    # Get regions of all documents and iterate over them
+    # to confirm the originals are returned when filtered on
+    all_body = _make_search_request(data_client, params=base_params)
+    families = [f for f in all_body["families"]]
+    assert len(families) == VESPA_FIXTURE_COUNT
+
+    for family in families:
+        country_code = family["family_geography"]
+
+        # Skip UNFCCC.non-party.1267.0 (non geography, XAA) but keep checking the rest
+        if country_code == "Other":
+            continue
+
+        parent_id = (
+            data_db.query(Geography)
+            .filter(Geography.value == country_code)
+            .first()
+            .parent_id
+        )
+        region = data_db.query(Geography).filter(Geography.id == parent_id).first()
+
+        params = {**base_params, **{"keyword_filters": {"regions": [region.slug]}}}
+        body_with_filters = _make_search_request(data_client, params=params)
+        filtered_family_slugs = [
+            f["family_slug"] for f in body_with_filters["families"]
+        ]
+        assert family["family_slug"] in filtered_family_slugs
+
+
+@pytest.mark.search
+@pytest.mark.parametrize("label,query", [("search", "the"), ("browse", "")])
+def test_keyword_region_and_country_filters(
+    label, query, test_vespa, data_client, data_db, monkeypatch
+):
+    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
+    _populate_db_families(data_db)
+
+    # Filtering on one region and one country should return the one match
+    base_params = {
+        "query_string": query,
+        "keyword_filters": {
+            "regions": ["europe-central-asia"],
+            "countries": ["ITA"],
+        },
+    }
+
+    body = _make_search_request(data_client, params=base_params)
+
+    assert len(body["families"]) == 1
+    assert body["families"][0]["family_name"] == "National Energy Strategy"
+
+
+@pytest.mark.search
+@pytest.mark.parametrize("label,query", [("search", "the"), ("browse", "")])
+def test_invalid_keyword_filters(
+    label, query, test_vespa, data_db, monkeypatch, data_client
+):
+    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
+    _populate_db_families(data_db)
+
+    response = data_client.post(
+        SEARCH_ENDPOINT,
+        json={
+            "query_string": query,
+            "keyword_filters": {
+                "geographies": ["kenya"],
+                "unknown_filter_no1": ["BOOM"],
+            },
+        },
+    )
+    assert response.status_code == 422
+
+
+@pytest.mark.search
+@pytest.mark.parametrize(
+    "year_range", [(None, None), (1900, None), (None, 2020), (1900, 2020)]
+)
+def test_year_range_filtered_in(
+    year_range, test_vespa, data_db, monkeypatch, data_client
+):
+    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
+    _populate_db_families(data_db)
+
+    # Search
+    params = {"query_string": "and", "year_range": year_range}
+    body = _make_search_request(data_client, params=params)
+    assert len(body["families"]) > 0
+
+    # Browse
+    params = {"query_string": "", "year_range": year_range}
+    body = _make_search_request(data_client, params=params)
+    assert len(body["families"]) > 0
+
+
+@pytest.mark.search
+@pytest.mark.parametrize("year_range", [(None, 2010), (2024, None)])
+def test_year_range_filtered_out(
+    year_range, test_vespa, data_db, monkeypatch, data_client
+):
+    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
+    _populate_db_families(data_db)
+
+    # Search
+    params = {"query_string": "and", "year_range": year_range}
+    body = _make_search_request(data_client, params=params)
+    assert len(body["families"]) == 0
+
+    # Browse
+    params = {"query_string": "", "year_range": year_range}
+    body = _make_search_request(data_client, params=params)
+    assert len(body["families"]) == 0
+
+
+@pytest.mark.search
+@pytest.mark.parametrize("label, query", [("search", "the"), ("browse", "")])
+def test_multiple_filters(label, query, test_vespa, data_db, monkeypatch, data_client):
+    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
+    _populate_db_families(data_db)
+
+    params = {
+        "query_string": query,
+        "keyword_filters": {
+            "countries": ["south-korea"],
+            "sources": ["CCLW"],
+            "categories": ["Legislative"],
+        },
+        "year_range": (1900, 2020),
+    }
+
+    _ = _make_search_request(data_client, params)
diff --git a/tests/search/vespa/test_vespa_ids_search.py b/tests/search/vespa/test_vespa_ids_search.py
index 203490da..6f744dbc 100644
--- a/tests/search/vespa/test_vespa_ids_search.py
+++ b/tests/search/vespa/test_vespa_ids_search.py
@@ -1,20 +1,13 @@
-from typing import Mapping
-
 import pytest
 from db_client.models.dfce import Slug
 from db_client.models.dfce.family import FamilyDocument
 from sqlalchemy.orm import Session
 
 from app.api.api_v1.routers import search
-from tests.search.vespa.setup_search_tests import _populate_db_families
-
-SEARCH_ENDPOINT = "/api/v1/searches"
-
-
-def _make_search_request(client, params: Mapping[str, str]):
-    response = client.post(SEARCH_ENDPOINT, json=params)
-    assert response.status_code == 200, response.text
-    return response.json()
+from tests.search.vespa.setup_search_tests import (
+    _make_search_request,
+    _populate_db_families,
+)
 
 
 def _doc_ids_from_response(test_db: Session, response: dict) -> list[str]:
diff --git a/tests/search/vespa/test_vespa_search_pagination.py b/tests/search/vespa/test_vespa_search_pagination.py
index 7e9c6664..04890b28 100644
--- a/tests/search/vespa/test_vespa_search_pagination.py
+++ b/tests/search/vespa/test_vespa_search_pagination.py
@@ -1,21 +1,12 @@
-from typing import Mapping
-
 import pytest
 
 from app.api.api_v1.routers import search
 from tests.search.vespa.setup_search_tests import (
     VESPA_FIXTURE_COUNT,
+    _make_search_request,
     _populate_db_families,
 )
 
-SEARCH_ENDPOINT = "/api/v1/searches"
-
-
-def _make_search_request(client, params: Mapping[str, str]):
-    response = client.post(SEARCH_ENDPOINT, json=params)
-    assert response.status_code == 200, response.text
-    return response.json()
-
 
 @pytest.mark.search
 def test_simple_pagination_families(test_vespa, data_client, data_db, monkeypatch):
diff --git a/tests/search/vespa/test_vespa_search_result_order.py b/tests/search/vespa/test_vespa_search_result_order.py
index b2412604..8e8ad08d 100644
--- a/tests/search/vespa/test_vespa_search_result_order.py
+++ b/tests/search/vespa/test_vespa_search_result_order.py
@@ -1,21 +1,12 @@
-from typing import Mapping
-
 import pytest
 
 from app.api.api_v1.routers import search
 from tests.search.vespa.setup_search_tests import (
     VESPA_FIXTURE_COUNT,
+    _make_search_request,
     _populate_db_families,
 )
 
-SEARCH_ENDPOINT = "/api/v1/searches"
-
-
-def _make_search_request(client, params: Mapping[str, str]):
-    response = client.post(SEARCH_ENDPOINT, json=params)
-    assert response.status_code == 200, response.text
-    return response.json()
-
 
 @pytest.mark.search
 @pytest.mark.parametrize("label, query", [("search", "the"), ("browse", "")])
diff --git a/tests/search/vespa/test_vespasearch.py b/tests/search/vespa/test_vespasearch.py
index 4cae6e81..15e897c0 100644
--- a/tests/search/vespa/test_vespasearch.py
+++ b/tests/search/vespa/test_vespasearch.py
@@ -1,30 +1,20 @@
 import time
-from typing import Mapping
 
 import pytest
-from db_client.models.dfce import Geography
 from db_client.models.dfce.family import FamilyDocument
 from sqlalchemy import update
 
 from app.api.api_v1.routers import search
-from app.core.lookups import get_country_slug_from_country_code
 from tests.search.vespa.setup_search_tests import (
-    VESPA_FIXTURE_COUNT,
+    SEARCH_ENDPOINT,
     _create_document,
     _create_family,
     _create_family_event,
     _create_family_metadata,
+    _make_search_request,
     _populate_db_families,
 )
 
-SEARCH_ENDPOINT = "/api/v1/searches"
-
-
-def _make_search_request(client, params: Mapping[str, str]):
-    response = client.post(SEARCH_ENDPOINT, json=params)
-    assert response.status_code == 200, response.text
-    return response.json()
-
 
 @pytest.mark.search
 def test_empty_search_term_performs_browse(
@@ -219,177 +209,6 @@ def test_search_with_deleted_docs(test_vespa, monkeypatch, data_client, data_db)
     assert len(all_deleted_body["families"]) == 0
 
 
-@pytest.mark.search
-@pytest.mark.parametrize("label,query", [("search", "the"), ("browse", "")])
-def test_keyword_country_filters(
-    label, query, test_vespa, data_client, data_db, monkeypatch
-):
-    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
-    _populate_db_families(data_db)
-    base_params = {"query_string": query}
-
-    # Get all documents and iterate over their country codes to confirm that each are
-    # the specific one that is returned in the query (as they each have a unique
-    # country code)
-    all_body = _make_search_request(data_client, params=base_params)
-    families = [f for f in all_body["families"]]
-    assert len(families) == VESPA_FIXTURE_COUNT
-
-    for family in families:
-        country_code = family["family_geography"]
-
-        country_slug = get_country_slug_from_country_code(data_db, country_code)
-
-        params = {**base_params, **{"keyword_filters": {"countries": [country_slug]}}}
-        body_with_filters = _make_search_request(data_client, params=params)
-        filtered_family_slugs = [
-            f["family_slug"] for f in body_with_filters["families"]
-        ]
-        assert len(filtered_family_slugs) == 1
-        assert family["family_slug"] in filtered_family_slugs
-
-
-@pytest.mark.search
-@pytest.mark.parametrize("label,query", [("search", "the"), ("browse", "")])
-def test_keyword_region_filters(
-    label, query, test_vespa, data_client, data_db, monkeypatch
-):
-    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
-    _populate_db_families(data_db)
-    base_params = {"query_string": query}
-
-    # Get regions of all documents and iterate over them
-    # to confirm the originals are returned when filtered on
-    all_body = _make_search_request(data_client, params=base_params)
-    families = [f for f in all_body["families"]]
-    assert len(families) == VESPA_FIXTURE_COUNT
-
-    for family in families:
-        country_code = family["family_geography"]
-
-        # Fixture for UNFCCC.non-party.1267.0 has a non geography (XAA)
-        if country_code == "Other":
-            return
-
-        parent_id = (
-            data_db.query(Geography)
-            .filter(Geography.value == country_code)
-            .first()
-            .parent_id
-        )
-        region = data_db.query(Geography).filter(Geography.id == parent_id).first()
-
-        params = {**base_params, **{"keyword_filters": {"regions": [region.slug]}}}
-        body_with_filters = _make_search_request(data_client, params=params)
-        filtered_family_slugs = [
-            f["family_slug"] for f in body_with_filters["families"]
-        ]
-        assert family["family_slug"] in filtered_family_slugs
-
-
-@pytest.mark.search
-@pytest.mark.parametrize("label,query", [("search", "the"), ("browse", "")])
-def test_keyword_region_and_country_filters(
-    label, query, test_vespa, data_client, data_db, monkeypatch
-):
-    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
-    _populate_db_families(data_db)
-
-    # Filtering on one region and one country should return the one match
-    base_params = {
-        "query_string": query,
-        "keyword_filters": {
-            "regions": ["europe-central-asia"],
-            "countries": ["ITA"],
-        },
-    }
-
-    body = _make_search_request(data_client, params=base_params)
-
-    assert len(body["families"]) == 1
-    assert body["families"][0]["family_name"] == "National Energy Strategy"
-
-
-@pytest.mark.search
-@pytest.mark.parametrize("label,query", [("search", "the"), ("browse", "")])
-def test_invalid_keyword_filters(
-    label, query, test_vespa, data_db, monkeypatch, data_client
-):
-    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
-    _populate_db_families(data_db)
-
-    response = data_client.post(
-        SEARCH_ENDPOINT,
-        json={
-            "query_string": query,
-            "keyword_filters": {
-                "geographies": ["kenya"],
-                "unknown_filter_no1": ["BOOM"],
-            },
-        },
-    )
-    assert response.status_code == 422
-
-
-@pytest.mark.search
-@pytest.mark.parametrize(
-    "year_range", [(None, None), (1900, None), (None, 2020), (1900, 2020)]
-)
-def test_year_range_filterered_in(
-    year_range, test_vespa, data_db, monkeypatch, data_client
-):
-    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
-    _populate_db_families(data_db)
-
-    # Search
-    params = {"query_string": "and", "year_range": year_range}
-    body = _make_search_request(data_client, params=params)
-    assert len(body["families"]) > 0
-
-    # Browse
-    params = {"query_string": "", "year_range": year_range}
-    body = _make_search_request(data_client, params=params)
-    assert len(body["families"]) > 0
-
-
-@pytest.mark.search
-@pytest.mark.parametrize("year_range", [(None, 2010), (2024, None)])
-def test_year_range_filterered_out(
-    year_range, test_vespa, data_db, monkeypatch, data_client
-):
-    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
-    _populate_db_families(data_db)
-
-    # Search
-    params = {"query_string": "and", "year_range": year_range}
-    body = _make_search_request(data_client, params=params)
-    assert len(body["families"]) == 0
-
-    # Browse
-    params = {"query_string": "", "year_range": year_range}
-    body = _make_search_request(data_client, params=params)
-    assert len(body["families"]) == 0
-
-
-@pytest.mark.search
-@pytest.mark.parametrize("label, query", [("search", "the"), ("browse", "")])
-def test_multiple_filters(label, query, test_vespa, data_db, monkeypatch, data_client):
-    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
-    _populate_db_families(data_db)
-
-    params = {
-        "query_string": query,
-        "keyword_filters": {
-            "countries": ["south-korea"],
-            "sources": ["CCLW"],
-            "categories": ["Legislative"],
-        },
-        "year_range": (1900, 2020),
-    }
-
-    _ = _make_search_request(data_client, params)
-
-
 @pytest.mark.search
 def test_case_insensitivity(test_vespa, data_db, monkeypatch, data_client):
     monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)

From 77a49102117431341f0ef03e8dcad882595a0d83 Mon Sep 17 00:00:00 2001
From: Katy Baulch <46493669+katybaulch@users.noreply.github.com>
Date: Tue, 17 Sep 2024 14:19:42 +0100
Subject: [PATCH 5/5] Move vespa search tests for ignoring special chars & case
 to separate file (#338)

* Move data download tests into parent folder

* Move query insensitivity & special chars ignoring tests out

* Rename from test_vespasearch

* Bump to 1.14.20
---
 pyproject.toml                                |  2 +-
 ....py => test_this_vespa_search_download.py} |  0
 .../search/vespa/test_vespa_query_ignores.py  | 54 +++++++++++++++++++
 ...st_vespasearch.py => test_vespa_search.py} | 47 ----------------
 ...ase.py => test_whole_database_download.py} |  0
 5 files changed, 55 insertions(+), 48 deletions(-)
 rename tests/search/vespa/{data_download/test_this_search.py => test_this_vespa_search_download.py} (100%)
 create mode 100644 tests/search/vespa/test_vespa_query_ignores.py
 rename tests/search/vespa/{test_vespasearch.py => test_vespa_search.py} (81%)
 rename tests/search/vespa/{data_download/test_whole_database.py => test_whole_database_download.py} (100%)

diff --git a/pyproject.toml b/pyproject.toml
index 13a3f60e..11a21afc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "navigator_backend"
-version = "1.14.19"
+version = "1.14.20"
 description = ""
 authors = ["CPR-dev-team <tech@climatepolicyradar.org>"]
 packages = [{ include = "app" }, { include = "tests" }]
diff --git a/tests/search/vespa/data_download/test_this_search.py b/tests/search/vespa/test_this_vespa_search_download.py
similarity index 100%
rename from tests/search/vespa/data_download/test_this_search.py
rename to tests/search/vespa/test_this_vespa_search_download.py
diff --git a/tests/search/vespa/test_vespa_query_ignores.py b/tests/search/vespa/test_vespa_query_ignores.py
new file mode 100644
index 00000000..c7fc9f69
--- /dev/null
+++ b/tests/search/vespa/test_vespa_query_ignores.py
@@ -0,0 +1,54 @@
+import time
+
+import pytest
+
+from app.api.api_v1.routers import search
+from tests.search.vespa.setup_search_tests import (
+    _make_search_request,
+    _populate_db_families,
+)
+
+
+@pytest.mark.search
+def test_case_insensitivity(test_vespa, data_db, monkeypatch, data_client):
+    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
+    _populate_db_families(data_db)
+
+    lower_body = _make_search_request(data_client, {"query_string": "the"})
+    upper_body = _make_search_request(data_client, {"query_string": "THE"})
+
+    assert lower_body["families"] == upper_body["families"]
+
+
+@pytest.mark.search
+def test_punctuation_ignored(test_vespa, data_db, monkeypatch, data_client):
+    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
+    _populate_db_families(data_db)
+
+    regular_body = _make_search_request(data_client, {"query_string": "the"})
+    punc_body = _make_search_request(data_client, {"query_string": ", the."})
+    accent_body = _make_search_request(data_client, {"query_string": "thë"})
+
+    assert (
+        sorted([f["family_slug"] for f in punc_body["families"]])
+        == sorted([f["family_slug"] for f in regular_body["families"]])
+        == sorted([f["family_slug"] for f in accent_body["families"]])
+    )
+
+
+@pytest.mark.search
+def test_accents_ignored(
+    test_vespa,
+    data_db,
+    monkeypatch,
+    data_client,
+):
+    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
+    _populate_db_families(data_db)
+
+    start = time.time()
+    body = _make_search_request(data_client, {"query_string": "the"})
+    end = time.time()
+
+    request_time_ms = 1000 * (end - start)
+    assert 0 < body["query_time_ms"] < body["total_time_ms"] < request_time_ms
diff --git a/tests/search/vespa/test_vespasearch.py b/tests/search/vespa/test_vespa_search.py
similarity index 81%
rename from tests/search/vespa/test_vespasearch.py
rename to tests/search/vespa/test_vespa_search.py
index 15e897c0..e34fbbd1 100644
--- a/tests/search/vespa/test_vespasearch.py
+++ b/tests/search/vespa/test_vespa_search.py
@@ -1,5 +1,3 @@
-import time
-
 import pytest
 from db_client.models.dfce.family import FamilyDocument
 from sqlalchemy import update
@@ -207,48 +205,3 @@ def test_search_with_deleted_docs(test_vespa, monkeypatch, data_client, data_db)
     all_deleted_count = len(all_deleted_body["families"])
     assert start_family_count > one_deleted_count > all_deleted_count
     assert len(all_deleted_body["families"]) == 0
-
-
-@pytest.mark.search
-def test_case_insensitivity(test_vespa, data_db, monkeypatch, data_client):
-    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
-    _populate_db_families(data_db)
-
-    lower_body = _make_search_request(data_client, {"query_string": "the"})
-    upper_body = _make_search_request(data_client, {"query_string": "THE"})
-
-    assert lower_body["families"] == upper_body["families"]
-
-
-@pytest.mark.search
-def test_punctuation_ignored(test_vespa, data_db, monkeypatch, data_client):
-    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
-    _populate_db_families(data_db)
-
-    regular_body = _make_search_request(data_client, {"query_string": "the"})
-    punc_body = _make_search_request(data_client, {"query_string": ", the."})
-    accent_body = _make_search_request(data_client, {"query_string": "thë"})
-
-    assert (
-        sorted([f["family_slug"] for f in punc_body["families"]])
-        == sorted([f["family_slug"] for f in regular_body["families"]])
-        == sorted([f["family_slug"] for f in accent_body["families"]])
-    )
-
-
-@pytest.mark.search
-def test_accents_ignored(
-    test_vespa,
-    data_db,
-    monkeypatch,
-    data_client,
-):
-    monkeypatch.setattr(search, "_VESPA_CONNECTION", test_vespa)
-    _populate_db_families(data_db)
-
-    start = time.time()
-    body = _make_search_request(data_client, {"query_string": "the"})
-    end = time.time()
-
-    request_time_ms = 1000 * (end - start)
-    assert 0 < body["query_time_ms"] < body["total_time_ms"] < request_time_ms
diff --git a/tests/search/vespa/data_download/test_whole_database.py b/tests/search/vespa/test_whole_database_download.py
similarity index 100%
rename from tests/search/vespa/data_download/test_whole_database.py
rename to tests/search/vespa/test_whole_database_download.py