From ffe6815a0d221f12d2e97aed75bb036a676109f7 Mon Sep 17 00:00:00 2001 From: Mark Date: Wed, 22 Nov 2023 13:37:18 +0000 Subject: [PATCH 1/3] Adding one to Vespa passage match page numbers (0-indexed to 1-indexed). --- app/api/api_v1/schemas/search.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/app/api/api_v1/schemas/search.py b/app/api/api_v1/schemas/search.py index 71602579..9f3ecf72 100644 --- a/app/api/api_v1/schemas/search.py +++ b/app/api/api_v1/schemas/search.py @@ -81,6 +81,15 @@ class SearchResponseDocumentPassage(BaseModel): text_block_id: str text_block_page: Optional[int] text_block_coords: Optional[Sequence[Coord]] + + @validator("text_block_page", always=True) + @classmethod + def validate_page(cls, value): + """PDF page numbers must be incremented from our 0-indexed values.""" + if value is None: + return None + return value + 1 + class OpenSearchResponseMatchBase(BaseModel): From f8ae9dac78098f3c379a2b0792f96876609b5e9a Mon Sep 17 00:00:00 2001 From: Mark Date: Wed, 22 Nov 2023 14:22:14 +0000 Subject: [PATCH 2/3] Adding explicit test for the response objects.
--- tests/unit/app/schemas/test_schemas.py | 60 +++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/tests/unit/app/schemas/test_schemas.py b/tests/unit/app/schemas/test_schemas.py index 6b36ab63..99b3ce86 100644 --- a/tests/unit/app/schemas/test_schemas.py +++ b/tests/unit/app/schemas/test_schemas.py @@ -1,7 +1,12 @@ import pytest from app.api.api_v1.schemas.document import FamilyDocumentResponse -from app.api.api_v1.schemas.search import SearchResponseFamilyDocument +from app.api.api_v1.schemas.search import ( + OpenSearchResponsePassageMatch, + SearchResponseDocumentPassage, + SearchResponseFamilyDocument, + OpenSearchResponseMatchBase, +) CLIMATE_LAWS_DOMAIN_PATHS = [ "climate-laws.org", @@ -98,3 +103,56 @@ def test_non_climate_laws_source_url_left_in_document(source_domain_path, scheme document_role=None, ) assert document_response.source_url == given_url + + +def test_search_responses() -> None: + """ + Test that instantiating Search Response objects is done correctly. + + Particularly testing of the validators. 
+ """ + original_block_page = 0 + + original_block_data = { + "text": "example text", + "text_block_id": "p_0_b_0", + "text_block_page": original_block_page, + "text_block_coords": None, + } + + base_response_data = { + "document_name": "Sample Document", + "document_geography": "USA", + "document_description": "This is a sample document description.", + "document_sectors": ["Technology", "Healthcare"], + "document_source": "Sample Source", + "document_id": "sample_import_id_123", + "document_date": "2023-11-22", + "document_type": "PDF", + "document_source_url": "https://example.com/sample_document", + "document_cdn_object": "sample_cdn_object_reference", + "document_category": "Sample Category", + "document_content_type": "application/pdf", + "document_slug": "sample-document", + } + + # This is used for vespa responses + default_passage_response = SearchResponseDocumentPassage.parse_obj( + original_block_data + ) + + assert default_passage_response.text_block_page != original_block_page + assert default_passage_response.text_block_page == original_block_page + 1 + + response_base = OpenSearchResponseMatchBase.parse_obj(base_response_data) + + opensearch_passage_response = OpenSearchResponsePassageMatch( + **response_base.dict(), **original_block_data + ) + + assert opensearch_passage_response.text_block_page != original_block_page + assert opensearch_passage_response.text_block_page == original_block_page + 1 + + assert opensearch_passage_response.text_block_page == ( + default_passage_response.text_block_page + ) From f0cfc06523e0fe5611183e43d4070f9fdc5c1a92 Mon Sep 17 00:00:00 2001 From: Mark Date: Wed, 22 Nov 2023 14:43:43 +0000 Subject: [PATCH 3/3] Removing redundant assertions.
--- tests/unit/app/schemas/test_schemas.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/unit/app/schemas/test_schemas.py b/tests/unit/app/schemas/test_schemas.py index 99b3ce86..4af33c9e 100644 --- a/tests/unit/app/schemas/test_schemas.py +++ b/tests/unit/app/schemas/test_schemas.py @@ -141,7 +141,6 @@ def test_search_responses() -> None: original_block_data ) - assert default_passage_response.text_block_page != original_block_page assert default_passage_response.text_block_page == original_block_page + 1 response_base = OpenSearchResponseMatchBase.parse_obj(base_response_data) @@ -150,7 +149,6 @@ def test_search_responses() -> None: **response_base.dict(), **original_block_data ) - assert opensearch_passage_response.text_block_page != original_block_page assert opensearch_passage_response.text_block_page == original_block_page + 1 assert opensearch_passage_response.text_block_page == (