Skip to content

Commit

Permalink
Adding one to vespa passage match pages. (#182)
Browse files Browse the repository at this point in the history
* Adding one to vespa passage match pages.

* Adding explicit test for the response objects.

* Removing redundant test.

---------

Co-authored-by: Mark <[email protected]>
  • Loading branch information
THOR300 and Mark authored Nov 22, 2023
1 parent 80cd88e commit b6425c7
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 1 deletion.
9 changes: 9 additions & 0 deletions app/api/api_v1/schemas/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,15 @@ class SearchResponseDocumentPassage(BaseModel):
text_block_id: str
text_block_page: Optional[int]
text_block_coords: Optional[Sequence[Coord]]

@validator("text_block_page", always=True)
@classmethod
def validate_page(cls, value):
    """Shift a 0-indexed page number to the 1-indexed PDF page number.

    A missing page (``None``) is passed through unchanged; any other value
    is returned incremented by one.
    """
    return None if value is None else value + 1



class OpenSearchResponseMatchBase(BaseModel):
Expand Down
58 changes: 57 additions & 1 deletion tests/unit/app/schemas/test_schemas.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import pytest

from app.api.api_v1.schemas.document import FamilyDocumentResponse
from app.api.api_v1.schemas.search import SearchResponseFamilyDocument
from app.api.api_v1.schemas.search import (
OpenSearchResponsePassageMatch,
SearchResponseDocumentPassage,
SearchResponseFamilyDocument,
OpenSearchResponseMatchBase,
)

CLIMATE_LAWS_DOMAIN_PATHS = [
"climate-laws.org",
Expand Down Expand Up @@ -98,3 +103,54 @@ def test_non_climate_laws_source_url_left_in_document(source_domain_path, scheme
document_role=None,
)
assert document_response.source_url == given_url


def test_search_responses() -> None:
    """
    Check that Search Response objects are instantiated correctly.

    The focus is the validators: a ``text_block_page`` supplied as a
    0-indexed value should be exposed incremented by one, both on the plain
    passage model and on the OpenSearch passage match model, and the two
    models should agree with each other.
    """
    zero_indexed_page = 0
    # Page value expected on the models once the validator has run.
    expected_page = zero_indexed_page + 1

    passage_fields = {
        "text": "example text",
        "text_block_id": "p_0_b_0",
        "text_block_page": zero_indexed_page,
        "text_block_coords": None,
    }

    # This is used for vespa responses
    vespa_passage = SearchResponseDocumentPassage.parse_obj(passage_fields)
    assert vespa_passage.text_block_page == expected_page

    match_base_fields = {
        "document_name": "Sample Document",
        "document_geography": "USA",
        "document_description": "This is a sample document description.",
        "document_sectors": ["Technology", "Healthcare"],
        "document_source": "Sample Source",
        "document_id": "sample_import_id_123",
        "document_date": "2023-11-22",
        "document_type": "PDF",
        "document_source_url": "https://example.com/sample_document",
        "document_cdn_object": "sample_cdn_object_reference",
        "document_category": "Sample Category",
        "document_content_type": "application/pdf",
        "document_slug": "sample-document",
    }
    match_base = OpenSearchResponseMatchBase.parse_obj(match_base_fields)

    # Combine the validated base fields with the raw (still 0-indexed)
    # passage fields to build the OpenSearch passage match.
    opensearch_passage = OpenSearchResponsePassageMatch(
        **match_base.dict(), **passage_fields
    )
    assert opensearch_passage.text_block_page == expected_page

    # Both response flavours must report the same page number.
    assert opensearch_passage.text_block_page == vespa_passage.text_block_page

0 comments on commit b6425c7

Please sign in to comment.