Skip to content

Commit

Permalink
WIP: initial support for vespa search
Browse files: browse the repository at this point in the history
  • Loading branch information
Joel Wright committed Oct 10, 2023
1 parent 3935aec commit f8c2b7b
Show file tree
Hide file tree
Showing 3 changed files with 1,432 additions and 640 deletions.
30 changes: 21 additions & 9 deletions app/api/api_v1/routers/search.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,6 +10,7 @@
from io import BytesIO
from typing import Mapping, Sequence

from cpr_data_access.search_adaptors import VespaSearchAdaptor # type: ignore
from fastapi import APIRouter, Depends, Request
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session
Expand Down Expand Up @@ -39,10 +40,12 @@
_OPENSEARCH_INDEX_CONFIG = OpenSearchQueryConfig()
_DOCUMENT_EXTRA_INFO_CACHE = DocumentExtraCache()

_VESPA_CONNECTION = VespaSearchAdaptor()

search_router = APIRouter()


def _search_request(db: Session, search_body: SearchRequestBody) -> SearchResponse:
def _search_request(db: Session, search_body: SearchRequestBody, use_vespa: bool = False) -> SearchResponse:
if search_body.keyword_filters is not None:
search_body.keyword_filters = process_search_keyword_filters(
db,
Expand All @@ -56,19 +59,23 @@ def _search_request(db: Session, search_body: SearchRequestBody) -> SearchRespon
req=_get_browse_args_from_search_request_body(search_body),
)
else:
return _OPENSEARCH_CONNECTION.query_families(
search_request_body=search_body,
opensearch_internal_config=_OPENSEARCH_INDEX_CONFIG,
document_extra_info=_DOCUMENT_EXTRA_INFO_CACHE.get_document_extra_info(db),
preference="default_search_preference",
)
if use_vespa:
return _VESPA_CONNECTION.search(request=search_body)
else:
return _OPENSEARCH_CONNECTION.query_families(
search_request_body=search_body,
opensearch_internal_config=_OPENSEARCH_INDEX_CONFIG,
document_extra_info=_DOCUMENT_EXTRA_INFO_CACHE.get_document_extra_info(db),
preference="default_search_preference",
)


@search_router.post("/searches")
def search_documents(
request: Request,
search_body: SearchRequestBody,
db=Depends(get_db),
use_vespa: bool = False,
) -> SearchResponse:
"""Search for documents matching the search criteria."""
_LOGGER.info(
Expand All @@ -83,14 +90,15 @@ def search_documents(
_LOGGER.info(
"Starting search...",
)
return _search_request(db=db, search_body=search_body)
return _search_request(db=db, search_body=search_body, use_vespa=use_vespa)


@search_router.post("/searches/download-csv")
def download_search_documents(
request: Request,
search_body: SearchRequestBody,
db=Depends(get_db),
use_vespa: bool = False,
) -> StreamingResponse:
"""Download a CSV containing details of documents matching the search criteria."""
_LOGGER.info(
Expand All @@ -110,7 +118,11 @@ def download_search_documents(
_LOGGER.info(
"Starting search...",
)
search_response = _search_request(db=db, search_body=search_body)
search_response = _search_request(
db=db,
search_body=search_body,
use_vespa=use_vespa,
)
content_str = process_result_into_csv(db, search_response, is_browse=is_browse)

_LOGGER.debug(f"Downloading search results as CSV: {content_str}")
Expand Down
Loading

0 comments on commit f8c2b7b

Please sign in to comment.