From 4dfe4b869d2e526a9c5ee3bbdcd584c6909527d3 Mon Sep 17 00:00:00 2001 From: olaughter <51889566+olaughter@users.noreply.github.com> Date: Wed, 7 Feb 2024 10:29:12 +0000 Subject: [PATCH] Remove opensearch code (#220) * Remove opensearch code paths * Remove opensearch dev and test setup * Remove Opensearch documentation references --- .env.example | 28 - .env.local | 1 - .github/workflows/ci.yml | 9 - app/api/api_v1/routers/search.py | 52 +- app/api/api_v1/schemas/search.py | 52 +- app/core/config.py | 53 - app/core/search.py | 748 +------ app/core/sensitive_query_terms.tsv | 2987 ---------------------------- docker-compose.dev.yml | 13 - docker-compose.yml | 46 - docs/api/search.md | 8 +- makefile-docker.defs | 16 +- makefile-local.defs | 6 - poetry.lock | 209 +- pyproject.toml | 2 - tests/conftest.py | 15 - tests/routes/test_search.py | 1632 --------------- 17 files changed, 182 insertions(+), 5695 deletions(-) delete mode 100644 app/core/sensitive_query_terms.tsv delete mode 100644 tests/routes/test_search.py diff --git a/.env.example b/.env.example index aa7a053a..cc001e36 100644 --- a/.env.example +++ b/.env.example @@ -19,34 +19,6 @@ VESPA_SEARCH_LIMIT=150 # Shared search config INDEX_ENCODER_CACHE_FOLDER=/models -# Opensearch connection settings -OPENSEARCH_USER=admin -OPENSEARCH_PASSWORD=admin -OPENSEARCH_URL=http://opensearch-node1:9200 -OPENSEARCH_INDEX_PREFIX=navigator -OPENSEARCH_REQUEST_TIMEOUT=30 -OPENSEARCH_USE_SSL=False -OPENSEARCH_VERIFY_CERTS=False -OPENSEARCH_SSL_WARNINGS=False -OPENSEARCH_INDEX_EMBEDDING_DIM=768 - -# Opensearch query/index settings - optional -# Disabled as not used in deployment. Defaults are set within the application. -# OPENSEARCH_INDEX_INNER_PRODUCT_THRESHOLD=70.0 -# OPENSEARCH_INDEX_MAX_DOC_COUNT=100 -# OPENSEARCH_INDEX_MAX_PASSAGES_PER_DOC=10 -# OPENSEARCH_INDEX_KNN_K_VALUE=10000 -# OPENSEARCH_INDEX_N_PASSAGES_TO_SAMPLE_PER_SHARD=5000 -# OPENSEARCH_INDEX_NAME_BOOST=100 -# OPENSEARCH_INDEX_DESCRIPTION_BOOST=40 -# OPENSEARCH_INDEX_EMBEDDED_TEXT_BOOST=50 -# OPENSEARCH_JIT_MAX_DOC_COUNT=20 -# OPENSEARCH_INDEX_NAME_KEY=for_search_action_name -# OPENSEARCH_INDEX_DESCRIPTION_KEY=for_search_action_description -# OPENSEARCH_INDEX_DESCRIPTION_EMBEDDING_KEY=action_description_embedding -# OPENSEARCH_INDEX_INDEX_KEY=action_name_and_id -# OPENSEARCH_INDEX_TEXT_BLOCK_KEY=text_block_id - # Backend Superuser account information for admin SUPERUSER_EMAIL=user@navigator.com SUPERUSER_PASSWORD=password diff --git a/.env.local b/.env.local index 14c8e51a..fefa08f2 100644 --- a/.env.local +++ b/.env.local @@ -1,7 +1,6 @@ # api host API_HOST=http://localhost:8888 DATABASE_URL=postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@localhost:5432/${POSTGRES_USER} -OPENSEARCH_URL=http://localhost:9200 # Frontend NEXT_PUBLIC_API_URL=http://localhost:8000/api/v1 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eeede64a..7c8c664a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,15 +65,6 @@ jobs: - name: Run backend search tests for vespa run: make test_search - - name: Run backend search tests for opensearch - run: make test_opensearch - - - name: Browse Benchmark opensearch - response times in ms - run: cat benchmark_browse.txt - - - name: Search Benchmark opensearch - response times in ms - run: cat benchmark_search.txt - - name: Log Dump if: always() run: docker-compose logs diff --git a/app/api/api_v1/routers/search.py b/app/api/api_v1/routers/search.py index c5aedc49..833535a6 100644 --- a/app/api/api_v1/routers/search.py +++ b/app/api/api_v1/routers/search.py @@ -34,24 +34,14 @@ from app.core.search import ( ENCODER, FilterField, - OpenSearchConfig, - OpenSearchConnection, - OpenSearchQueryConfig, create_vespa_search_params, process_result_into_csv, process_vespa_search_response, ) -from app.db.crud.document import DocumentExtraCache from app.db.session import get_db _LOGGER = logging.getLogger(__name__) -# Use configured environment for router -_OPENSEARCH_CONFIG = OpenSearchConfig() -_OPENSEARCH_CONNECTION = OpenSearchConnection(opensearch_config=_OPENSEARCH_CONFIG) -_OPENSEARCH_INDEX_CONFIG = OpenSearchQueryConfig() -_DOCUMENT_EXTRA_INFO_CACHE = DocumentExtraCache() - _VESPA_CONNECTION = VespaSearchAdapter( instance_url=VESPA_URL, cert_directory=VESPA_SECRETS_LOCATION, @@ -78,32 +68,22 @@ def _search_request( req=_get_browse_args_from_search_request_body(search_body), ) else: - if use_vespa: - data_access_search_params = create_vespa_search_params(db, search_body) - # TODO: we may wish to cache responses to improve pagination performance - try: - data_access_search_response = _VESPA_CONNECTION.search( - parameters=data_access_search_params - ) - except QueryError: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid Query" - ) - return process_vespa_search_response( - db, - data_access_search_response, - limit=search_body.limit, - offset=search_body.offset, - ).increment_pages() - else: - return _OPENSEARCH_CONNECTION.query_families( - search_request_body=search_body, - opensearch_internal_config=_OPENSEARCH_INDEX_CONFIG, - document_extra_info=_DOCUMENT_EXTRA_INFO_CACHE.get_document_extra_info( - db - ), - preference="default_search_preference", - ).increment_pages() + data_access_search_params = create_vespa_search_params(db, search_body) + # TODO: we may wish to cache responses to improve pagination performance + try: + data_access_search_response = _VESPA_CONNECTION.search( + parameters=data_access_search_params + ) + except QueryError: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid Query" + ) + return process_vespa_search_response( + db, + data_access_search_response, + limit=search_body.limit, + offset=search_body.offset, + ).increment_pages() @search_router.post("/searches") diff --git a/app/api/api_v1/schemas/search.py b/app/api/api_v1/schemas/search.py index 10d1c498..73c05080 100644 --- a/app/api/api_v1/schemas/search.py +++ b/app/api/api_v1/schemas/search.py @@ -12,28 +12,21 @@ class SortOrder(str, Enum): - """Sort ordering for use building OpenSearch query body.""" + """Sort ordering for use building query body.""" ASCENDING = "asc" DESCENDING = "desc" class SortField(str, Enum): - """Sort field for use building OpenSearch query body.""" + """Sort field for use building query body.""" DATE = "date" TITLE = "title" -class JitQuery(str, Enum): - """Flag used for determining if a jit query is to be used.""" - - ENABLED = "enabled" - DISABLED = "disabled" - - class FilterField(str, Enum): - """Filter field for use building OpenSearch query body.""" + """Filter field for use building query body.""" SOURCE = "sources" COUNTRY = "countries" @@ -84,45 +77,6 @@ class SearchResponseDocumentPassage(BaseModel): text_block_coords: Optional[Sequence[Coord]] = None -class OpenSearchResponseMatchBase(BaseModel): - """Describes matches returned by an OpenSearch query""" - - document_name: str - document_geography: str - document_description: str - document_sectors: Sequence[str] - document_source: str - document_id: str # Changed semantics to be import_id, not database id - document_date: str - document_type: str - document_source_url: Optional[str] = None - document_cdn_object: Optional[str] = None - document_category: str - document_content_type: Optional[str] = None - document_slug: str - - -class OpenSearchResponseNameMatch(OpenSearchResponseMatchBase): - """Describes matches returned by OpenSearch on Document name.""" - - for_search_document_name: str - - -class OpenSearchResponseDescriptionMatch(OpenSearchResponseMatchBase): - """Describes matches returned by OpenSearch on Document description.""" - - for_search_document_description: str - - -class OpenSearchResponsePassageMatch(OpenSearchResponseMatchBase): - """Describes matches returned by OpenSearch on Document passage.""" - - text: str - text_block_id: str - text_block_page: Optional[int] = None - text_block_coords: Optional[Sequence[Coord]] = None - - class SearchResponseFamilyDocument(BaseModel): """A single document in a search response.""" diff --git a/app/core/config.py b/app/core/config.py index 58b4ff30..b2de3277 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -9,59 +9,6 @@ PUBLIC_APP_URL = os.environ["PUBLIC_APP_URL"].rstrip("/") API_V1_STR = "/api/v1" -# OpenSearch Config -OPENSEARCH_URL = os.environ["OPENSEARCH_URL"] -OPENSEARCH_USERNAME = os.environ["OPENSEARCH_USER"] -OPENSEARCH_PASSWORD = os.environ["OPENSEARCH_PASSWORD"] -OPENSEARCH_INDEX_PREFIX = os.environ["OPENSEARCH_INDEX_PREFIX"] -OPENSEARCH_REQUEST_TIMEOUT: int = int(os.getenv("OPENSEARCH_REQUEST_TIMEOUT", "30")) -OPENSEARCH_USE_SSL: bool = os.getenv("OPENSEARCH_USE_SSL", "False").lower() == "true" -OPENSEARCH_VERIFY_CERTS: bool = ( - os.getenv("OPENSEARCH_VERIFY_CERTS", "False").lower() == "true" -) -OPENSEARCH_SSL_WARNINGS: bool = ( - os.getenv("OPENSEARCH_SSL_WARNINGS", "False").lower() == "true" -) -# OpenSearch Index Config -OPENSEARCH_INDEX_INNER_PRODUCT_THRESHOLD: float = float( - os.getenv("OPENSEARCH_INDEX_INNER_PRODUCT_THRESHOLD", "70.0") -) -OPENSEARCH_INDEX_MAX_DOC_COUNT: int = int( - os.getenv("OPENSEARCH_INDEX_MAX_DOC_COUNT", "140") -) -OPENSEARCH_INDEX_MAX_PASSAGES_PER_DOC: int = int( - os.getenv("OPENSEARCH_INDEX_MAX_PASSAGES_PER_DOC", "10") -) -OPENSEARCH_INDEX_KNN_K_VALUE = int(os.getenv("OPENSEARCH_INDEX_KNN_K_VALUE", "10000")) -OPENSEARCH_INDEX_N_PASSAGES_TO_SAMPLE_PER_SHARD: int = int( - os.getenv("OPENSEARCH_INDEX_N_PASSAGES_TO_SAMPLE_PER_SHARD", "5000") -) -OPENSEARCH_INDEX_NAME_BOOST: int = int(os.getenv("OPENSEARCH_INDEX_NAME_BOOST", "100")) -OPENSEARCH_INDEX_DESCRIPTION_BOOST: int = int( - os.getenv("OPENSEARCH_INDEX_DESCRIPTION_BOOST", "40") -) - -OPENSEARCH_INDEX_EMBEDDED_TEXT_BOOST: int = int( - os.getenv("OPENSEARCH_INDEX_EMBEDDED_TEXT_BOOOST", "50") -) - -OPENSEARCH_INDEX_NAME_KEY: str = os.getenv( - "OPENSEARCH_INDEX_NAME_KEY", "for_search_document_name" -) -OPENSEARCH_INDEX_DESCRIPTION_KEY: str = os.getenv( - "OPENSEARCH_INDEX_DESCRIPTION_KEY", "for_search_document_description" -) -OPENSEARCH_INDEX_DESCRIPTION_EMBEDDING_KEY: str = os.getenv( - "OPENSEARCH_INDEX_DESCRIPTION_EMBEDDING_KEY", "document_description_embedding" -) -OPENSEARCH_INDEX_INDEX_KEY: str = os.getenv( - "OPENSEARCH_INDEX_INDEX_KEY", "document_name_and_slug" -) -OPENSEARCH_INDEX_TEXT_BLOCK_KEY: str = os.getenv( - "OPENSEARCH_INDEX_TEXT_BLOCK_KEY", "text_block_id" -) -OPENSEARCH_JIT_MAX_DOC_COUNT: int = int(os.getenv("OPENSEARCH_JIT_MAX_DOC_COUNT", "20")) - # Vespa Config VESPA_SEARCH_LIMIT: int = int(os.getenv("VESPA_SEARCH_LIMIT", "100")) VESPA_SEARCH_MATCHES_PER_DOC: int = int( diff --git a/app/core/search.py b/app/core/search.py index 240d7509..c3aea6b5 100644 --- a/app/core/search.py +++ b/app/core/search.py @@ -1,13 +1,8 @@ import csv import itertools -import json import logging -import string -import time -from collections import OrderedDict, defaultdict -from dataclasses import dataclass +from collections import defaultdict from io import StringIO -from pathlib import Path from typing import Any, Mapping, Optional, Sequence, cast from cpr_data_access.embedding import Embedder @@ -17,17 +12,10 @@ from cpr_data_access.models.search import SearchParameters as DataAccessSearchParams from cpr_data_access.models.search import SearchResponse as DataAccessSearchResponse from cpr_data_access.models.search import filter_fields -from opensearchpy import JSONSerializer as jss -from opensearchpy import OpenSearch from sqlalchemy.orm import Session from app.api.api_v1.schemas.search import ( FilterField, - IncludedResults, - OpenSearchResponseDescriptionMatch, - OpenSearchResponseMatchBase, - OpenSearchResponseNameMatch, - OpenSearchResponsePassageMatch, SearchRequestBody, SearchResponse, SearchResponseDocumentPassage, @@ -38,28 +26,6 @@ ) from app.core.config import ( INDEX_ENCODER_CACHE_FOLDER, - OPENSEARCH_INDEX_DESCRIPTION_BOOST, - OPENSEARCH_INDEX_DESCRIPTION_EMBEDDING_KEY, - OPENSEARCH_INDEX_DESCRIPTION_KEY, - OPENSEARCH_INDEX_EMBEDDED_TEXT_BOOST, - OPENSEARCH_INDEX_INDEX_KEY, - OPENSEARCH_INDEX_INNER_PRODUCT_THRESHOLD, - OPENSEARCH_INDEX_KNN_K_VALUE, - OPENSEARCH_INDEX_MAX_DOC_COUNT, - OPENSEARCH_INDEX_MAX_PASSAGES_PER_DOC, - OPENSEARCH_INDEX_N_PASSAGES_TO_SAMPLE_PER_SHARD, - OPENSEARCH_INDEX_NAME_BOOST, - OPENSEARCH_INDEX_NAME_KEY, - OPENSEARCH_INDEX_PREFIX, - OPENSEARCH_INDEX_TEXT_BLOCK_KEY, - OPENSEARCH_JIT_MAX_DOC_COUNT, - OPENSEARCH_PASSWORD, - OPENSEARCH_REQUEST_TIMEOUT, - OPENSEARCH_SSL_WARNINGS, - OPENSEARCH_URL, - OPENSEARCH_USE_SSL, - OPENSEARCH_USERNAME, - OPENSEARCH_VERIFY_CERTS, PUBLIC_APP_URL, VESPA_SEARCH_LIMIT, VESPA_SEARCH_MATCHES_PER_DOC, @@ -82,21 +48,6 @@ ENCODER = Embedder(cache_folder=INDEX_ENCODER_CACHE_FOLDER) -# Map a sort field type to the document key used by OpenSearch -_SORT_FIELD_MAP: Mapping[SortField, str] = { - SortField.DATE: "document_date", - SortField.TITLE: "document_name", -} -# TODO: Map a filter field type to the document key used by OpenSearch -_FILTER_FIELD_MAP: Mapping[FilterField, str] = { - FilterField.SOURCE: "document_source", - FilterField.COUNTRY: "document_geography", - FilterField.CATEGORY: "document_category", - FilterField.LANGUAGE: "document_language", -} -_REQUIRED_FIELDS = ["document_name"] -_DEFAULT_SORT_ORDER = SortOrder.DESCENDING -_JSON_SERIALIZER = jss() _CSV_SEARCH_RESPONSE_COLUMNS = [ "Collection Name", "Collection Summary", @@ -116,703 +67,6 @@ ] -def _innerproduct_threshold_to_lucene_threshold(ip_thresh: float) -> float: - """ - Map inner product to lucene threashold. - - Opensearch documentation on mapping similarity functions to Lucene thresholds is - here: https://github.com/opensearch-project/k-NN/blob/main/src/main/java/org/opensearch/knn/index/SpaceType.java#L33 - - It defines 'inner product' as negative inner product i.e. a distance rather than - similarity measure, so we reverse the signs of inner product here compared to the - docs. - """ # noqa: E501 - if ip_thresh > 0: - return ip_thresh + 1 - else: - return 1 / (1 - ip_thresh) - - -def load_sensitive_query_terms() -> set[str]: - """ - Return sensitive query terms from the first column of a TSV file. - - Outputs are lowercased for case-insensitive matching. - - :return [set[str]]: sensitive query terms - """ - tsv_path = Path(__file__).parent / "sensitive_query_terms.tsv" - with open(tsv_path, "r") as tsv_file: - reader = csv.reader(tsv_file, delimiter="\t") - - # first column is group name, second column is keyword - sensitive_terms = set([row[1].lower().strip() for row in reader]) - - return sensitive_terms - - -@dataclass(frozen=True) -class OpenSearchQueryConfig: - """Configuration for searches sent to OpenSearch.""" - - name_boost: int = OPENSEARCH_INDEX_NAME_BOOST - description_boost: int = OPENSEARCH_INDEX_DESCRIPTION_BOOST - embedded_text_boost: int = OPENSEARCH_INDEX_EMBEDDED_TEXT_BOOST - lucene_threshold: float = _innerproduct_threshold_to_lucene_threshold( - OPENSEARCH_INDEX_INNER_PRODUCT_THRESHOLD - ) # TODO: tune me separately for descriptions? - max_doc_count: int = OPENSEARCH_INDEX_MAX_DOC_COUNT - max_passages_per_doc: int = OPENSEARCH_INDEX_MAX_PASSAGES_PER_DOC - n_passages_to_sample_per_shard: int = ( - OPENSEARCH_INDEX_N_PASSAGES_TO_SAMPLE_PER_SHARD # noqa - ) - k = OPENSEARCH_INDEX_KNN_K_VALUE - jit_max_doc_count: int = OPENSEARCH_JIT_MAX_DOC_COUNT - - -@dataclass -class OpenSearchConfig: - """Config for accessing an OpenSearch instance.""" - - url: str = OPENSEARCH_URL - username: str = OPENSEARCH_USERNAME - password: str = OPENSEARCH_PASSWORD - index_prefix: str = OPENSEARCH_INDEX_PREFIX - request_timeout: int = OPENSEARCH_REQUEST_TIMEOUT - use_ssl: bool = OPENSEARCH_USE_SSL - verify_certs: bool = OPENSEARCH_VERIFY_CERTS - ssl_show_warnings: bool = OPENSEARCH_SSL_WARNINGS - - -@dataclass -class OpenSearchResponse: - """Opensearch response container.""" - - raw_response: Mapping[str, Any] - request_time_ms: int - - -class OpenSearchEncoder(json.JSONEncoder): - """Special json encoder for OpenSearch types""" - - def default(self, obj): - """Override""" - return _JSON_SERIALIZER.default(obj) - - -class OpenSearchConnection: - """OpenSearch connection helper, allows query based on config.""" - - def __init__( - self, - opensearch_config: OpenSearchConfig, - ): - self._opensearch_config = opensearch_config - self._opensearch_connection: Optional[OpenSearch] = None - self._sensitive_query_terms = load_sensitive_query_terms() - - def query_families( - self, - search_request_body: SearchRequestBody, - opensearch_internal_config: OpenSearchQueryConfig, - document_extra_info: Mapping[str, Mapping[str, str]], - preference: Optional[str], - ) -> SearchResponse: - """Build & make an OpenSearch query based on the given request body.""" - - t0 = time.perf_counter_ns() - opensearch_request = build_opensearch_request_body( - search_request=search_request_body, - opensearch_internal_config=opensearch_internal_config, - sensitive_query_terms=self._sensitive_query_terms, - ) - - indices = self._get_opensearch_indices_to_query(search_request_body) - - opensearch_response_body = self.raw_query( - opensearch_request.query, preference, indices - ) - - return process_search_response_body_families( - t0, - opensearch_response_body, - document_extra_info, - limit=search_request_body.limit, - offset=search_request_body.offset, - ) - - def _get_opensearch_indices_to_query( - self, search_request: SearchRequestBody - ) -> str: - """ - Get the OpenSearch indices to query based on the request body. - - :param [SearchRequestBody] search_request: The search request body. - :return [str]: a comma-separated string of indices. - """ - - # By default we just query the index containing names and descriptions, - # and the non-translated PDFs - indices_include = [ - f"{self._opensearch_config.index_prefix}_core", - f"{self._opensearch_config.index_prefix}_pdfs_non_translated", - ] - - if search_request.include_results is None: - return ",".join(indices_include) - - if IncludedResults.PDFS_TRANSLATED in search_request.include_results: - indices_include.append( - f"{self._opensearch_config.index_prefix}_pdfs_translated" - ) - - if IncludedResults.HTMLS_TRANSLATED in search_request.include_results: - indices_include.append( - f"{self._opensearch_config.index_prefix}_htmls_translated" - ) - - if IncludedResults.HTMLS_NON_TRANSLATED in search_request.include_results: - indices_include.append( - f"{self._opensearch_config.index_prefix}_htmls_non_translated" - ) - - return ",".join(indices_include) - - def raw_query( - self, - request_body: Mapping[str, Any], - preference: Optional[str], - indices: str, - ) -> OpenSearchResponse: - """Query the configured OpenSearch instance with a JSON OpenSearch body.""" - - if self._opensearch_connection is None: - login_details = ( - self._opensearch_config.username, - self._opensearch_config.password, - ) - self._opensearch_connection = OpenSearch( - [self._opensearch_config.url], - http_auth=login_details, - use_ssl=self._opensearch_config.use_ssl, - veriy_certs=self._opensearch_config.verify_certs, - ssl_show_warn=self._opensearch_config.ssl_show_warnings, - ) - - start = time.time_ns() - response = self._opensearch_connection.search( - body=request_body, - index=indices, - request_timeout=self._opensearch_config.request_timeout, - preference=preference, - ) - end = time.time_ns() - search_request_time = round((end - start) / 1e6) - - _LOGGER.info( - "Search request completed", - extra={ - "props": { - "search_request": json.dumps(request_body, cls=OpenSearchEncoder), - "search_request_time": search_request_time, - }, - }, - ) - - return OpenSearchResponse( - raw_response=response, - request_time_ms=search_request_time, - ) - - -def _year_range_filter( - year_range: tuple[Optional[int], Optional[int]] -) -> Optional[dict[str, Any]]: - """ - Get an Opensearch filter for year range. - - The filter returned is between the first term of `year_range` and the last term, - and is inclusive. Either value can be set to None to only apply one year constraint. - """ - - policy_year_conditions = {} - if year_range[0] is not None: - policy_year_conditions["gte"] = f"01/01/{year_range[0]}" - if year_range[1] is not None: - policy_year_conditions["lte"] = f"31/12/{year_range[1]}" - - if policy_year_conditions: - return {"range": {"document_date": policy_year_conditions}} - - return None - - -class QueryBuilder: - """Helper class for building OpenSearch queries.""" - - def __init__(self, config: OpenSearchQueryConfig): - self._config = config - self._request_body: dict[str, Any] = {} - - @property - def query(self) -> Mapping[str, Any]: - """Property to allow access to the build request body.""" - - return self._request_body - - def _with_search_term_base(self): - self._request_body = { - "size": 0, # only return aggregations - "query": { - "bool": { - "should": [], - "minimum_should_match": 1, - }, - }, - "aggs": { - "sample": { - "sampler": { - "shard_size": self._config.n_passages_to_sample_per_shard - }, - "aggs": { - "top_docs": { - "terms": { - "field": OPENSEARCH_INDEX_INDEX_KEY, - "order": {"top_hit": _DEFAULT_SORT_ORDER.value}, - "size": self._config.max_doc_count, - }, - "aggs": { - "top_passage_hits": { - "top_hits": { - "_source": { - "excludes": [ - "text_embedding", - OPENSEARCH_INDEX_DESCRIPTION_EMBEDDING_KEY, # noqa: E501 - ] - }, - "size": self._config.max_passages_per_doc, - }, - }, - "top_hit": {"max": {"script": {"source": "_score"}}}, - _SORT_FIELD_MAP[SortField.DATE]: { - "stats": { - "field": _SORT_FIELD_MAP[SortField.DATE], - }, - }, - }, - }, - }, - }, - "no_unique_docs": {"cardinality": {"field": "document_slug"}}, - }, - } - - def with_semantic_query(self, query_string: str, knn: bool): - """Configure the query to search semantically for a given query string.""" - - _LOGGER.info(f"Starting embeddings generation for '{query_string}'") - start_generation = time.time_ns() - embedding = ENCODER.embed( - query_string, - normalize=False, - show_progress_bar=False, - ) - end_generation = time.time_ns() - embeddings_generation_time = round((end_generation - start_generation) / 1e6) - _LOGGER.info( - f"Completed embeddings generation for '{query_string}'", - extra={ - "props": { - "embeddings_generation_time": embeddings_generation_time, - }, - }, - ) - - self._with_search_term_base() - self._request_body["query"]["bool"]["should"] = [ - { - "bool": { - "should": [ - { - "match": { - OPENSEARCH_INDEX_NAME_KEY: { - "query": query_string, - "operator": "and", - "minimum_should_match": "2<66%", - # all terms if there are 2 or less, otherwise - # 66% of terms (rounded down) - } - } - }, - { - "match_phrase": { - OPENSEARCH_INDEX_NAME_KEY: { - "query": query_string, - "boost": 2, # TODO: configure? - } - } - }, - ], - "boost": self._config.name_boost, - } - }, - { - "bool": { - "should": [ - { - "match": { - OPENSEARCH_INDEX_DESCRIPTION_KEY: { - "query": query_string, - "boost": 3, - "operator": "and", - "minimum_should_match": "2<66%", - # all terms if there are 2 or less, otherwise - # 66% of terms (rounded down) - } - } - }, - ], - "minimum_should_match": 1, - "boost": self._config.description_boost, - }, - }, - { - "bool": { - "should": [ - { - "match": { - "text": { - "query": query_string, - "operator": "and", - "minimum_should_match": "2<66%", - # all terms if there are 2 or less, otherwise - # 66% of terms (rounded down) - }, - } - }, - ], - "minimum_should_match": 1, - "boost": self._config.embedded_text_boost, - } - }, - ] - - if knn: - self._request_body["query"]["bool"]["should"][1]["bool"]["should"].append( - { - "function_score": { - "query": { - "knn": { - OPENSEARCH_INDEX_DESCRIPTION_EMBEDDING_KEY: { - "vector": embedding, - "k": self._config.k, - }, - }, - }, - "min_score": self._config.lucene_threshold, - } - } - ) - - self._request_body["query"]["bool"]["should"][2]["bool"]["should"].append( - { - "function_score": { - "query": { - "knn": { - "text_embedding": { - "vector": embedding, - "k": self._config.k, - }, - }, - }, - "min_score": self._config.lucene_threshold, - } - } - ) - - def with_exact_query(self, query_string: str): - """Configure the query to search for an exact match to a given query string.""" - - self._with_search_term_base() - self._request_body["query"]["bool"]["should"] = [ - # Document title matching - { - "match_phrase": { - OPENSEARCH_INDEX_NAME_KEY: { - "query": query_string, - "boost": self._config.name_boost, - }, - } - }, - # Document description matching - { - "match_phrase": { - OPENSEARCH_INDEX_DESCRIPTION_KEY: { - "query": query_string, - "boost": self._config.description_boost, - }, - } - }, - # Text passage matching - { - "match_phrase": { - "text": { - "query": query_string, - }, - } - }, - ] - - def with_keyword_filter(self, field: FilterField, values: Sequence[str]): - """Add a keyword filter to the configured query.""" - filters = self._request_body["query"]["bool"].get("filter") or [] - - filters.append({"terms": {_FILTER_FIELD_MAP[field]: values}}) - self._request_body["query"]["bool"]["filter"] = filters - - def with_year_range_filter(self, year_range: tuple[Optional[int], Optional[int]]): - """Add a year range filter to the configured query.""" - - year_range_filter = _year_range_filter(year_range) - if year_range_filter is not None: - filters = self._request_body["query"]["bool"].get("filter") or [] - filters.append(year_range_filter) - self._request_body["query"]["bool"]["filter"] = filters - - def with_search_order(self, field: SortField, order: SortOrder): - """Set sort order for search results.""" - terms_field = self._request_body["aggs"]["sample"]["aggs"]["top_docs"]["terms"] - - if field == SortField.DATE: - terms_field["order"] = {f"{_SORT_FIELD_MAP[field]}.avg": order.value} - elif field == SortField.TITLE: - terms_field["order"] = {"_key": order.value} - else: - raise RuntimeError(f"Unknown sort ordering field: {field}") - - def with_required_fields(self, required_fields: Sequence[str]): - """Ensure that required fields are present in opensearch responses.""" - must_clause = self._request_body["query"]["bool"].get("must") or [] - must_clause.extend( - [{"exists": {"field": field_name}} for field_name in required_fields] - ) - self._request_body["query"]["bool"]["must"] = must_clause - - -def build_opensearch_request_body( - search_request: SearchRequestBody, - opensearch_internal_config: Optional[OpenSearchQueryConfig] = None, - sensitive_query_terms: set[str] = set(), -) -> QueryBuilder: - """Build a complete OpenSearch request body.""" - - search_config = opensearch_internal_config or OpenSearchQueryConfig( - max_passages_per_doc=search_request.max_passages_per_doc, - ) - builder = QueryBuilder(search_config) - - # Strip punctuation and leading and trailing whitespace from query string - search_request.query_string = search_request.query_string.translate( - str.maketrans("", "", string.punctuation) - ).strip() - - if search_request.exact_match: - builder.with_exact_query(search_request.query_string) - else: - sensitive_terms_in_query = [ - term - for term in sensitive_query_terms - if term in search_request.query_string.lower() - ] - - # If the query contains any sensitive terms, and the length of the - # shortest sensitive term is >=50% of the length of the query by - # number of words, then disable KNN - if ( - sensitive_terms_in_query - and len(min(sensitive_terms_in_query, key=len).split(" ")) - / len(search_request.query_string.split(" ")) - >= 0.5 - ): - use_knn = False - else: - use_knn = True - - builder.with_semantic_query(search_request.query_string, knn=use_knn) - - if search_request.sort_field is not None: - builder.with_search_order( - search_request.sort_field, - search_request.sort_order or _DEFAULT_SORT_ORDER, - ) - - if _REQUIRED_FIELDS: - builder.with_required_fields(_REQUIRED_FIELDS) - - if search_request.keyword_filters is not None: - for keyword, values in search_request.keyword_filters.items(): - builder.with_keyword_filter(keyword, values) - - if search_request.year_range is not None: - builder.with_year_range_filter(search_request.year_range) - - return builder - - -def process_search_response_body_families( - t0: float, - opensearch_response_body: OpenSearchResponse, - document_extra_info: Mapping[str, Mapping[str, str]], - limit: int = 10, - offset: int = 0, -) -> SearchResponse: - search_json_response = opensearch_response_body.raw_response - search_response_document = None - search_response_family = None - unknown_document_ids = set() - - # Aggregate into families using OrderedDict to preserve the response relevance order - families: OrderedDict[str, SearchResponseFamily] = OrderedDict() - - result_docs = search_json_response["aggregations"]["sample"]["top_docs"]["buckets"] - for result_doc in result_docs: - title_match = False - description_match = False - for document_match in result_doc["top_passage_hits"]["hits"]["hits"]: - document_match_source = document_match["_source"] - document_id = document_match_source["document_id"] - # Skip documents that do not exist in RDS or are not Published - if document_id not in document_extra_info: - unknown_document_ids.add(document_match_source["document_id"]) - continue - - # Skip documents whose family is not set to Publshed - family_status = document_extra_info[document_id]["family_status"] - if family_status != "Published": - continue - - if OPENSEARCH_INDEX_NAME_KEY in document_match_source: - # Validate as a title match - doc_match = OpenSearchResponseNameMatch(**document_match_source) - if search_response_document is None: - search_response_document = create_search_response_family_document( - doc_match, - document_extra_info, - ) - title_match = True - elif OPENSEARCH_INDEX_DESCRIPTION_KEY in document_match_source: - # Validate as a description match - doc_match = OpenSearchResponseDescriptionMatch(**document_match_source) - if search_response_document is None: - search_response_document = create_search_response_family_document( - doc_match, - document_extra_info, - ) - description_match = True - elif OPENSEARCH_INDEX_TEXT_BLOCK_KEY in document_match_source: - # Process as a text block - doc_match = OpenSearchResponsePassageMatch(**document_match_source) - if search_response_document is None: - search_response_document = create_search_response_family_document( - doc_match, - document_extra_info, - ) - response_passage = SearchResponseDocumentPassage( - text=doc_match.text, - text_block_id=doc_match.text_block_id, - text_block_page=doc_match.text_block_page, - text_block_coords=doc_match.text_block_coords, - ) - search_response_document.document_passage_matches.append( - response_passage - ) - else: - _LOGGER.error("Unexpected data in match results") - continue - - family_id = document_extra_info[doc_match.document_id]["family_import_id"] - - search_response_family = families.get(family_id) - if search_response_family is None and family_status == "Published": - search_response_family = create_search_response_family( - doc_match, - document_extra_info, - ) - families[family_id] = search_response_family - - if search_response_document is None or search_response_family is None: - _LOGGER.error( - "Unexpected or unpublished document encountered, " - "not attempting to include in results" - ) - else: - search_response_family.family_title_match = ( - title_match or search_response_family.family_title_match - ) - search_response_family.family_description_match = ( - description_match or search_response_family.family_description_match - ) - search_response_family.family_documents.append(search_response_document) - - search_response_document = None - search_response_family = None - - if unknown_document_ids: - _LOGGER.error( - "Unknown document IDs were encountered in Opensearch response", - extra={"props": {"unknown document IDs": list(unknown_document_ids)}}, - ) - - time_taken = int((time.perf_counter_ns() - t0) / 1e6) - search_response = SearchResponse( - hits=len(families), - query_time_ms=opensearch_response_body.request_time_ms, - total_time_ms=time_taken, - families=list(families.values())[offset : offset + limit], - ) - - return search_response - - -def create_search_response_family_document( - opensearch_match: OpenSearchResponseMatchBase, - document_family_info: Mapping[str, Mapping[str, str]], -) -> SearchResponseFamilyDocument: - document_info = document_family_info[opensearch_match.document_id] - return SearchResponseFamilyDocument( - document_title=document_info["title"], - document_type=opensearch_match.document_type, - document_source_url=opensearch_match.document_source_url, - document_url=to_cdn_url(opensearch_match.document_cdn_object), - document_content_type=opensearch_match.document_content_type, - document_slug=document_info["slug"], - document_passage_matches=[], - ) - - -def create_search_response_family( - opensearch_match: OpenSearchResponseMatchBase, - document_family_info: Mapping[str, Mapping[str, str]], -) -> SearchResponseFamily: - document_info = document_family_info[opensearch_match.document_id] - return SearchResponseFamily( - family_slug=document_info["family_slug"], - family_name=document_info["family_title"], - family_description=document_info["family_description"], - family_category=document_info["family_category"], - family_date=document_info["family_published_date"], - family_last_updated_date=document_info["family_last_updated_date"], - family_source=opensearch_match.document_source, - family_geography=opensearch_match.document_geography, - family_title_match=False, - family_description_match=False, - # TODO: Remove unused fields below? - # ↓ Stuff we don't currently use for search ↓ - family_metadata={}, - family_documents=[], - ) - - def _get_extra_csv_info( db: Session, families: Sequence[SearchResponseFamily], diff --git a/app/core/sensitive_query_terms.tsv b/app/core/sensitive_query_terms.tsv deleted file mode 100644 index e4c26a64..00000000 --- a/app/core/sensitive_query_terms.tsv +++ /dev/null @@ -1,2987 +0,0 @@ -sex/gender transgender -sex/gender transgender man -sex/gender transgender woman -sex/gender non-binary -sex/gender non binary -sex/gender nonbinary -sex/gender bigender -sex/gender genderfluid -sex/gender agender -sex/gender eunuch -sex/gender transgender men -sex/gender transgender women -sex/gender men -sex/gender women -sex/gender man -sex/gender woman -sex/gender boy -sex/gender girl -nationality Abazins -nationality Abenaki -nationality Abitibiwinni First Nation -nationality Abkhazians -nationality Aboriginal Australians -nationality Abu Alkian -nationality Abydonian -nationality Acadians -nationality Aceh -nationality Acehnese -nationality Achakzai -nationality Acholi -nationality Achomi -nationality Achuar -nationality Acjachemen -nationality Acoma Pueblo tribe -nationality Adi -nationality Adivasi -nationality Adnyamathanha -nationality Aedui -nationality Afar -nationality Afghans -nationality Africa -nationality African -nationality African Americans -nationality African Brazilian -nationality African Haitian -nationality African Jamaican -nationality Afrikaners -nationality Afro Argentines -nationality Afro Caribbeans -nationality Afro Dominicans -nationality Afro Latin Americans -nationality Afro Mexicans -nationality Afro Venezuelans -nationality Afro-Barbadian -nationality Afro-Colombians -nationality Afro-Costa Rican -nationality Afro-Cuban -nationality Afro-Descendant -nationality Afro-Germans -nationality Afro-Nicaraguan -nationality Afro-Peruvian -nationality Afro-Russian -nationality Afro-Spaniard -nationality Afro-Uruguayan -nationality Ag Qoyunlu tribe -nationality Aguaruna -nationality Ahom -nationality Aiel -nationality Aimaq -nationality Ainu -nationality Akan -nationality Akan -nationality Akha -nationality Akwamu -nationality Al Bhed -nationality Alaska Natives -nationality Alaskan Athabaskans -nationality Albanians -nationality Aleut -nationality Algerians -nationality Algonquian -nationality Algonquin -nationality Algonquin -nationality Altai -nationality Alutiiq -nationality Alyawarre -nationality Amazigh -nationality Amazons -nationality American -nationality American Japanese -nationality American Japanese -nationality American Jews -nationality American-born Chinese -nationality Americans -nationality Americans -nationality Amhara -nationality Ami -nationality Amish -nationality Ammon -nationality Amorites -nationality Anatolians -nationality Ancient -nationality Ancient Celts -nationality Ancient Macedonians -nationality Ancient Pueblo -nationality Andalusian -nationality Andorian -nationality Andorrans -nationality Angles -nationality Anglo-Celtic Australians -nationality Anglo-Indian -nationality Anglo-Irish -nationality Anglo-Normans -nationality Anglo-Saxons -nationality Anishinaabe -nationality Anlo Ewe -nationality Anmatyerre -nationality Antes -nationality Antigua -nationality Antiochian Greeks -nationality Apache -nationality Apalachicola -nationality Aquitani -nationality Arab Americans -nationality Arab Canadians -nationality Arab Christians -nationality Arab citizens of Israel -nationality Arab Venezuelan -nationality Arab-Berber -nationality Arabized Berber -nationality Arabs -nationality Arain -nationality Arameans -nationality Arameans -nationality Arapaho -nationality Argentine Americans -nationality Argentines -nationality Arikara -nationality Armenian American -nationality Armenian Argentine -nationality Armenian Australian -nationality Armenian Canadians -nationality Armenians -nationality Aromanians -nationality Arrernte -nationality Asante -nationality Ashanti -nationality Ashkenazi Jews -nationality Asian -nationality Asian Americans -nationality Asian Brazilians -nationality Asian Canadians -nationality Asian New Zealanders -nationality Asmat -nationality Assamese -nationality Assiniboine -nationality Assyrian -nationality Astrakhan Tatars -nationality Asutsuare -nationality Atayal -nationality Atikamekw -nationality Augment -nationality Australia -nationality Australian American -nationality Australian Jews -nationality Australians -nationality Austrian Americans -nationality Austrian Empire -nationality Austrians -nationality Avars -nationality Awori tribe -nationality Axanar -nationality Aymara -nationality Azerbaijanis -nationality Aztecs -nationality Baga -nationality Bagale Thapa -nationality Baganda -nationality Bagobo -nationality Bahamians -nationality Bahawalpur State -nationality Bahun -nationality Bai -nationality Bailgu -nationality Bainuk -nationality Bajau -nationality Bajoran -nationality Baker Lake -nationality Bakhtiari -nationality Bakoena -nationality Balanta -nationality Balete -nationality Balinese -nationality Baloch -nationality Baloch of Iran -nationality Baltic Germans -nationality Balts -nationality Bamar -nationality Bambara -nationality Bamileke -nationality Bamun -nationality Banat Swabians -nationality Bangladeshis -nationality Bangwaketse -nationality Bani Assad -nationality Banjar -nationality Bannock -nationality Bantenese -nationality Bantu -nationality Banu Khazraj -nationality Banyankole -nationality Barbadian American -nationality Barbadians -nationality Bardings -nationality Bari -nationality Barngarla -nationality Basa -nationality Bashkir -nationality Basoga -nationality Basque -nationality Basque American -nationality Basque Colombians -nationality Bassa -nationality Batak -nationality Bedouin -nationality Beja -nationality Belarusians -nationality Belgian American -nationality Belgian Canadians -nationality Belgian Malinois -nationality Belgians -nationality Belgo-Moroccans -nationality Belizean Americans -nationality Belizean Creole -nationality Bemba -nationality Benadiri -nationality Bene Israel -nationality Benga -nationality Bengali -nationality Bengali Hindus -nationality Bengali Muslims -nationality Beornings -nationality Beothuk -nationality Berbers -nationality Bessarabian Bulgarians -nationality Bessi -nationality Beta Israel -nationality Betawi -nationality Betazoid -nationality Bhil -nationality Biblical Hittites -nationality Bibulman -nationality Bicolano -nationality Bisaya -nationality Bitterroot Salish -nationality Black -nationality Black Africans -nationality Black British -nationality Black Canadians -nationality Black Seminoles -nationality Blackfeet Nation -nationality Blackfoot Confederacy -nationality Blasians -nationality Bohemian -nationality Boholano -nationality Bolivians -nationality Bora -nationality Bosniaks -nationality Bosnians -nationality Botocudo -nationality Bouyei -nationality Brahmin -nationality Brahui -nationality Brancos -nationality Brazilian Americans -nationality Brazilian Canadians -nationality Brazilians -nationality Brazilian -nationality Bretons -nationality British -nationality British Americans -nationality British Armenians -nationality British Chinese -nationality British Indian -nationality British Iraqis -nationality British Jamaican -nationality British Jews -nationality British Nigerian -nationality British Pakistanis -nationality British Sri Lankans -nationality Brushed Ceramics Culture -nationality Bubi -nationality Buenos Aires Province -nationality Bugis -nationality Bukharan Jews -nationality Bulgarian Canadians -nationality Bulgarians -nationality Bulgars -nationality Bundjalung -nationality Bundjalung -nationality Bungandidj -nationality Burarra -nationality Burgenland Croats -nationality Burgundy -nationality Burmese Americans -nationality Buryats -nationality Butchulla -nationality Byzantine Greeks -nationality Caddo -nationality Cadusii -nationality Cagot -nationality Cahuilla -nationality Cajun -nationality Calabar -nationality Cambeba -nationality Cambodian Americans -nationality Canada -nationality Canadian Americans -nationality Canadians -nationality Canadian -nationality Canarian Americans -nationality Candoshi -nationality Cantonese -nationality Cantonese -nationality Capiznon -nationality Cappadocian Greeks -nationality Cardassian -nationality Caribbean -nationality Carinthian Slovenes -nationality Carolinian -nationality Carrier -nationality Cashibo -nationality Castilians -nationality Castizo -nationality Catalan -nationality Catawba -nationality Caucasian -nationality Cayuga -nationality Cebuano -nationality Celtic Britons -nationality Celts -nationality Chaga -nationality Chagossians -nationality Cham -nationality Chamar -nationality Chameloid -nationality Chamorro -nationality Chamorro -nationality Chechens -nationality Chemehuevi -nationality Cherokee -nationality Cherusci -nationality Cheyenne River Sioux Tribe -nationality Cheyennes -nationality Chhetri -nationality Chickasaw -nationality Chilcotin -nationality Chilean American -nationality Chilean Canadians -nationality Chileans -nationality Chin -nationality Chinantec -nationality Chinese -nationality Chinese -nationality Chinese Americans -nationality Chinese Australians -nationality Chinese Cambodian -nationality Chinese Canadians -nationality Chinese Indonesians -nationality Chinese New Zealander -nationality Chinese Singaporeans -nationality Chinese Surinamese -nationality Chinese Tatars -nationality Chinese Venezuelan -nationality Chinois -nationality Chiricahua -nationality Chitpavan -nationality Chochenyo -nationality Choctaw -nationality Choctaw Nation of Oklahoma -nationality Choros -nationality Chukchi -nationality Chuvash -nationality Ciguayos -nationality Circassians -nationality Coast Salish -nationality Cochiti -nationality Coharie -nationality Colombian Americans -nationality Colombians -nationality Coloured -nationality Colville tribe -nationality Comanche -nationality Converso -nationality Cook Islands Maori -nationality Coos -nationality Copts -nationality Cora -nationality Cornish -nationality Corsicans -nationality Costa Ricans -nationality Coushatta -nationality Cowlitz -nationality Cree -nationality Creole -nationality Creoles -nationality Crimean Karaites -nationality Crimean Tatar -nationality Crimean Tatars -nationality Croatian Americans -nationality Croatian Australian -nationality Croatian Canadians -nationality Croats -nationality Crow Nation -nationality Cuban -nationality Cuban American -nationality Cubans -nationality Cushitic -nationality Czech Americans -nationality Czech Brazilians -nationality Czech Canadians -nationality Czech-Brazilians -nationality Czechs -nationality Czech -nationality Dacians -nationality Dagbamba -nationality Dai -nationality Dakota -nationality Dalit -nationality Dalmatae -nationality Danes -nationality Danish Americans -nationality Danu -nationality Danube Swabians -nationality Dargwa -nationality Darod -nationality Daur -nationality Dayak -nationality Dene -nationality Denobulan -nationality Desano -nationality Devangar -nationality Dhuwal -nationality Di -nationality Diffa Arabs -nationality Dinka -nationality Dioula -nationality Djaru -nationality Dogrib -nationality Doliones -nationality Dom -nationality Dominican American -nationality Dominicans -nationality Don Cossacks -nationality Dorians -nationality Dothraki -nationality Doukhobors -nationality Dravida -nationality Druze -nationality Duala -nationality Dunlendings -nationality Dusun -nationality Dutch -nationality Dutch American -nationality Dzungar -nationality Easterlings -nationality Eastern Europeans -nationality Ebira -nationality Edain -nationality Edo -nationality Efik -nationality Egba -nationality Egyptian Americans -nationality Egyptian Arabic -nationality Egyptian Canadians -nationality Egyptians -nationality Egyptian -nationality Ekiti -nationality Eloi -nationality Emiratis -nationality Emishi -nationality England -nationality English -nationality English American -nationality English Australian -nationality English Canadians -nationality English River First Nation -nationality Eritreans -nationality Erzya -nationality Esan -nationality Eskimo -nationality Esselen -nationality Estonians -nationality Estonian -nationality Ethiopians -nationality Ethiopian -nationality Etruscans -nationality Eurasian -nationality European -nationality European Americans -nationality European Canadians -nationality European New Zealanders -nationality Evenks -nationality Ewe -nationality Ewe -nationality Ewondo -nationality Eyeish -nationality Ezhava -nationality Falathrim -nationality Falmari -nationality Fang -nationality Fante -nationality Faroese -nationality Ferengi -nationality Feylis -nationality Fijians -nationality Filipino -nationality Filipinos -nationality Filipino Americans -nationality Filipino Canadians -nationality Finnish Americans -nationality Finnish Kale -nationality Finnish Ukrainians -nationality Finns -nationality Fipa -nationality Fire Nation -nationality First Nations -nationality Flemings -nationality Fon -nationality Franco-Albertan -nationality Franco-Manitoban -nationality Franco-Mauritian -nationality Franco-Ontarian -nationality Franks -nationality Fremen -nationality French -nationality French Americans -nationality French Brazilian -nationality French Canadians -nationality French Trotter -nationality French-speaking Quebecer -nationality Frisians -nationality Fulani -nationality Fulbe -nationality Funj -nationality Fur -nationality Ga -nationality Ga-Adangbe -nationality Gaels -nationality Gagauz -nationality Galicians -nationality Gallaeci -nationality Gamo -nationality Garhajis -nationality Gauls -nationality Geats -nationality Gedeo -nationality Georgians -nationality Gepids -nationality German Americans -nationality German Brazilians -nationality German Canadians -nationality German diaspora -nationality German Texan -nationality German-Russians -nationality Germanic -nationality Germans -nationality German -nationality Germans of Romania -nationality Germans of Serbia -nationality Germany -nationality Gerudo -nationality Gezawa -nationality Ghanaian American -nationality Ghanaians -nationality Ghanaian -nationality Ghilzai -nationality Ghorbati -nationality Gibraltarian -nationality Gija -nationality Gisu -nationality Gitxsan -nationality Godala -nationality Gonja -nationality Gonja -nationality Gooniyandi -nationality Gorals -nationality Goths -nationality Gotlander -nationality Greek American -nationality Greek Australian -nationality Greek Canadians -nationality Greek Cypriots -nationality Greeks -nationality Greek -nationality Greenlandic Inuit -nationality Greenskins -nationality Grenadians -nationality Groningers -nationality Gros Ventre -nationality Guanches -nationality Gujarati -nationality Gujaratis -nationality Gujarati Americans -nationality Gullah -nationality Gumbaynggirr -nationality Guna -nationality Gunai -nationality Gunditjmara -nationality Gunwinggu -nationality Gurage -nationality Gurbeti -nationality Gurene -nationality Gurindji -nationality Gurjar -nationality Gurung -nationality Gushiegu -nationality Guugu Yimithirr -nationality Guyanese -nationality Gwong -nationality Habr Awal -nationality Hadiya -nationality Haida -nationality Haida -nationality Haisla -nationality Haitian -nationality Haitians -nationality Haitian American -nationality Haitians -nationality Hakka -nationality Han Chinese -nationality Hani -nationality Hapa -nationality Harakmbut -nationality Hashemites -nationality Haudenosaunee Confederacy -nationality Hausa -nationality Hausa -nationality Hayato -nationality Hazaras -nationality Hebrews -nationality Heiltsuk -nationality Hellenes -nationality Helvetii -nationality Herero -nationality Hesquiaht First Nation -nationality Hevelli -nationality Hidatsa -nationality Hiligaynon -nationality Hindustani ethnics -nationality Hispanic -nationality Hittites -nationality Hivite -nationality Hlai -nationality Hmong -nationality Hmong American -nationality Ho -nationality Ho-Chunk -nationality Hoa -nationality Hobbit -nationality Hoklo -nationality Hopi -nationality Houara -nationality House of Hador -nationality House of Haleth -nationality Hualapai -nationality Huaxia -nationality Huaxia -nationality Huguenot -nationality Hui -nationality Huilliche -nationality Hungarian -nationality Hungarian Americans -nationality Hungarians -nationality Hunkpapa -nationality Huns -nationality Huron -nationality Hutsuls -nationality Hutu -nationality Iapyges -nationality Iberians -nationality Ibibio -nationality Icelanders -nationality Icelandic Americans -nationality Icelandic Canadians -nationality Idoma -nationality Igbo -nationality Igbo -nationality Igorot -nationality Ijaw -nationality Ijebu -nationality Illyrians -nationality Ilocano -nationality Imperial Germans -nationality Inari Sami -nationality Inca -nationality Indian Americans -nationality Indian Australian -nationality Indian Mexicans -nationality Indian New Zealander -nationality Indian Singaporeans -nationality Indian South Africans -nationality Indians -nationality Indigenous Australians -nationality Indo Caribbeans -nationality Indo-Aryan -nationality Indo-Canadians -nationality Indo-Iranians -nationality Indo-Surinamese -nationality Indonesian Americans -nationality Indonesians -nationality Ingush -nationality Innu -nationality Inuit -nationality Inupiat -nationality Inuvialuit -nationality Iowa -nationality Iran -nationality Iranian -nationality Iranian American -nationality Iranian Arabs -nationality Iranian Armenians -nationality Iranian Azerbaijanis -nationality Iranian Georgians -nationality Iranian Turkmen -nationality Iranians -nationality Iraqi Americans -nationality Iraqi Canadians -nationality Iraqi Turkmens -nationality Iraqis -nationality Irish -nationality Irish Americans -nationality Irish Argentine -nationality Irish Australian -nationality Irish Canadians -nationality Irish New Zealanders -nationality Irish Quebecers -nationality Isleta -nationality Isoko -nationality Israel -nationality Israeli Jews -nationality Israelis -nationality Israelites -nationality Issei -nationality Italian Americans -nationality Italian Argentines -nationality Italian Brazilian -nationality Italian Brazilians -nationality Italian Canadians -nationality Italian Jews -nationality Italian New Zealanders -nationality Italian Scots -nationality Italians -nationality Italians of Romania -nationality Italic -nationality Italy -nationality Itsekiri -nationality Ivatan -nationality Iwaidja -nationality Ixil -nationality Izumo zoku -nationality Jamaica -nationality Jamaican American -nationality Jamaican Maroons -nationality Jamaican -nationality Jamaicans -nationality Japanese -nationality Japanese American -nationality Japanese Australian -nationality Japanese Brazilians -nationality Japanese Canadians -nationality Japanese Venezuelan -nationality Jaredites -nationality Javanese -nationality Jemez Puebloans -nationality Jew -nationality Jews -nationality Jewish -nationality Jicarilla Apache -nationality Jingpo -nationality Joketsuzoku -nationality Judaism -nationality Juif -nationality Jukun -nationality Jurchen -nationality Kabarday -nationality Kabyle -nationality Kadazan-Dusun -nationality Kadu -nationality Kafficho -nationality Kahlan -nationality Kaiadilt -nationality Kaingang -nationality Kakwa -nationality Kalaallit -nationality Kalapuya -nationality Kalbelia -nationality Kalderash -nationality Kale -nationality Kalenjin -nationality Kalinago -nationality Kalmyks -nationality Kam -nationality Kamba -nationality Kamchadals -nationality Kami -nationality Kamilaroi -nationality Kamoro -nationality Kanak -nationality Kankalis -nationality Kannada -nationality Kanuri -nationality Kapampangan -nationality Kaqchikel -nationality Karakalpaks -nationality Karay-a -nationality Karen -nationality Karenni -nationality Karo -nationality Karuk -nationality Kashmiri -nationality Kashmiri Pandit -nationality Kashubians -nationality Kaskaskia -nationality Kaurna -nationality Kayan -nationality Kayapo -nationality Kayastha -nationality Kazakhs -nationality Kelantanese Malay -nationality Kenite -nationality Kenyan Americans -nationality Keres -nationality Kewa Pueblo -nationality Khanty -nationality Khas -nationality Khasas -nationality Khazars -nationality Khitan -nationality Khmer -nationality Khond -nationality Khorchin Mongols -nationality Kickapoo -nationality Kikuyu -nationality Kilamiut -nationality Kildin Saami -nationality Kiowa -nationality Kipchaks -nationality Kisii -nationality Kist -nationality Kitigan Zibi Anishinabeg -nationality Klemantan -nationality Kluane First Nation -nationality Koibal -nationality Kokama -nationality Kokang -nationality Kombe -nationality Komi -nationality Kongo -nationality Kongu Vellalar -nationality Konkomba -nationality Korea -nationality Korean American -nationality Korean Australian -nationality Korean Brazilian -nationality Korean Canadians -nationality Korean -nationality Koreans -nationality Koryo-saram -nationality Kpando -nationality Krahn -nationality Krobo -nationality Kryptonian -nationality Kshatriya -nationality Kubu -nationality Kukatja -nationality Kumawat -nationality Kumawu -nationality Kumeyaay -nationality Kumyks -nationality Kurdish -nationality Kurdish American -nationality Kurds -nationality Kushwaha -nationality Kusunda -nationality Kven -nationality Kyrgyz -nationality Lac Seul First Nation -nationality Ladin -nationality Laguna -nationality Lahu -nationality Lake Babine Nation -nationality Lakota -nationality Langha -nationality Langi -nationality Lao -nationality Latin Americans -nationality Latino -nationality Latvian American -nationality Latvians -nationality Laz -nationality Lebanese -nationality Lebanese American -nationality Lebanese Australian -nationality Lebanese Brazilians -nationality Lebanese Canadians -nationality Lebanese Colombian -nationality Lebanese Venezuelan -nationality Lechites -nationality Lemkos -nationality Lenape -nationality Lenca -nationality Lezgian -nationality Liberian American -nationality Libu -nationality Ligures -nationality Limbu -nationality Lipan Apache -nationality Lipka Tatars -nationality Lisu -nationality Lithuanian American -nationality Lithuanian Jews -nationality Lithuanian -nationality Lithuanians -nationality Liu -nationality Lombards -nationality Lotud -nationality Louisiana Creole -nationality Lovari -nationality Lower Brule Sioux Tribe -nationality Luba -nationality Lucenses -nationality Lucumi -nationality Lugbara -nationality Luguru -nationality Luhya -nationality Lumbee -nationality Lummi -nationality Lunda -nationality Luo -nationality Luritja -nationality Lurs -nationality Lutici -nationality Luxembourgers -nationality Maasai -nationality Macedonians -nationality Macorix -nationality Macushi -nationality Madurese -nationality Magadha -nationality Magars -nationality Maghrebi Jews -nationality Maidu -nationality Malawian -nationality Malawians -nationality Malay Singaporeans -nationality Malayali -nationality Malays -nationality Malaysian -nationality Malaysian Americans -nationality Malaysian Chinese -nationality Malaysian Indian -nationality Malaysian Malays -nationality Maltese -nationality Maltese American -nationality Mam -nationality Mamprusi -nationality Manchu -nationality Mandaeans -nationality Mandalorians -nationality Mandan -nationality Mandinka -nationality Manganiar -nationality Mangbetu -nationality Mangue -nationality Manobo -nationality Mapuches -nationality Maranao -nationality Marathi -nationality Marehan -nationality Mari -nationality Maropa -nationality Marsi -nationality Marwari -nationality Mashpee Wampanoag Tribe -nationality Masmuda -nationality Massachusett -nationality Masurians -nationality Mauri -nationality Maya -nationality Maya civilization -nationality Mbundu -nationality Mdewakanton -nationality Medes -nationality Mediterranean race -nationality Meghwal -nationality Meherrin -nationality Meitei -nationality Meknes -nationality Melanesians -nationality Memon -nationality Mende -nationality Mennonites -nationality Menominee -nationality Meriam -nationality Meru -nationality Mescalero -nationality Meskhetian Turks -nationality Meskwaki -nationality Mestizo Colombian -nationality Mexican American -nationality Mexicans -nationality Mhallami -nationality Miami -nationality Miao -nationality Miccosukee -nationality Micronesians -nationality Midian -nationality Midianites -nationality Miji -nationality Mijikenda -nationality Milagro-Quevedo culture -nationality Miluk -nationality Minangkabau -nationality Miniconjou -nationality Minnesota Chippewa Tribe -nationality Miskitos -nationality Mission Indians -nationality Mississaugas -nationality Missouria -nationality Miwok -nationality Mixed -nationality Mixtec -nationality Mizrahi Jews -nationality Moab -nationality Modh -nationality Modoc -nationality Mohawk -nationality Mohegan -nationality Mokshas -nationality Moldovans -nationality Moluccans -nationality Mon -nationality Mongo -nationality Mongolian American -nationality Mongolic -nationality Mongols -nationality Monguor -nationality Monpa -nationality Montenegrins -nationality Montserrat -nationality Moors -nationality Moors -nationality Moravians -nationality Mordvins -nationality Morisco -nationality Moroccan Dutch -nationality Moroccan Jews -nationality Moroccans -nationality Mosuo -nationality Mountain Mari -nationality Mozarab -nationality Mpongwe -nationality Muhajir -nationality Multiracial American -nationality Munchkin -nationality Munduruku -nationality Munsee -nationality Mununjali clan -nationality Muong -nationality Murri -nationality Murut -nationality Muruwari -nationality Muscogee -nationality Muslim -nationality Muslims -nationality Musulamii -nationality Muthi Muthi -nationality Myene -nationality Myrmidons -nationality Nabataeans -nationality Naga -nationality Nahuas -nationality Naimans -nationality Nair -nationality Nakhi -nationality Nakota -nationality Nanai -nationality Nanda tribe -nationality Nandor -nationality Narentines -nationality Narungga -nationality Naskapi -nationality Native Hawaiians -nationality Navajo -nationality Nchumbulu -nationality Ndyuka -nationality Negev Bedouin -nationality Negrito -nationality Neimoidians -nationality Nenets -nationality Nepalese Americans -nationality Nephites -nationality New Christian -nationality New Zealand American -nationality New Zealanders -nationality Newar -nationality Nez Perce -nationality Ngadjuri -nationality Ngaiawang -nationality Ngarigo -nationality Ngarinjin -nationality Ngarluma -nationality Ngbaka -nationality Ngunnawal -nationality Ni-Vanuatu -nationality Nibblonians -nationality Nicaraguans -nationality Nigerian -nationality Nigerian American -nationality Nigerian Canadians -nationality Nigerians -nationality Nipissing First Nation -nationality Noldor -nationality Noongar -nationality Normans -nationality Norsemen -nationality North Germanic -nationality North Korean -nationality North Koreans -nationality Northern Cheyenne Tribe -nationality Northern Irish -nationality Northern Ndebele -nationality Norwegian Americans -nationality Norwegian -nationality Norwegians -nationality Norzai -nationality Novgorod Slavs -nationality Nsawkaw -nationality Nuba -nationality Nubian -nationality Nuer -nationality Nung Rawang -nationality Nuristani -nationality Nyakyusa -nationality Nyamwezi -nationality Nyanga -nationality Nzakara -nationality Obotrites -nationality Ocampa -nationality Occaneechi -nationality Occitans -nationality Odawa -nationality Odia -nationality Ogan -nationality Oghuz Turks -nationality Oglala Lakota -nationality Oglala Sioux Tribe -nationality Ogoni -nationality Ohkay Owingeh -nationality Ohkay Owingeh -nationality Ohlone -nationality Oirats -nationality Oji-Cree -nationality Ojibwe -nationality Okanagan -nationality Okinawan -nationality Old Order Amish -nationality Old Prussians -nationality Omaha -nationality Oneida -nationality Onondaga -nationality Orang Asli -nationality Organian -nationality Orion -nationality Oromo -nationality Osage Nation -nationality Ossetians -nationality Ostrogoths -nationality Otoe tribe -nationality Otomi -nationality Ottoman Greeks -nationality Ottoman Turks -nationality Ovambo -nationality Ovimbundu -nationality Ozbek -nationality Pacific Islanders -nationality Paez -nationality Paiute -nationality Paiwan -nationality Pakistani American -nationality Pakistanis -nationality Palaung -nationality Palembangese -nationality Palestinian American -nationality Palestinian Jews -nationality Palestinian -nationality Palestinians -nationality Pamiri -nationality Pamunkey -nationality Panamanian American -nationality Panamanians -nationality Pangasinan -nationality Pannonians -nationality Papel -nationality Papuan -nationality Pardo Brazilians -nationality Pare -nationality Parsi -nationality Parthians -nationality Pashayi -nationality Pashtuns -nationality Patamona -nationality Patawomeck -nationality Pawnee -nationality Pazeh -nationality Pemon -nationality Pennsylvania Dutch -nationality Peoria tribe -nationality Peranakan -nationality Persians -nationality Peru American -nationality Peruvian Australian -nationality Peruvians -nationality Petty Dwarves -nationality Phanariotes -nationality Philistines -nationality Piapoco -nationality Piaroa -nationality Picts -nationality Picts -nationality Pied-Noir -nationality Piegan Blackfeet -nationality Pima -nationality Pima Bajo -nationality Pintupi -nationality Piro Pueblos -nationality Pitcairn Islanders -nationality Plains Apache -nationality Plains Cree -nationality Plains Indians -nationality Polabian Slavs -nationality Polans -nationality Polans -nationality Poles -nationality Polish Americans -nationality Polish Australian -nationality Polish Brazilian -nationality Polish Canadians -nationality Polynesians -nationality Pomo -nationality Ponca -nationality Pondo -nationality Porcko -nationality Portuguese -nationality Portuguese Africans -nationality Portuguese Americans -nationality Portuguese Brazilians -nationality Portuguese Canadians -nationality Portuguese Venezuelan -nationality Potawatomi -nationality Potiguara -nationality Powhatan -nationality Proto-Indo-Europeans -nationality Puebloan -nationality Puerto Ricans -nationality Pumi -nationality Punjabi -nationality Punjabis -nationality Puyallup -nationality Puyuma -nationality Qiang -nationality Qiang -nationality Qibi -nationality Qizilbash -nationality Quapaw -nationality Quebeckers -nationality Quechan -nationality Quechua -nationality Quinault -nationality Qulla -nationality Quraysh -nationality Rai -nationality Rajasthani -nationality Rakhine -nationality Rapa Iti -nationality Rapa Nui -nationality Rappahannock Tribe -nationality Rawas -nationality Remans -nationality Rigellian -nationality Rincon Reservation -nationality Rito -nationality River Severn -nationality Rohingya -nationality Rohirrim -nationality Romani -nationality Romani -nationality Romani Americans -nationality Romanian Americans -nationality Romanian Argentines -nationality Romanian Canadians -nationality Romanian diaspora -nationality Romanian Jews -nationality Romanians -nationality Romanians of Serbia -nationality Romanichal -nationality Romaniote Jews -nationality Romulan -nationality Rongowhakaata -nationality Rosebud Sioux Tribe -nationality Rotuma -nationality Rotuman -nationality Russia -nationality Russian Americans -nationality Russian Brazilians -nationality Russian Canadians -nationality Russian Jews -nationality Russian Mennonite -nationality Russian -nationality Russians -nationality Rusyns -nationality Ruthenians -nationality Rutul -nationality Rutuli -nationality Ryukyuan -nationality Sabines -nationality Sac and Fox Nation -nationality Sahrawi -nationality Saint Petersburg -nationality Saiyan -nationality Sakhalin Koreans -nationality Sakizaya -nationality Salentini -nationality Salian Franks -nationality Salish -nationality Salvadoran Americans -nationality Samantha Kshatriya -nationality Samaritan -nationality Samnites -nationality Samo -nationality Samoan American -nationality Samoan New Zealander -nationality Samoans -nationality San -nationality San Carlos Apache Tribe -nationality San Ildefonso Pueblo -nationality Sanhaja -nationality Sankethi -nationality Sans Arc -nationality Santal -nationality Santee tribe -nationality Saracen -nationality Saraiki -nationality Sarakatsani -nationality Sarki -nationality Sarmatians -nationality Sasak -nationality Satedan -nationality Sauk -nationality Saulteaux -nationality Savoyard -nationality Sawa -nationality Saxons -nationality Sayisi Dene -nationality Sayyid -nationality Scandinavian Americans -nationality Scandinavians -nationality Scotch-Irish Americans -nationality Scotch-Irish Canadians -nationality Scotland -nationality Scottish -nationality Scots -nationality Scottish American -nationality Scottish Australian -nationality Scottish Canadians -nationality Sechelt -nationality Seediq -nationality Semai -nationality Seminole -nationality Seminole Tribe of Florida -nationality Seneca -nationality Senegalese -nationality Sengwer -nationality Senufo -nationality Sephardi Jews -nationality Serbian -nationality Serbian Americans -nationality Serbs -nationality Serbs of Croatia -nationality Serbs of Montenegro -nationality Seri -nationality Serrano -nationality Servitka Roma -nationality Shabak -nationality Shambala -nationality Shan -nationality Shawnee -nationality Shilha -nationality Shinnecock Indian Nation -nationality Shona -nationality Shoshone -nationality Siberian -nationality Siberians -nationality Sicels -nationality Sicilian Americans -nationality Sicilians -nationality Sidama -nationality Siddi -nationality Sihasapa -nationality Sikh -nationality Sikhs -nationality Siksika Nation -nationality Silesians -nationality Silvan Elves -nationality Sindar -nationality Sindhi Rajput -nationality Sindhis -nationality Singaporean Australians -nationality Singaporeans -nationality Sinhalese -nationality Sinti -nationality Sioux -nationality Sisseton Wahpeton Oyate -nationality Sitka Tribe of Alaska -nationality Skagit tribes -nationality Skolts -nationality Skownan First Nation -nationality Slavs -nationality Slovak Americans -nationality Slovak -nationality Slovaks -nationality Slovene American -nationality Slovenes -nationality Slovenian Germans -nationality Snohomish tribe -nationality Sogdo -nationality Somali American -nationality Somali -nationality Somalis -nationality Songhai -nationality Sorbs -nationality Sotho -nationality South African New Zealander -nationality South Africans -nationality South Asia -nationality South Asian Americans -nationality South Korean -nationality South Koreans -nationality South Sea Islander -nationality South Sudanese Canadians -nationality Soviet -nationality Spaniards -nationality Spanish American -nationality Spanish Argentines -nationality Speakers of Wu Chinese -nationality Spokane -nationality Squamish -nationality Sri Lankan Americans -nationality Sri Lankan Tamils -nationality Standing Rock Sioux Tribe -nationality Stateside Puerto Ricans -nationality Stoors -nationality Sudanese -nationality Sudanese American -nationality Sudanese Arabs -nationality Sudeten Germans -nationality Suebi -nationality Sumba -nationality Sundanese -nationality Surinamers -nationality Swabians -nationality Swahili -nationality Swedes -nationality Swedish Americans -nationality Swiss -nationality Swiss Americans -nationality Syrian Americans -nationality Syrian Canadians -nationality Syrian Jews -nationality Syrian Turkmen -nationality Syrians -nationality Tagalog -nationality Tagbanwa -nationality Tagish -nationality Tahitians -nationality Tahltan -nationality Tai -nationality Tainui -nationality Taiwan Japanese -nationality Taiwanese -nationality Taiwanese Americans -nationality Taiwanese Hakka -nationality Taiwanese Plains Aborigines -nationality Tajiks -nationality Talaxian -nationality Talysh -nationality Tamang -nationality Tamiang -nationality Tamil -nationality Tamils -nationality Tamil Americans -nationality Tamil Canadians -nationality Tangale -nationality Tangut -nationality Taos Pueblo -nationality Tarahumara -nationality Tatar -nationality Tatars -nationality Tausug -nationality Tay -nationality Te Arawa -nationality Te Rarawa -nationality Te Roroa -nationality Tejano -nationality Teke -nationality Teleri -nationality Temne -nationality Temuan -nationality Tenharim -nationality Tenino -nationality Teptjars -nationality Ter Sami -nationality Terena -nationality Teso -nationality Tesuque -nationality Tewa -nationality Thadou -nationality Thai -nationality Thai Americans -nationality Thai Britons -nationality Thai Chinese -nationality Thakuri -nationality Tharawal -nationality Tharu -nationality Thracians -nationality Tibetan -nationality Tigray -nationality Tiv -nationality Tiwi -nationality Tlahuica -nationality Tlingit -nationality Toba Batak -nationality Togolese -nationality Tokelauan -nationality Tolai -nationality Toli -nationality Tolupan -nationality Tonga -nationality Tongan American -nationality Tongans -nationality Tongva -nationality Tonkawa -nationality Toro -nationality Torres Strait Islanders -nationality Toucouleur -nationality Transylvanian Saxons -nationality Tribe of Asher -nationality Tribe of Benjamin -nationality Tribe of Dan -nationality Tribe of Levi -nationality Trill -nationality Trinidadians and Tobagonians -nationality Trinovantes -nationality Trojans -nationality Truku -nationality Tsimshian -nationality Tswana -nationality Tuareg -nationality Tucano -nationality Tujia -nationality Tukkers -nationality Tungusic -nationality Tunisians -nationality Tunumiit -nationality Tupuri -nationality Turco-Mongol -nationality Turkic -nationality Turkish Americans -nationality Turkish Cypriots -nationality Turkmens -nationality Turkish -nationality Turks -nationality Turks of Romania -nationality Tuscarora -nationality Tutsi -nationality Tuvans -nationality Tuyuca -nationality Udmurt -nationality Uganda -nationality Ugandan -nationality Ukrainian -nationality Ukrainian Americans -nationality Ukrainian Argentine -nationality Ukrainian Canadians -nationality Ukrainian Jews -nationality Ukrainians -nationality Umatilla -nationality Ume Saami -nationality United States of America -nationality Urhobo -nationality Urhobo -nationality Uruguayan -nationality Uruguayans -nationality Ute -nationality Uyghur -nationality Uyghurs -nationality Uz -nationality Uzbek -nationality Vanara -nationality Vandals -nationality Vanyar -nationality Varciani -nationality Vellalar -nationality Venda -nationality Venezuelan -nationality Venezuelans -nationality Vietnamese -nationality Vietnamese Americans -nationality Vietnamese Australians -nationality Vietnamese Korean -nationality Vietnamese New Zealanders -nationality Vikings -nationality Visayans -nationality Visigoths -nationality Vlachs -nationality Volga Tatars -nationality Vorta -nationality Vulcan -nationality Wa -nationality Waikato Tainui -nationality Waitaha -nationality Wakka Wakka -nationality Wallachian Roma -nationality Wallisians and Futunians -nationality Walloons -nationality Wampanoag -nationality Waorani -nationality Wapishana -nationality Wappo -nationality Waray -nationality Wari -nationality Warlpiri -nationality Warmian -nationality Washo -nationality Water Tribe -nationality Watjarri -nationality Wayampi -nationality Wayana -nationality Wea -nationality Weequays -nationality Welayta -nationality Welsh -nationality Welsh American -nationality Welsh Australian -nationality Wemba-Wemba -nationality Wenatchi -nationality West Frisians -nationality West Slavs -nationality White Angolans -nationality White Australians -nationality White British -nationality White Colombian -nationality White Dominican -nationality White Mexicans -nationality White South Africans -nationality Wichita -nationality Winnebago Tribe of Nebraska -nationality Wintu -nationality Wintun -nationality Wiradjuri -nationality Wirangu -nationality Wixarika -nationality Wiyot -nationality Woodland Cree -nationality Woppaburra -nationality Worimi -nationality Woyo -nationality Wurundjeri -nationality Wyandot -nationality Xavante -nationality Xhosa -nationality Xianbei -nationality Yadav -nationality Yaghan -nationality Yakama -nationality Yakuts -nationality Yamatji -nationality Yamato -nationality Yankton Sioux Tribe -nationality Yanktonai -nationality Yanomamis -nationality Yao -nationality Yao -nationality Yaqui -nationality Yaqui -nationality Yawalapiti -nationality Yawuru -nationality Yaygir -nationality Yazidis -nationality Yemenite Jews -nationality Yeniche -nationality Yi -nationality Yorta Yorta -nationality Yoruba -nationality Yoruba -nationality Yucatec Maya -nationality Yuchi -nationality Yugambeh -nationality Yugoslavs -nationality Yuhup -nationality Yuin -nationality Yupik -nationality Yurok -nationality Yuwaalaraay -nationality Yuzzum -nationality Zafimaniry -nationality Zaghawa -nationality Zande -nationality Zapotec -nationality Zarma -nationality Zarubintsy culture -nationality Zaza -nationality Zenata -nationality Zhuang -nationality Zia -nationality Zimbabwean -nationality Zimbabweans -nationality Zoque -nationality Zulu -nationality Zuni -religion A Moslem -religion Abakuá -religion Abenaki mythology -religion Abkhaz neopaganism -religion Abraham -religion Abrahamic -religion Acacians -religion Āḏar Kayvānī -religion Advaita Vedanta -religion Adventista -religion Agon Shu -religion Ahl al-Hadith -religion Ahmadiyya -religion Ājīvika -religion Akan -religion Akhbari -religion Akkadian mythology -religion Alawites -religion Alevism -religion Alexandrian Wicca -religion American Jews -religion Amish -religion Ananaikyo -religion anarchism -religion Ancient -religion Anglican Communion -religion Anglicanism -religion Anglo-Catholicism -religion Anglo-Saxon Christianity -religion Anglo-Saxon paganism -religion Anti-Hinduism -religion Antoinism -religion Antonianism -religion Apollinarism -religion Apostolic Brethren -religion Apostolic Church -religion Arab Christians -religion Arabian mythology -religion Arabian polytheism -religion Ari Buddhism -religion Arianism -religion Arianism visigothic -religion Armenian mythology -religion Armenian Rite -religion Arminianism -religion Arya Samaj -religion Ásatrúarfélagið -religion Ash'ari -religion Ashkenazi Jews -religion Ashurism -religion Atea -religion Atea -religion Ateizm Derneği -religion Athari -religion atheist -religion Atheists -religion Augsburg Confession -religion Augustinians -religion Aum Shinrikyo -religion Avenna -religion Awakening -religion Azerbaijani -religion Azhaliism -religion Azraqites -religion Aztec -religion Baalism -religion Bábism -religion Babylonian -religion Baghdadi Jews -religion Baháʼí Faith -religion Balinese Hinduism -religion Baltic -religion Baltic mythology -religion Baptists -religion Barnabites -religion Basilian monks -religion Basque mythology -religion Bathouism -religion Bayhasiyyah -religion Bear worship -religion Bektashi Order -religion Bell Church -religion Belokrinitskaya Hierarchy -religion Benedictines -religion Bengali -religion Bengali Christians -religion Benten-shū -religion Benzhuism -religion Beta Israel -religion Bezpopovtsy -religion Bhakti -religion Big Drum -religion Bimoism -religion Black church -religion Blavatskian theosophy -religion Bocheon-gyo -religion Bodong -religion Bogomilism -religion Bon -religion Bosnian Church -religion Brahmanism -religion Brahmin -religion Brahmo -religion Brahmo Samaj -religion Brahmoism -religion Branch Davidians -religion Brethren -religion Bridgettines -religion Broad church -religion Buchanites -religion Buddhism -religion Buddhism -religion Buddhist -religion Budha -religion Burkhanism -religion Bwiti -religion Byzantine Church -religion Byzantine Rite -religion Calvinism -religion Campbellite -religion Canaanite -religion Candomblé Bantu -religion Candomblé Ketu -religion Cao Đài -religion Caodaism -religion Catalan Jews -religion Catharism -religion Catholic -religion Catholic -religion Catholic -religion Catholic -religion Catholic cemetery -religion Catholic Church -religion Catholicism -religion Catolico -religion Celtic Christianity -religion Celtic polytheism -religion Chabad Lubavitch -religion Chalcedonian Christianity -religion Chan Buddhism -religion Chardal -religion Charismatic Christianity -religion Cheondoism -religion Chilote mythology -religion Chinese Buddhism -religion Chinese shamanism -religion Chinzei -religion Chod -religion Christ Embassy -religion Christadelphians -religion Christain -religion Christendom -religion Christian -religion Christian -religion Christian atheism -religion Christian Church -religion Christian Church -religion Christian Connection -religion Christian denomination -religion Christian fundamentalism -religion Christian Identity -religion Christian radicalism -religion Christian Science -religion Christian socialism -religion Christianism -religion Christianity -religion Christianity -religion Christianity -religion Circassian paganism -religion Církev bratrská -religion Cistercians -religion communism -religion Confessing Church -religion Confessio Bohemica -religion Confucianism -religion Congregational Church -religion Congregationalism -religion Congregationalist polity -religion Conservative Friends -religion Conservative Judaism -religion Conservative Laestadianism -religion Conservative Mennonites -religion Constitutional Church -religion Converso -religion Copts -religion Creativity -religion Cristiana -religion Cristiano -religion Crypto-Judaism -religion Cuban Vodú -religion Dacians -religion Daejonggyo -religion Dagpo Kagyu -religion Daheshism -religion Daijokyo -religion Daitoku-ji school -religion Darqawa -religion Daruma-shū -religion Destiny Church -religion Diablo -religion Dievturība -religion Digambara Terapanth -religion Din-e Ilahi -religion Discordianism -religion Dominican Order -religion Dominican Vudú -religion Donatism -religion Dönmeh -religion Doukhobors -religion Dravidar Kazhagam -religion Drikung Kagyu -religion Drukpa Lineage -religion Druze -religion Druze-Israeli -religion Druzism -religion Dudeism -religion Early Christianity -religion Eastern Christianity -religion Eastern Orthodoxy -religion Eckankar -religion Ecumenic community -religion Edinoverie -religion Egyptian mythology -religion EL BICHO -religion Endtime Ministries -religion Episcopal Church -religion Episcopal Church -religion Esoteric Christianity -religion Esoteric Nazism -religion Esperanto -religion Espiritismo -religion Essenes -religion Etruscan mythology -religion Evangelical -religion Evangelical Anglicanism -religion Evangelical Association -religion Evangelical Christians-Baptists -religion Evangelical Friends -religion Evangelicalism -religion Evangelisch-lutherische Kirche -religion evangelism -religion evangelism -religion Evanglische Kirche -religion Falun Gong -religion Fedoseevtsy -religion Feri Tradition -religion Finno-Ugric mythology -religion First Secession -religion Five-Percent Nation -religion Folk Catholicism -religion Folk Orthodoxy -religion Foundationism -religion Franciscan Church -religion Franciscan spirituality -religion Franciscans -religion Frankism -religion freemason -religion Freireligiöse Bewegung -religion Fuji lineage -religion Fuji worship -religion Fujikō -religion Fuju-fuse -religion Fuke-shū -religion Fukkoshinto -religion Gallicanism -religion Gaudiya Vaishnavism -religion Gelug -religion General Baptists -religion Georgian mythology -religion German Catholics -religion Germanic paganism -religion Ghost Dance -religion Ghulat -religion Gion Faith -religion Giudaismo -religion Glasite -religion God-fearer -religion Godianism -religion Goryō faith -religion Gothic art -religion Gothic Christianity -religion Gottgläubig -religion Greco-Roman -religion Greek mythology -religion Grundtvigianism -religion Guanche mythology -religion Guiyidao -religion Gurneyite Friends -religion Haitian Vodou -religion Hakusan cult -religion Handsome Lake -religion Hangui -religion Hanif -religion Haredi Judaism -religion Hasidism -religion Hattic mythology -religion Haugean -religion Hawaiian -religion Heathenry -religion Heaven worship -religion Hellenism -religion Helvetic Confessions -religion Hermetic Qabalah -religion Hervormde kerk -religion Hetanism -religion Hicksite Friends -religion Higashi Honganji-ha -religion High church -religion Hikawa shinkō -religion Hillsong Church -religion Hindu -religion Hindu -religion Hindu -religion Hindu temple -religion Hinduism -religion Hittite -religion Hittite mythology -religion Hòa Hảo -religion Hokke-shū -religion Hokke-shū Honmon-ryū -religion Hokke-shū Jinmon-ryū -religion Hokkeshū Shinmonryū -religion Holy Spirit -religion Homaranismo -religion Honganji-ha -religion Honmon Butsuryū-shū -religion Honzan Shugen-shū -religion Hoodoo -religion Huayan school -religion Huguenot -religion Humanistic Buddhism -religion Humanistic Judaism -religion Hungarian mythology -religion Hussites -religion Hutterite -religion Hyper-Calvinism -religion Ibandla lamaNazaretha -religion Iconodule -religion Ietsism -religion Ifá -religion Ignatian spirituality -religion Imamiyyah -religion Immersion baptism -religion Inayati Order -religion Inca mythology -religion Independent Baptist -religion Independent Catholicism -religion Independents -religion India -religion Indian -religion Inghamites -religion Interspirituality -religion Ise Shintō -religion Islam -religion Islam -religion Islam -religion Islam -religion Islamic architecture -religion Islamic extremism -religion Islamic state -religion Izumo-taishakyo -religion Jabriyah -religion Jainism -religion Jariri -religion Jaririya -religion Jedi -religion Jediism -religion Jehovah's Witnesses -religion Jesuism -religion Jesuit -religion Jewish -religion Jewish -religion Jewish atheism -religion Jewish Buddhist -religion Jewish cemetery -religion Jewish Christian -religion Jewish Renewal -religion Jewish secularism -religion Ji-shū -religion Jingming Taoism -religion Jinja Honkyō -religion Jōdo Shinshū -religion Jodo shu -religion Jogye Order -religion Jonang -religion Jordruk -religion Judaism -religion Judaism -religion Judio -religion Kabbalah -religion Kadam -religion Kagyu -religion Kalachakra -religion Kalash -religion Kalmyks -religion Kannada -religion Karaite -religion Karaite Judaism -religion Karma Kagyu -religion Kaumaram -religion Kegon -religion Kejawen -religion Kélé -religion Kemetism -religion Kempon Hokke -religion Khalwati order -religion Kharijites -religion Khatmiyya -religion Kimbanguism -religion Kinomiya faith -religion Kirchenkreis Gladbach-Neuss -religion Kirchenkreis Niederlausitz -religion Kito -religion Konkokyo -religion Korean Buddhism -religion Korean shamanism -religion Kōshin -religion Kotohira Honkyō -religion Kōyasan Shingon-shū -religion Kubrawiya -religion Kuksu -religion Kumina -religion Kurama Kōkyō -religion Kurozumikyō -religion Kusha-shū -religion Kyiv Metropolis -religion Labadists -religion Laestadianism -religion Landmarkism -religion Lapsed Catholic -religion Latin Church -religion Latter Rain -religion LaVeyan Satanism -religion Laypeople -religion Legio Maria -religion Liberal Catholicism -religion Lingayatism -religion Linji school -religion Lisu Church -religion Lithuanian mythology -religion Livets Ord -religion Living Church -religion Loco -religion Lollardy -religion Longhouse -religion Louisiana Voodoo -religion Lutheran -religion Lutheran Churches -religion Lutheranism -religion Luxor -religion Madhwas -religion Madkhalism -religion Mahanubhava -religion Mahāyāna -religion Mainline Protestant -religion Malagasy mythology -religion Malkia -religion Mandaeism -religion Manichaeism -religion Mapuche -religion Marapu -religion Mariavite Church -religion Marist Brothers -religion Maronite Church -religion Maronites -religion Marrano -religion Martinism -religion Martsang Kagyü -religion Masortim -religion Maturidi -religion Maya -religion Meiteism -religion Mennonites -religion Messianic Judaism -religion Methodism -religion Methodism -religion Miaphysitism -religion Middle Orthodoxy -religion Midewiwin -religion Millerism -religion Millî Görüş -religion Missionary Baptists -religion Mithraic mysteries -religion Mizrahi Jews -religion Mo -religion Mohéli -religion Moloch -religion Molokans -religion Mongolian shamanism -religion Monophysitism -religion monotheism -religion Monothelitism -religion Montanism -religion Moravian Church -religion Moravian Church -religion Mormon -religion Mormon fundamentalism -religion Mormonism -religion Mormons -religion Mu'tazila -religion Muisca -religion Murayama Shugen -religion Muridism -religion Murji'ah -religion Muslim -religion Musulman -religion Musulman -religion Mwari -religion Myōken-shū -religion Myōshin-ji sect -religion mysticism -religion Namdhari -religion Naqshbandi -religion Narodniks -religion Native faith -religion Navayana -religion Nazarene -religion Nedo Kagyü -religion Neo-Confucianism -religion Neolog Judaism -religion Nestorianism -religion New Age -religion New Atheism -religion New Christian -religion New Thought -religion Nicene Christianity -religion Nichiren Buddhism -religion Nichiren Shōshū -religion Nichiren Shū -religion Nichirenshū Fuju-fuse-ha -religion Nizari Isma'ilism -religion Nonconformists -religion Nondenominational Christianity -religion None -religion Nonjuring schism -religion nontheism -religion Norse mythology -religion Nuoism -religion Nuwaubian Nation -religion Nyingma -religion Ōbaku -religion Obeah -religion Odinism -religion Old Believers -religion Old Calendarism -religion Omuro school -religion Oneness Pentecostalism -religion Onmyōdō -religion Oomoto -religion Open Brethren -religion Orthodox Christian -religion Orthodox Church -religion Orthodox Friends -religion Orthodox Judaism -religion orthodoxy -religion Ōtani-ha -religion Pajero -religion Pallottines -religion pantheism -religion Paradesi Jews -religion Parsi -religion Pastafarianism -religion Paulicianism -religion Pene -religion Pentecostalism -religion Persian Jews -religion Petite Église -religion Phagdru Kagyu -religion Pharisees -religion Philippine mythology -religion Philosotology -religion Phoenician mythology -religion Pietism -religion Pietistic Reformed -religion PL Kyodan -religion Plymouth Brethren -religion Polish Brethren -religion Polish Catholicism -religion Polynesian mythology -religion polytheism -religion Pomo -religion Pre-sectarian Buddhism -religion Presbyterianism -religion Primitive Baptists -religion Primitive Methodism -religion Priscillianism -religion Progressive Christianity -religion Protestant -religion Protestant -religion Protestant Ascendancy -religion Protestant theology -religion Protestantism -religion Proto-Indo-European -religion Providence -religion Prussian mythology -religion Punic -religion Puritanism -religion Qadariyya -religion Qadiriyya -religion Qarmatians -religion Qibla -religion Quaker -religion Quaker -religion Quakers -religion Quanzhen School -religion Quimbanda -religion Quranism -religion Qutbism -religion Rabbinic Judaism -religion Radha Soami -religion Radical Pietism -religion Raelism -religion Rahmaniyya -religion Rātana -religion Reconstructionist Judaism -religion Reform Judaism -religion Reformation -religion Reformed Baptists -religion Reformed Church -religion Reformed Church -religion Reiyūkai -religion Remonstrants -religion Restorationism -religion Rien -religion Rifaʽi -religion Ringatū -religion Rinzai school -religion Risshū -religion Rodnovery -religion Rodzima Wiara -religion Roman Catholic -religion Roman Catholics -religion Roman Rite -religion Romani mythology -religion Romaniote Jews -religion Roshaniyya -religion Rosicrucianism -religion Ruri Kyōkai -religion Russian Mennonite -religion Ryukyuan -religion Sabbateans -religion Sahaja Yoga -religion Saiva -religion Saivam -religion Sakya -religion Salafi jihadism -religion Salafism -religion Salvita Decorte -religion Samaniyya -religion Samaritan -religion Samaritanism -religion Samoan mythology -religion Sanamahism -religion Sanātana Dharma -religion Sanbo Kyodan -religion Sannō -religion Sannō Shintō -religion Sanron -religion Sant Mat -religion Santería -religion Santo Daime -religion Satanism -religion Satanism -religion Satmar -religion Schwarzenau Brethren -religion Schwenkfelder Church -religion Scientology -religion Sect Shinto -religion Secular -religion secularism -religion Sedeprivationism -religion Seicho-no-Ie -religion Seizan Jōdo-shū -religion Sephardi Jews -religion Sephardic Haredim -religion Serer -religion Seventh-day Adventism -religion Shadhili -religion Shaiva Siddhanta -religion Shaivism -religion Shakers -religion shamanism -religion Shambhala Buddhism -religion Shangpa Kagyu -religion Shaykhism -religion Shia Islam -religion Shigisan Shingon-shū -religion Shinbutsu-shūgō -religion Shingon Buddhism -religion Shingon Risshu -religion Shingon-shu Buzan-ha -religion Shingon-shū Chizan-ha -religion Shingon-shū Daigo-ha -religion Shingon-shū Daikakuji-ha -religion Shingon-shū Sennyūji-ha -religion Shingon-shū Tōji-ha -religion Shinji Shumeikai -religion Shinnyo Sanmaya-ryū -religion Shinshū Bukkōji-ha -religion Shinshū Jōshōjiha -religion Shinshū Kōshō-ha -religion Shinto -religion Shōtoku-shū -religion Shramana -religion Shri Vidya -religion Shrine Shinto -religion Shtundists -religion Shugendō -religion Sikh -religion Sikhs -religion Sikhism -religion Sith -religion Skoptsy -religion Slavic -religion Slavic Christianity -religion Slavic mythology -religion Socinianism -religion Sofia -religion Soka Gakkai -religion Sōtō -religion Spiritual Baptist -religion Spiritual Christianity -religion State Shinto -religion Stregheria -religion Subud -religion Sufi -religion Sufism -religion Sufri -religion Suika Shinto -religion Sukyo Mahikari -religion Sumerian -religion Sunda Wiwitan -religion Sunni Islam -religion Śvētāmbara -religion Swaminarayan Hinduism -religion Swedenborgian Church -religion Swiss Brethren -religion Syntheism -religion Syro-Malabar Church -religion Szekler Sabbatarians -religion Taklung Kagyu -religion Talmont -religion Tanritsu -religion Tantric Buddhism -religion Taoism -religion Tateyama Shugen -religion Telugu Christian -religion Tendai -religion Tendaijimon Sect -religion Tengri -religion Tengrism -religion Tenjin -religion Tenjin faith -religion Tenrikyo -religion Tetragrammaton -religion Tewahedo Church -religion Thelema -religion Theophilanthropy -religion Theravāda -religion Third Convention -religion Thomism -religion Thracian mythology -religion Three teachings -religion Tiandism -religion Tiantai -religion Tibetan Buddhism -religion Tōji Shingon-shū -religion Traditionalist School -religion Transcendental Meditation -religion Trekkie -religion Tsalpa Kagyü -religion Tsarebozhiye -religion Tsushima faith -religion Twelver Shiism -religion Tymora -religion Umbanda -religion Umbanda -religion Unification Church -religion Unitarian Christianity -religion Unitarian Universalism -religion Unitarianism -religion United Order -religion Unity Church -religion Urantia Foundation -religion Usuli -religion Utraquism -religion Uttaradi Math -religion Uwaisi -religion Vaishnavism -religion Vajrayana -religion Vedanta -religion Vedda -religion Veerashaiva -religion Venezuelan mythology -religion Vibhajyavāda -religion Victory Outreach -religion Viracocha -religion Vizhnitz -religion Vrouwenpartij -religion Wahhabism -religion Waldensians -religion Walloon church -religion Watra -religion Wesleyan Church -religion Wesleyanism -religion Western Christianity -religion Western esotericism -religion White Lotus -religion Wicca -religion WinningJahrian -religion Winti -religion Won Buddhism -religion Yahweh -religion Yahwism -religion Yao Taoism -religion Yarsanism -religion Yasawiyya -religion Yazdânism -religion Yazidis -religion Yazidism -religion Yemeni Judaism -religion Yiguandao -religion Yogacara -religion Yoruba -religion Yoruba -religion Yoruba -religion Yoshida Shintō -religion Yoshikawa Shintō -religion Yugyō-ha -religion Yuzu Nembutsu -religion Zealot -religion Zen -religion Zen -religion Zen Peacemakers -religion Zhalu -religion Zhengyi Dao -religion Zionism -religion Zionist Churches -religion Zoroastrianism -religion Zwinglians -country Chad -country Cuba -country Fiji -country Iran -country Iraq -country Laos -country Mali -country Oman -country Peru -country Togo -country Benin -country Chile -country China -country Congo -country Egypt -country Gabon -country Ghana -country Haiti -country India -country Italy -country Japan -country Kenya -country Libya -country Malta -country Nauru -country Nepal -country Niger -country Palau -country Qatar -country Samoa -country Spain -country Sudan -country Syria -country Tonga -country Yemen -country Angola -country Belize -country Bhutan -country Brazil -country Brunei -country Canada -country Cyprus -country France -country Greece -country Guinea -country Guyana -country Israel -country Jordan -country Kosovo -country Kuwait -country Latvia -country Malawi -country Mexico -country Monaco -country Norway -country Panama -country Poland -country Russia -country Rwanda -country Serbia -country Sweden -country Turkey -country Tuvalu -country Uganda -country Zambia -country Albania -country Algeria -country Andorra -country Armenia -country Austria -country Bahrain -country Belarus -country Belgium -country Bolivia -country Burundi -country Comoros -country Croatia -country Czechia -country Denmark -country Ecuador -country Eritrea -country Estonia -country Finland -country Georgia -country Germany -country Grenada -country Hungary -country Iceland -country Ireland -country Jamaica -country Lebanon -country Lesotho -country Liberia -country Moldova -country Morocco -country Namibia -country Nigeria -country Romania -country Senegal -country Somalia -country Tunisia -country Ukraine -country Uruguay -country Vanuatu -country Vietnam -country Barbados -country Botswana -country Bulgaria -country Cambodia -country Cameroon -country Colombia -country Djibouti -country Dominica -country Eswatini -country Ethiopia -country Honduras -country Kiribati -country Malaysia -country Maldives -country Mongolia -country Pakistan -country Paraguay -country Portugal -country Slovakia -country Slovenia -country St Lucia -country Suriname -country Tanzania -country Thailand -country Zimbabwe -country Argentina -country Australia -country Guatemala -country Indonesia -country Lithuania -country Mauritius -country Nicaragua -country Singapore -country Sri Lanka -country Venezuela -country Azerbaijan -country Bangladesh -country Cape Verde -country Costa Rica -country East Timor -country Kazakhstan -country Kyrgyzstan -country Luxembourg -country Madagascar -country Mauritania -country Micronesia -country Montenegro -country Mozambique -country San Marino -country Seychelles -country St Vincent -country Tajikistan -country Uzbekistan -country Afghanistan -country El Salvador -country The Gambia -country Gambia -country Ivory Coast -country Netherlands -country New Zealand -country North Korea -country Philippines -country Saint Lucia -country South Korea -country South Sudan -country Switzerland -country The Bahamas -country Bahamas -country Burkina Faso -country Saudi Arabia -country Sierra Leone -country South Africa -country Turkmenistan -country Vatican City -country Guinea-Bissau -country Liechtenstein -country United States -country Czech Republic -country State of Libya -country State of Qatar -country United Kingdom -country French Republic -country Kyrgyz Republic -country Myanmar (Burma) -country North Macedonia -country Slovak Republic -country Solomon Islands -country State of Israel -country State of Kuwait -country Italian Republic -country Kingdom of Spain -country Kingdom of Tonga -country Marshall Islands -country Papua New Guinea -country Republic of Chad -country Republic of Cuba -country Republic of Fiji -country Republic of Iraq -country Republic of Mali -country Republic of Peru -country State of Eritrea -country Brunei Darussalam -country Equatorial Guinea -country Gabonese Republic -country Hellenic Republic -country Kingdom of Bhutan -country Kingdom of Norway -country Kingdom of Sweden -country Lebanese Republic -country Republic of Benin -country Republic of Chile -country Republic of Ghana -country Republic of Haiti -country Republic of India -country Republic of Kenya -country Republic of Korea -country Republic of Malta -country Republic of Nauru -country Republic of Niger -country Republic of Palau -country Republic of Yemen -country Sultanate of Oman -country Togolese Republic -country Argentine Republic -country Dominican Republic -country Kingdom of Bahrain -country Kingdom of Belgium -country Kingdom of Denmark -country Kingdom of Lesotho -country Kingdom of Morocco -country Republic of Angola -country Republic of Cyprus -country Republic of Guinea -country Republic of Kosovo -country Republic of Latvia -country Republic of Malawi -country Republic of Panama -country Republic of Poland -country Republic of Rwanda -country Republic of Serbia -country Republic of Uganda -country Republic of Zambia -country Russian Federation -country St Kitts and Nevis -country Antigua and Barbuda -country Kingdom of Cambodia -country Kingdom of Eswatini -country Kingdom of Thailand -country Portuguese Republic -country Republic of Albania -country Republic of Armenia -country Republic of Austria -country Republic of Belarus -country Republic of Burundi -country Republic of Croatia -country Republic of Ecuador -country Republic of Estonia -country Republic of Finland -country Republic of Iceland -country Republic of Liberia -country Republic of Moldova -country Republic of Namibia -country Republic of Senegal -country Republic of Tunisia -country Republic of Türkiye -country Republic of Vanuatu -country Swiss Confederation -country Trinidad and Tobago -country Republic of Botswana -country Republic of Bulgaria -country Republic of Cameroon -country Republic of Colombia -country Republic of Djibouti -country Republic of Honduras -country Republic of Kiribati -country Republic of Maldives -country Republic of Paraguay -country Republic of Slovenia -country Republic of Suriname -country Republic of Zimbabwe -country Syrian Arab Republic -country Union of the Comoros -country United Arab Emirates -country Republic of Guatemala -country Republic of Indonesia -country Republic of Lithuania -country Republic of Mauritius -country Republic of Nicaragua -country Republic of Singapore -country Republic of the Congo -country Republic of the Sudan -country Sao Tome and Principe -country United Mexican States -country Arab Republic of Egypt -country Bosnia and Herzegovina -country Principality of Monaco -country Republic of Azerbaijan -country Republic of Cabo Verde -country Republic of Costa Rica -country Republic of Kazakhstan -country Republic of Madagascar -country Republic of Mozambique -country Republic of San Marino -country Republic of Seychelles -country Republic of Tajikistan -country Republic of The Gambia -country Republic of Uzbekistan -country Kingdom of Saudi Arabia -country Principality of Andorra -country Republic of El Salvador -country Republic of South Sudan -country Central African Republic -country Commonwealth of Dominica -country Islamic Republic of Iran -country Republic of Sierra Leone -country Republic of South Africa -country United States of America -country Commonwealth of Australia -country Grand Duchy of Luxembourg -country Republic of Côte d’Ivoire -country Republic of Guinea-Bissau -country Independent State of Samoa -country Kingdom of the Netherlands -country People’s Republic of China -country Commonwealth of The Bahamas -country Congo (Democratic Republic) -country Federal Republic of Germany -country Federal Republic of Nigeria -country Federal Republic of Somalia -country Hashemite Kingdom of Jordan -country Republic of North Macedonia -country Republic of the Philippines -country United Republic of Tanzania -country Islamic Republic of Pakistan -country Oriental Republic of Uruguay -country Federative Republic of Brazil -country Principality of Liechtenstein -country Republic of Equatorial Guinea -country Federated States of Micronesia -country Islamic Republic of Mauritania -country Plurinational State of Bolivia -country Socialist Republic of Viet Nam -country Co-operative Republic of Guyana -country Islamic Republic of Afghanistan -country People’s Republic of Bangladesh -country Republic of Trinidad and Tobago -country Bolivarian Republic of Venezuela -country Democratic Republic of the Congo -country Lao People’s Democratic Republic -country Republic of the Marshall Islands -country Republic of the Union of Myanmar -country Saint Vincent and the Grenadines -country Democratic Republic of Timor-Leste -country Democratic Peopleís Republic of Korea -country Independent State of Papua New Guinea -country Federal Democratic Republic of Ethiopia -country People’s Democratic Republic of Algeria -country Federation of Saint Christopher and Nevis -country Democratic Socialist Republic of Sri Lanka -country Democratic Republic of Sao Tome and Principe -country United Kingdom of Great Britain and Northern Ireland -country Vatican City State -country Swaziland -sexual orientation gay -sexual orientation homosexual -sexual orientation homosexuality -sexual orientation bisexuality -sexual orientation lesbian -sexual orientation lesbianism -sexual orientation pansexual -sexual orientation pansexuality -sexual orientation heterosexuality -sexual orientation heterosexual -sexual orientation asexual -sexual orientation asexuality -sexual orientation monosexual -sexual orientation monosexuality -sexual orientation demisexual -sexual orientation demisexuality -sexual orientation sapphic -sexual orientation sapphism -sexual orientation bisexual -sexual orientation bisexual man -sexual orientation bisexual woman -sexual orientation gays -sexual orientation lesbians -misc fascism -misc prostitution -misc abortion -misc extremism -misc ISIS -misc nazi -misc nazis -misc quran -misc bible -misc synagogue -misc genocide -misc islamic extremists -misc extremists -misc antisemitism -misc anti-semitism -misc antizionism -misc anti-zionism -misc racism -misc rape -misc ku klux klan -misc bombing -misc bombings -misc nazism -misc neo-nazism -misc neonazism \ No newline at end of file diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 9ff3eb87..1a2a47e0 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -23,16 +23,3 @@ services: depends_on: vespatest: condition: service_healthy - - opensearch-node1: - environment: - - bootstrap.memory_lock=true # along with the memlock settings below, disables swapping - - "OPENSEARCH_JAVA_OPTS=-Xms2g -Xmx2g" # minimum and maximum Java heap size, recommend setting both to 50% of system RAM - - "DISABLE_INSTALL_DEMO_CONFIG=true" # disables execution of install_demo_configuration.sh bundled with security plugin, which installs demo certificates and security configurations to OpenSearch - - "DISABLE_SECURITY_PLUGIN=true" # disables security plugin entirely in OpenSearch by setting plugins.security.disabled: true in opensearch.yml - - "discovery.type=single-node" # disables bootstrap checks that are enabled when network.host is set to a non-loopback address - - opensearch-dashboards: - environment: - - 'OPENSEARCH_HOSTS=["http://opensearch-node1:9200"]' - - "DISABLE_SECURITY_DASHBOARDS_PLUGIN=true" # disables security dashboards plugin in OpenSearch Dashboards diff --git a/docker-compose.yml b/docker-compose.yml index 9567118b..7fba3760 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,32 +15,6 @@ services: timeout: 3s retries: 30 - opensearch-node1: - image: opensearchproject/opensearch:1.3.0 - container_name: opensearch-node1 - environment: - - cluster.name=opensearch-cluster - - node.name=opensearch-node1 - env_file: - - .env - ulimits: - memlock: - soft: -1 - hard: -1 - nofile: - soft: 65536 # maximum number of open files for the OpenSearch user, set to at least 65536 on modern systems - hard: 65536 - volumes: - - opensearch-data1:/usr/share/opensearch/data - ports: - - 9200:9200 - - 9600:9600 # required for Performance Analyzer - healthcheck: - test: curl -u ${OPENSEARCH_USER}:${OPENSEARCH_PASSWORD} -s -f opensearch-node1:9200/_cat/health >/dev/null || exit 1 - interval: 5s - timeout: 3s - retries: 30 - backend: build: context: ./ @@ -65,26 +39,6 @@ services: timeout: 3s retries: 30 - opensearch-dashboards: - image: opensearchproject/opensearch-dashboards:1.3.0 - container_name: opensearch-dashboards - ports: - - 5601:5601 - depends_on: - opensearch-node1: - condition: service_healthy - - opensearch-test-loader: - image: elasticdump/elasticsearch-dump - container_name: opensearch-test-loader - volumes: - - ./:/cpr-backend/:cached - env_file: - - .env - depends_on: - opensearch-node1: - condition: service_healthy volumes: db-data-backend: - opensearch-data1: diff --git a/docs/api/search.md b/docs/api/search.md index b217fd60..da230de3 100644 --- a/docs/api/search.md +++ b/docs/api/search.md @@ -35,8 +35,8 @@ There is **_no_** authentication required for using this interface.  The search endpoint behaves in two distinct ways: -1. In “Browse” mode - this is when an empty` query_string `is provided. This mode does not use Opensearch, rather queries the structured data (postgresql) directly, using the other supplied filter fields. -2. In “Search” mode - when a `query_string `is provided. A query is constructed sent to Opensearch and the response is augmented with the structured data before being returned in the same response scheme. +1. In “Browse” mode - this is when an empty` query_string `is provided. This mode does not use Vespa, rather queries the structured data (postgresql) directly, using the other supplied filter fields. +2. In “Search” mode - when a `query_string `is provided. A query is constructed sent to Vespa and the response is augmented with the structured data before being returned in the same response scheme. ## **Request Payload** @@ -130,7 +130,7 @@ The total number of families that meet the search criteria. #### query_time_ms -The time Opensearch spent performing the query. +The time Vespa spent performing the query. #### total_time_ms @@ -267,4 +267,4 @@ curl "$API_HOST/api/v1/searches" \      -H 'Accept: application/json' \      -H 'Content-Type: application/json' \      --data-raw '{"query_string":"", "exact_match":true, "keyword_filters":{}, "sort_field":null, "sort_order":"desc", "limit":100, "offset":0}' -``` \ No newline at end of file +``` diff --git a/makefile-docker.defs b/makefile-docker.defs index a6374236..ebfd4e4f 100644 --- a/makefile-docker.defs +++ b/makefile-docker.defs @@ -110,16 +110,6 @@ test_search: -vvv tests/routes/test_vespasearch.py \ -m 'search' -setup_test_search_index: - docker-compose -f docker-compose.yml -f docker-compose.dev.yml run --rm backend curl -XDELETE -u "${OPENSEARCH_USER}:${OPENSEARCH_PASSWORD}" ${OPENSEARCH_URL}/${OPENSEARCH_INDEX_PREFIX}* --insecure - docker-compose -f docker-compose.yml -f docker-compose.dev.yml run --rm opensearch-test-loader multielasticdump --direction=load --input=/cpr-backend/tests/data/ --output=${OPENSEARCH_URL} --ignoreType=template - -test_opensearch: setup_test_search_index - docker-compose -f docker-compose.yml -f docker-compose.dev.yml run --name search_test -v "${PWD}/data:/data" backend pytest -vvv -m 'opensearch' - docker cp search_test:/data/benchmark_browse.txt . - docker cp search_test:/data/benchmark_search.txt . - docker rm search_test - test_cors: docker-compose -f docker-compose.yml -f docker-compose.dev.yml run --rm backend pytest -vvv -m 'cors' @@ -127,15 +117,11 @@ test_unit: docker-compose -f docker-compose.yml -f docker-compose.dev.yml run --rm backend pytest -vvv tests/unit test: - docker-compose -f docker-compose.yml -f docker-compose.dev.yml run --rm backend pytest -vvv --test-alembic -m 'not opensearch and not search' + docker-compose -f docker-compose.yml -f docker-compose.dev.yml run --rm backend pytest -vvv --test-alembic -m 'not search' # ---------------------------------- # tasks # ---------------------------------- -# Check OpenSearch running on localhost -check_opensearch_local: - curl -XGET http://localhost:9200 -u 'admin:admin' --insecure - postgres_dump: docker-compose run -v ${PWD}/backend:/app/data --rm backend_db pg_dump -d ${DATABASE_URL} --data-only -F c --file /app/data/backend_db_dump.dump diff --git a/makefile-local.defs b/makefile-local.defs index 20b7f38e..6ae9f2ab 100644 --- a/makefile-local.defs +++ b/makefile-local.defs @@ -19,12 +19,6 @@ ifneq (,$(wildcard ./.env)) exit 1 endif -opensearch_test_data: - rm -r ./search-index/test/data/test_opensearch_*.dump* - elasticdump --input=http://admin:admin@localhost:9200 --output=./tests/data/test_opensearch_data.dump.gz --type=data --fsCompress --input-index=navigator - elasticdump --input=http://admin:admin@localhost:9200 --output=./tests/data/test_opensearch_mapping.dump --type=mapping --input-index=navigator - elasticdump --input=http://admin:admin@localhost:9200 --output=./tests/data/test_opensearch_analyzer.dump --type=analyzer --input-index=navigator - dev_install: check_dev_environment # Sets up a local dev environment diff --git a/poetry.lock b/poetry.lock index 9e5a442a..c4fb886a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,9 +1,10 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry and should not be changed by hand. [[package]] name = "aiohttp" version = "3.9.1" description = "Async http client/server framework (asyncio)" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -100,6 +101,7 @@ speedups = ["Brotli", "aiodns", "brotlicffi"] name = "aiosignal" version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -114,6 +116,7 @@ frozenlist = ">=1.1.0" name = "alembic" version = "1.13.1" description = "A database migration tool for SQLAlchemy." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -133,6 +136,7 @@ tz = ["backports.zoneinfo"] name = "alembic-utils" version = "0.8.2" description = "A sqlalchemy/alembic extension for migrating procedures and views" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -155,6 +159,7 @@ nvim = ["neovim", "python-language-server"] name = "annotated-types" version = "0.6.0" description = "Reusable constraint types to use with typing.Annotated" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -166,6 +171,7 @@ files = [ name = "anyio" version = "3.7.1" description = "High level compatibility layer for multiple asynchronous event loop implementations" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -187,6 +193,7 @@ trio = ["trio (<0.22)"] name = "async-timeout" version = "4.0.3" description = "Timeout context manager for asyncio programs" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -198,6 +205,7 @@ files = [ name = "attrs" version = "23.2.0" description = "Classes Without Boilerplate" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -217,6 +225,7 @@ tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "p name = "authlib" version = "0.15.6" description = "The ultimate Python library in building OAuth and OpenID Connect servers." +category = "main" optional = false python-versions = "*" files = [ @@ -234,6 +243,7 @@ client = ["requests"] name = "aws-error-utils" version = "2.7.0" description = "Error-handling functions for boto3/botocore" +category = "main" optional = false python-versions = ">=3.7,<4" files = [ @@ -248,6 +258,7 @@ botocore = "*" name = "bcrypt" version = "3.2.2" description = "Modern password hashing for your software and your servers" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -275,6 +286,7 @@ typecheck = ["mypy"] name = "black" version = "23.12.1" description = "The uncompromising code formatter." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -321,6 +333,7 @@ uvloop = ["uvloop (>=0.15.2)"] name = "boto3" version = "1.34.15" description = "The AWS SDK for Python" +category = "main" optional = false python-versions = ">= 3.8" files = [ @@ -340,6 +353,7 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] name = "botocore" version = "1.34.19" description = "Low-level, data-driven core of boto 3." +category = "main" optional = false python-versions = ">= 3.8" files = [ @@ -362,6 +376,7 @@ crt = ["awscrt (==0.19.19)"] name = "certifi" version = "2023.11.17" description = "Python package for providing Mozilla's CA Bundle." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -373,6 +388,7 @@ files = [ name = "cffi" version = "1.16.0" description = "Foreign Function Interface for Python calling C code." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -437,6 +453,7 @@ pycparser = "*" name = "cfgv" version = "3.4.0" description = "Validate configuration and produce human readable error messages." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -448,6 +465,7 @@ files = [ name = "charset-normalizer" version = "3.3.2" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -547,6 +565,7 @@ files = [ name = "click" version = "8.1.7" description = "Composable command line interface toolkit" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -561,6 +580,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "cmake" version = "3.28.1" description = "CMake is an open-source, cross-platform family of tools designed to build, test and package software" +category = "main" optional = false python-versions = "*" files = [ @@ -590,6 +610,7 @@ test = ["coverage (>=4.2)", "importlib-metadata (>=2.0)", "pytest (>=3.0.3)", "p name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -601,6 +622,7 @@ files = [ name = "cpr-data-access" version = "0.3.0" description = "" +category = "main" optional = false python-versions = "^3.9" files = [] @@ -634,6 +656,7 @@ resolved_reference = "ac2e2ced819b92c68cf7313f56bfb9ad4a0f1699" name = "cryptography" version = "41.0.7" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -679,6 +702,7 @@ test-randomorder = ["pytest-randomly"] name = "datasets" version = "2.16.1" description = "HuggingFace community-driven open-source library of datasets" +category = "main" optional = false python-versions = ">=3.8.0" files = [ @@ -723,6 +747,7 @@ vision = ["Pillow (>=6.2.1)"] name = "deprecation" version = "2.1.0" description = "A library to handle automated deprecations" +category = "main" optional = false python-versions = "*" files = [ @@ -737,6 +762,7 @@ packaging = "*" name = "dill" version = "0.3.7" description = "serialize all of Python" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -751,6 +777,7 @@ graph = ["objgraph (>=1.7.2)"] name = "distlib" version = "0.3.8" description = "Distribution utilities" +category = "dev" optional = false python-versions = "*" files = [ @@ -762,6 +789,7 @@ files = [ name = "docker" version = "7.0.0" description = "A Python library for the Docker Engine API." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -783,6 +811,7 @@ websockets = ["websocket-client (>=1.3.0)"] name = "exceptiongroup" version = "1.2.0" description = "Backport of PEP 654 (exception groups)" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -797,6 +826,7 @@ test = ["pytest (>=6)"] name = "fastapi" version = "0.103.2" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -817,6 +847,7 @@ all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)" name = "fastapi-health" version = "0.4.0" description = "Heath check on FastAPI applications." +category = "main" optional = false python-versions = ">=3.6.1,<4.0.0" files = [ @@ -831,6 +862,7 @@ fastapi = ">=0.63.0" name = "fastapi-pagination" version = "0.9.3" description = "FastAPI pagination" +category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -861,6 +893,7 @@ tortoise = ["tortoise-orm[aiomysql,aiosqlite,asyncpg] (>=0.16.18,<0.20.0)"] name = "filelock" version = "3.13.1" description = "A platform independent file lock." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -877,6 +910,7 @@ typing = ["typing-extensions (>=4.8)"] name = "flupy" version = "1.2.0" description = "Method chaining built on generators" +category = "main" optional = false python-versions = "*" files = [ @@ -893,6 +927,7 @@ dev = ["black", "mypy", "pre-commit", "pylint", "pytest", "pytest-benchmark", "p name = "freezegun" version = "1.4.0" description = "Let your Python tests travel through time" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -907,6 +942,7 @@ python-dateutil = ">=2.7" name = "frozenlist" version = "1.4.1" description = "A list-like structure which implements collections.abc.MutableSequence" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -993,6 +1029,7 @@ files = [ name = "fsspec" version = "2023.10.0" description = "File-system specification" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1032,6 +1069,7 @@ tqdm = ["tqdm"] name = "greenlet" version = "3.0.3" description = "Lightweight in-process concurrent programming" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1103,6 +1141,7 @@ test = ["objgraph", "psutil"] name = "h11" version = "0.12.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1114,6 +1153,7 @@ files = [ name = "httpcore" version = "0.14.7" description = "A minimal low-level HTTP client." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1122,19 +1162,20 @@ files = [ ] [package.dependencies] -anyio = "==3.*" +anyio = ">=3.0.0,<4.0.0" certifi = "*" h11 = ">=0.11,<0.13" -sniffio = "==1.*" +sniffio = ">=1.0.0,<2.0.0" [package.extras] http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] +socks = ["socksio (>=1.0.0,<2.0.0)"] [[package]] name = "httptools" version = "0.6.1" description = "A collection of framework independent HTTP protocol utils." +category = "main" optional = false python-versions = ">=3.8.0" files = [ @@ -1183,6 +1224,7 @@ test = ["Cython (>=0.29.24,<0.30.0)"] name = "httpx" version = "0.22.0" description = "The next generation HTTP client." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1199,14 +1241,15 @@ sniffio = "*" [package.extras] brotli = ["brotli", "brotlicffi"] -cli = ["click (==8.*)", "pygments (==2.*)", "rich (==10.*)"] +cli = ["click (>=8.0.0,<9.0.0)", "pygments (>=2.0.0,<3.0.0)", "rich (>=10.0.0,<11.0.0)"] http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] +socks = ["socksio (>=1.0.0,<2.0.0)"] [[package]] name = "huggingface-hub" version = "0.20.2" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" +category = "main" optional = false python-versions = ">=3.8.0" files = [ @@ -1239,6 +1282,7 @@ typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "t name = "identify" version = "2.5.33" description = "File identification library for Python" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1253,6 +1297,7 @@ license = ["ukkonen"] name = "idna" version = "3.6" description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1264,6 +1309,7 @@ files = [ name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1275,6 +1321,7 @@ files = [ name = "itsdangerous" version = "2.1.2" description = "Safely pass data to untrusted environments and back." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1286,6 +1333,7 @@ files = [ name = "jinja2" version = "3.1.2" description = "A very fast and expressive template engine." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1303,6 +1351,7 @@ i18n = ["Babel (>=2.7)"] name = "jmespath" version = "1.0.1" description = "JSON Matching Expressions" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1314,6 +1363,7 @@ files = [ name = "joblib" version = "1.3.2" description = "Lightweight pipelining with Python functions" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1325,6 +1375,7 @@ files = [ name = "json-logging" version = "1.3.0" description = "JSON Python Logging" +category = "main" optional = false python-versions = "*" files = [ @@ -1336,6 +1387,7 @@ files = [ name = "langdetect" version = "1.0.9" description = "Language detection library ported from Google's language-detection." +category = "main" optional = false python-versions = "*" files = [ @@ -1350,6 +1402,7 @@ six = "*" name = "lit" version = "17.0.6" description = "A Software Testing Tool" +category = "main" optional = false python-versions = "*" files = [ @@ -1360,6 +1413,7 @@ files = [ name = "mako" version = "1.3.0" description = "A super-fast templating language that borrows the best ideas from the existing templating languages." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1379,6 +1433,7 @@ testing = ["pytest"] name = "markupsafe" version = "2.1.3" description = "Safely add untrusted strings to HTML/XML markup." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1448,6 +1503,7 @@ files = [ name = "moto" version = "3.1.19" description = "A library that allows your python tests to easily mock out the boto library" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1497,6 +1553,7 @@ xray = ["aws-xray-sdk (>=0.93,!=0.96)", "setuptools"] name = "mpmath" version = "1.3.0" description = "Python library for arbitrary-precision floating-point arithmetic" +category = "main" optional = false python-versions = "*" files = [ @@ -1514,6 +1571,7 @@ tests = ["pytest (>=4.6)"] name = "multidict" version = "6.0.4" description = "multidict implementation" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1597,6 +1655,7 @@ files = [ name = "multiprocess" version = "0.70.15" description = "better multiprocessing and multithreading in Python" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1625,6 +1684,7 @@ dill = ">=0.3.7" name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -1636,6 +1696,7 @@ files = [ name = "networkx" version = "3.2.1" description = "Python package for creating and manipulating graphs and networks" +category = "main" optional = false python-versions = ">=3.9" files = [ @@ -1654,6 +1715,7 @@ test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] name = "nltk" version = "3.8.1" description = "Natural Language Toolkit" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1679,6 +1741,7 @@ twitter = ["twython"] name = "nodeenv" version = "1.8.0" description = "Node.js virtual environment builder" +category = "dev" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" files = [ @@ -1693,6 +1756,7 @@ setuptools = "*" name = "numpy" version = "1.26.3" description = "Fundamental package for array computing in Python" +category = "main" optional = false python-versions = ">=3.9" files = [ @@ -1738,6 +1802,7 @@ files = [ name = "nvidia-cublas-cu11" version = "11.10.3.66" description = "CUBLAS native runtime libraries" +category = "main" optional = false python-versions = ">=3" files = [ @@ -1753,6 +1818,7 @@ wheel = "*" name = "nvidia-cuda-cupti-cu11" version = "11.7.101" description = "CUDA profiling tools runtime libs." +category = "main" optional = false python-versions = ">=3" files = [ @@ -1768,6 +1834,7 @@ wheel = "*" name = "nvidia-cuda-nvrtc-cu11" version = "11.7.99" description = "NVRTC native runtime libraries" +category = "main" optional = false python-versions = ">=3" files = [ @@ -1784,6 +1851,7 @@ wheel = "*" name = "nvidia-cuda-runtime-cu11" version = "11.7.99" description = "CUDA Runtime native Libraries" +category = "main" optional = false python-versions = ">=3" files = [ @@ -1799,6 +1867,7 @@ wheel = "*" name = "nvidia-cudnn-cu11" version = "8.5.0.96" description = "cuDNN runtime libraries" +category = "main" optional = false python-versions = ">=3" files = [ @@ -1814,6 +1883,7 @@ wheel = "*" name = "nvidia-cufft-cu11" version = "10.9.0.58" description = "CUFFT native runtime libraries" +category = "main" optional = false python-versions = ">=3" files = [ @@ -1825,6 +1895,7 @@ files = [ name = "nvidia-curand-cu11" version = "10.2.10.91" description = "CURAND native runtime libraries" +category = "main" optional = false python-versions = ">=3" files = [ @@ -1840,6 +1911,7 @@ wheel = "*" name = "nvidia-cusolver-cu11" version = "11.4.0.1" description = "CUDA solver native runtime libraries" +category = "main" optional = false python-versions = ">=3" files = [ @@ -1856,6 +1928,7 @@ wheel = "*" name = "nvidia-cusparse-cu11" version = "11.7.4.91" description = "CUSPARSE native runtime libraries" +category = "main" optional = false python-versions = ">=3" files = [ @@ -1871,6 +1944,7 @@ wheel = "*" name = "nvidia-nccl-cu11" version = "2.14.3" description = "NVIDIA Collective Communication Library (NCCL) Runtime" +category = "main" optional = false python-versions = ">=3" files = [ @@ -1881,6 +1955,7 @@ files = [ name = "nvidia-nvtx-cu11" version = "11.7.91" description = "NVIDIA Tools Extension" +category = "main" optional = false python-versions = ">=3" files = [ @@ -1892,31 +1967,11 @@ files = [ setuptools = "*" wheel = "*" -[[package]] -name = "opensearch-py" -version = "1.1.0" -description = "Python low-level client for OpenSearch" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4" -files = [ - {file = "opensearch-py-1.1.0.tar.gz", hash = "sha256:7d0c41cea61fedc34542be7fb9169931360134cf823c596f719106c3bd8466fe"}, - {file = "opensearch_py-1.1.0-py2.py3-none-any.whl", hash = "sha256:cb573546fb373dac8091be9b8eac2ba8da277713eea4b50b4a49ccd30dec25f1"}, -] - -[package.dependencies] -certifi = "*" -urllib3 = ">=1.21.1,<2" - -[package.extras] -async = ["aiohttp (>=3,<4)"] -develop = ["black", "botocore", "coverage", "jinja2", "mock", "myst-parser", "pytest", "pytest-cov", "pyyaml", "requests (>=2.0.0,<3.0.0)", "sphinx", "sphinx-copybutton", "sphinx-rtd-theme"] -docs = ["myst-parser", "sphinx", "sphinx-copybutton", "sphinx-rtd-theme"] -requests = ["requests (>=2.4.0,<3.0.0)"] - [[package]] name = "packaging" version = "23.2" description = "Core utilities for Python packages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1928,6 +1983,7 @@ files = [ name = "pandas" version = "1.5.3" description = "Powerful data structures for data analysis, time series, and statistics" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1963,8 +2019,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.20.3", markers = "python_version < \"3.10\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, ] python-dateutil = ">=2.8.1" pytz = ">=2020.1" @@ -1976,6 +2032,7 @@ test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] name = "parse" version = "1.20.0" description = "parse() is the opposite of format()" +category = "main" optional = false python-versions = "*" files = [ @@ -1987,6 +2044,7 @@ files = [ name = "passlib" version = "1.7.4" description = "comprehensive password hashing framework supporting over 30 schemes" +category = "main" optional = false python-versions = "*" files = [ @@ -2004,6 +2062,7 @@ totp = ["cryptography"] name = "pathspec" version = "0.12.1" description = "Utility library for gitignore style pattern matching of file paths." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2015,6 +2074,7 @@ files = [ name = "pillow" version = "10.2.0" description = "Python Imaging Library (Fork)" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2100,6 +2160,7 @@ xmp = ["defusedxml"] name = "platformdirs" version = "4.1.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2115,6 +2176,7 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-co name = "pluggy" version = "1.3.0" description = "plugin and hook calling mechanisms for python" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2130,6 +2192,7 @@ testing = ["pytest", "pytest-benchmark"] name = "pre-commit" version = "2.21.0" description = "A framework for managing and maintaining multi-language pre-commit hooks." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2148,6 +2211,7 @@ virtualenv = ">=20.10.0" name = "psycopg2-binary" version = "2.9.9" description = "psycopg2 - Python-PostgreSQL Database Adapter" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2229,6 +2293,7 @@ files = [ name = "pyarrow" version = "14.0.2" description = "Python library for Apache Arrow" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2277,6 +2342,7 @@ numpy = ">=1.16.6" name = "pyarrow-hotfix" version = "0.6" description = "" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -2288,6 +2354,7 @@ files = [ name = "pycparser" version = "2.21" description = "C parser in Python" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2299,6 +2366,7 @@ files = [ name = "pydantic" version = "2.5.3" description = "Data validation using Python type hints" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2318,6 +2386,7 @@ email = ["email-validator (>=2.0.0)"] name = "pydantic-core" version = "2.14.6" description = "" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2435,6 +2504,7 @@ typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" name = "pyjwt" version = "2.8.0" description = "JSON Web Token implementation in Python" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2452,6 +2522,7 @@ tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] name = "pyright" version = "1.1.345" description = "Command line wrapper for pyright" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2470,6 +2541,7 @@ dev = ["twine (>=3.4.1)"] name = "pytest" version = "7.4.4" description = "pytest: simple powerful testing with Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2492,6 +2564,7 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no name = "pytest-alembic" version = "0.10.7" description = "A pytest plugin for verifying alembic migrations." +category = "dev" optional = false python-versions = ">=3.6,<4" files = [ @@ -2508,6 +2581,7 @@ sqlalchemy = "*" name = "pytest-asyncio" version = "0.18.3" description = "Pytest support for asyncio" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2526,6 +2600,7 @@ testing = ["coverage (==6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy name = "pytest-mock" version = "3.12.0" description = "Thin-wrapper around the mock package for easier use with pytest" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2543,6 +2618,7 @@ dev = ["pre-commit", "pytest-asyncio", "tox"] name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -2557,6 +2633,7 @@ six = ">=1.5" name = "python-dotenv" version = "0.19.2" description = "Read key-value pairs from a .env file and set them as environment variables" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -2571,6 +2648,7 @@ cli = ["click (>=5.0)"] name = "python-multipart" version = "0.0.5" description = "A streaming multipart parser for Python" +category = "main" optional = false python-versions = "*" files = [ @@ -2584,6 +2662,7 @@ six = ">=1.4.0" name = "python-slugify" version = "6.1.2" description = "A Python slugify application that also handles Unicode" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -2601,6 +2680,7 @@ unidecode = ["Unidecode (>=1.1.1)"] name = "pytz" version = "2023.3.post1" description = "World timezone definitions, modern and historical" +category = "main" optional = false python-versions = "*" files = [ @@ -2612,6 +2692,7 @@ files = [ name = "pyvespa" version = "0.37.1" description = "Python API for vespa.ai" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2633,6 +2714,7 @@ typing-extensions = "*" name = "pywin32" version = "306" description = "Python for Window Extensions" +category = "main" optional = false python-versions = "*" files = [ @@ -2656,6 +2738,7 @@ files = [ name = "pyyaml" version = "6.0.1" description = "YAML parser and emitter for Python" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2715,6 +2798,7 @@ files = [ name = "regex" version = "2023.12.25" description = "Alternative regular expression module, to replace re." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2817,6 +2901,7 @@ files = [ name = "requests" version = "2.31.0" description = "Python HTTP for Humans." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2838,6 +2923,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "requests-toolbelt" version = "0.10.1" description = "A utility belt for advanced users of python-requests" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2852,6 +2938,7 @@ requests = ">=2.0.1,<3.0.0" name = "responses" version = "0.24.1" description = "A utility library for mocking out the `requests` Python library." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2871,6 +2958,7 @@ tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asy name = "rfc3986" version = "1.5.0" description = "Validating URI References per RFC 3986" +category = "main" optional = false python-versions = "*" files = [ @@ -2888,6 +2976,7 @@ idna2008 = ["idna"] name = "ruff" version = "0.0.291" description = "An extremely fast Python linter, written in Rust." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2914,6 +3003,7 @@ files = [ name = "s3transfer" version = "0.10.0" description = "An Amazon S3 Transfer Manager" +category = "main" optional = false python-versions = ">= 3.8" files = [ @@ -2931,6 +3021,7 @@ crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] name = "safetensors" version = "0.4.1" description = "" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3050,6 +3141,7 @@ torch = ["safetensors[numpy]", "torch (>=1.10)"] name = "scikit-learn" version = "1.3.2" description = "A set of python modules for machine learning and data mining" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -3097,6 +3189,7 @@ tests = ["black (>=23.3.0)", "matplotlib (>=3.1.3)", "mypy (>=1.3)", "numpydoc ( name = "scipy" version = "1.11.4" description = "Fundamental algorithms for scientific computing in Python" +category = "main" optional = false python-versions = ">=3.9" files = [ @@ -3139,6 +3232,7 @@ test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeo name = "sentence-transformers" version = "2.2.2" description = "Multilingual text embeddings" +category = "main" optional = false python-versions = ">=3.6.0" files = [ @@ -3161,6 +3255,7 @@ transformers = ">=4.6.0,<5.0.0" name = "sentencepiece" version = "0.1.99" description = "SentencePiece python wrapper" +category = "main" optional = false python-versions = "*" files = [ @@ -3215,6 +3310,7 @@ files = [ name = "setuptools" version = "69.0.3" description = "Easily download, build, install, upgrade, and uninstall Python packages" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -3231,6 +3327,7 @@ testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jar name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3242,6 +3339,7 @@ files = [ name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3253,54 +3351,34 @@ files = [ name = "sqlalchemy" version = "1.4.51" description = "Database Abstraction Library" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ - {file = "SQLAlchemy-1.4.51-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:1a09d5bd1a40d76ad90e5570530e082ddc000e1d92de495746f6257dc08f166b"}, {file = "SQLAlchemy-1.4.51-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2be4e6294c53f2ec8ea36486b56390e3bcaa052bf3a9a47005687ccf376745d1"}, {file = "SQLAlchemy-1.4.51-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ca484ca11c65e05639ffe80f20d45e6be81fbec7683d6c9a15cd421e6e8b340"}, {file = "SQLAlchemy-1.4.51-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:0535d5b57d014d06ceeaeffd816bb3a6e2dddeb670222570b8c4953e2d2ea678"}, {file = "SQLAlchemy-1.4.51-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af55cc207865d641a57f7044e98b08b09220da3d1b13a46f26487cc2f898a072"}, - {file = "SQLAlchemy-1.4.51-cp310-cp310-win32.whl", hash = "sha256:7af40425ac535cbda129d9915edcaa002afe35d84609fd3b9d6a8c46732e02ee"}, - {file = "SQLAlchemy-1.4.51-cp310-cp310-win_amd64.whl", hash = "sha256:8d1d7d63e5d2f4e92a39ae1e897a5d551720179bb8d1254883e7113d3826d43c"}, - {file = "SQLAlchemy-1.4.51-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:eaeeb2464019765bc4340214fca1143081d49972864773f3f1e95dba5c7edc7d"}, {file = "SQLAlchemy-1.4.51-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7deeae5071930abb3669b5185abb6c33ddfd2398f87660fafdb9e6a5fb0f3f2f"}, {file = "SQLAlchemy-1.4.51-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0892e7ac8bc76da499ad3ee8de8da4d7905a3110b952e2a35a940dab1ffa550e"}, - {file = "SQLAlchemy-1.4.51-cp311-cp311-win32.whl", hash = "sha256:50e074aea505f4427151c286955ea025f51752fa42f9939749336672e0674c81"}, - {file = "SQLAlchemy-1.4.51-cp311-cp311-win_amd64.whl", hash = "sha256:3b0cd89a7bd03f57ae58263d0f828a072d1b440c8c2949f38f3b446148321171"}, - {file = "SQLAlchemy-1.4.51-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:a33cb3f095e7d776ec76e79d92d83117438b6153510770fcd57b9c96f9ef623d"}, {file = "SQLAlchemy-1.4.51-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6cacc0b2dd7d22a918a9642fc89840a5d3cee18a0e1fe41080b1141b23b10916"}, {file = "SQLAlchemy-1.4.51-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:245c67c88e63f1523e9216cad6ba3107dea2d3ee19adc359597a628afcabfbcb"}, - {file = "SQLAlchemy-1.4.51-cp312-cp312-win32.whl", hash = "sha256:8e702e7489f39375601c7ea5a0bef207256828a2bc5986c65cb15cd0cf097a87"}, - {file = "SQLAlchemy-1.4.51-cp312-cp312-win_amd64.whl", hash = "sha256:0525c4905b4b52d8ccc3c203c9d7ab2a80329ffa077d4bacf31aefda7604dc65"}, - {file = "SQLAlchemy-1.4.51-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:1980e6eb6c9be49ea8f89889989127daafc43f0b1b6843d71efab1514973cca0"}, {file = "SQLAlchemy-1.4.51-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ec7a0ed9b32afdf337172678a4a0e6419775ba4e649b66f49415615fa47efbd"}, {file = "SQLAlchemy-1.4.51-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:352df882088a55293f621328ec33b6ffca936ad7f23013b22520542e1ab6ad1b"}, {file = "SQLAlchemy-1.4.51-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:86a22143a4001f53bf58027b044da1fb10d67b62a785fc1390b5c7f089d9838c"}, {file = "SQLAlchemy-1.4.51-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c37bc677690fd33932182b85d37433845de612962ed080c3e4d92f758d1bd894"}, - {file = "SQLAlchemy-1.4.51-cp36-cp36m-win32.whl", hash = "sha256:d0a83afab5e062abffcdcbcc74f9d3ba37b2385294dd0927ad65fc6ebe04e054"}, - {file = "SQLAlchemy-1.4.51-cp36-cp36m-win_amd64.whl", hash = "sha256:a61184c7289146c8cff06b6b41807c6994c6d437278e72cf00ff7fe1c7a263d1"}, - {file = "SQLAlchemy-1.4.51-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:3f0ef620ecbab46e81035cf3dedfb412a7da35340500ba470f9ce43a1e6c423b"}, {file = "SQLAlchemy-1.4.51-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c55040d8ea65414de7c47f1a23823cd9f3fad0dc93e6b6b728fee81230f817b"}, {file = "SQLAlchemy-1.4.51-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38ef80328e3fee2be0a1abe3fe9445d3a2e52a1282ba342d0dab6edf1fef4707"}, {file = "SQLAlchemy-1.4.51-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f8cafa6f885a0ff5e39efa9325195217bb47d5929ab0051636610d24aef45ade"}, {file = "SQLAlchemy-1.4.51-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8f2df79a46e130235bc5e1bbef4de0583fb19d481eaa0bffa76e8347ea45ec6"}, - {file = "SQLAlchemy-1.4.51-cp37-cp37m-win32.whl", hash = "sha256:f2e5b6f5cf7c18df66d082604a1d9c7a2d18f7d1dbe9514a2afaccbb51cc4fc3"}, - {file = "SQLAlchemy-1.4.51-cp37-cp37m-win_amd64.whl", hash = "sha256:5e180fff133d21a800c4f050733d59340f40d42364fcb9d14f6a67764bdc48d2"}, - {file = "SQLAlchemy-1.4.51-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:7d8139ca0b9f93890ab899da678816518af74312bb8cd71fb721436a93a93298"}, {file = "SQLAlchemy-1.4.51-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb18549b770351b54e1ab5da37d22bc530b8bfe2ee31e22b9ebe650640d2ef12"}, {file = "SQLAlchemy-1.4.51-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55e699466106d09f028ab78d3c2e1f621b5ef2c8694598242259e4515715da7c"}, {file = "SQLAlchemy-1.4.51-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2ad16880ccd971ac8e570550fbdef1385e094b022d6fc85ef3ce7df400dddad3"}, {file = "SQLAlchemy-1.4.51-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b97fd5bb6b7c1a64b7ac0632f7ce389b8ab362e7bd5f60654c2a418496be5d7f"}, - {file = "SQLAlchemy-1.4.51-cp38-cp38-win32.whl", hash = "sha256:cecb66492440ae8592797dd705a0cbaa6abe0555f4fa6c5f40b078bd2740fc6b"}, - {file = "SQLAlchemy-1.4.51-cp38-cp38-win_amd64.whl", hash = "sha256:39b02b645632c5fe46b8dd30755682f629ffbb62ff317ecc14c998c21b2896ff"}, - {file = "SQLAlchemy-1.4.51-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:b03850c290c765b87102959ea53299dc9addf76ca08a06ea98383348ae205c99"}, {file = "SQLAlchemy-1.4.51-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e646b19f47d655261b22df9976e572f588185279970efba3d45c377127d35349"}, {file = "SQLAlchemy-1.4.51-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3cf56cc36d42908495760b223ca9c2c0f9f0002b4eddc994b24db5fcb86a9e4"}, {file = "SQLAlchemy-1.4.51-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:0d661cff58c91726c601cc0ee626bf167b20cc4d7941c93c5f3ac28dc34ddbea"}, {file = "SQLAlchemy-1.4.51-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3823dda635988e6744d4417e13f2e2b5fe76c4bf29dd67e95f98717e1b094cad"}, - {file = "SQLAlchemy-1.4.51-cp39-cp39-win32.whl", hash = "sha256:b00cf0471888823b7a9f722c6c41eb6985cf34f077edcf62695ac4bed6ec01ee"}, - {file = "SQLAlchemy-1.4.51-cp39-cp39-win_amd64.whl", hash = "sha256:a055ba17f4675aadcda3005df2e28a86feb731fdcc865e1f6b4f209ed1225cba"}, {file = "SQLAlchemy-1.4.51.tar.gz", hash = "sha256:e7908c2025eb18394e32d65dd02d2e37e17d733cdbe7d78231c2b6d7eb20cdb9"}, ] @@ -3332,6 +3410,7 @@ sqlcipher = ["sqlcipher3_binary"] name = "sqlalchemy-utils" version = "0.38.3" description = "Various utility functions for SQLAlchemy." +category = "main" optional = false python-versions = "~=3.6" files = [ @@ -3360,6 +3439,7 @@ url = ["furl (>=0.4.1)"] name = "starlette" version = "0.27.0" description = "The little ASGI library that shines." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3378,6 +3458,7 @@ full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyam name = "surrogate" version = "0.1" description = "A Python micro-lib to create stubs for non-existing modules." +category = "dev" optional = false python-versions = "*" files = [ @@ -3388,6 +3469,7 @@ files = [ name = "sympy" version = "1.12" description = "Computer algebra system (CAS) in Python" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -3402,6 +3484,7 @@ mpmath = ">=0.19" name = "tenacity" version = "8.2.3" description = "Retry code until it succeeds" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3416,6 +3499,7 @@ doc = ["reno", "sphinx", "tornado (>=4.5)"] name = "text-unidecode" version = "1.3" description = "The most basic Text::Unidecode port" +category = "main" optional = false python-versions = "*" files = [ @@ -3427,6 +3511,7 @@ files = [ name = "threadpoolctl" version = "3.2.0" description = "threadpoolctl" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -3438,6 +3523,7 @@ files = [ name = "tokenizers" version = "0.15.0" description = "" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3553,6 +3639,7 @@ testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] name = "tomli" version = "2.0.1" description = "A lil' TOML parser" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3564,6 +3651,7 @@ files = [ name = "torch" version = "2.0.0" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" +category = "main" optional = false python-versions = ">=3.8.0" files = [ @@ -3619,6 +3707,7 @@ opt-einsum = ["opt-einsum (>=3.3)"] name = "torchvision" version = "0.15.1" description = "image and video datasets and models for torch deep learning" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -3646,7 +3735,7 @@ files = [ [package.dependencies] numpy = "*" -pillow = ">=5.3.0,<8.3.dev0 || >=8.4.dev0" +pillow = ">=5.3.0,<8.3.0 || >=8.4.0" requests = "*" torch = "2.0.0" @@ -3657,6 +3746,7 @@ scipy = ["scipy"] name = "tqdm" version = "4.66.1" description = "Fast, Extensible Progress Meter" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3677,6 +3767,7 @@ telegram = ["requests"] name = "transformers" version = "4.36.2" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" +category = "main" optional = false python-versions = ">=3.8.0" files = [ @@ -3745,6 +3836,7 @@ vision = ["Pillow (>=10.0.1,<=15.0)"] name = "triton" version = "2.0.0" description = "A language and compiler for custom Deep Learning operations" +category = "main" optional = false python-versions = "*" files = [ @@ -3782,6 +3874,7 @@ tutorials = ["matplotlib", "pandas", "tabulate"] name = "types-sqlalchemy" version = "1.4.53.38" description = "Typing stubs for SQLAlchemy" +category = "dev" optional = false python-versions = "*" files = [ @@ -3793,6 +3886,7 @@ files = [ name = "typing-extensions" version = "4.9.0" description = "Backported and Experimental Type Hints for Python 3.8+" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -3804,6 +3898,7 @@ files = [ name = "urllib3" version = "1.26.18" description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -3820,6 +3915,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] name = "uvicorn" version = "0.20.0" description = "The lightning-fast ASGI server." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3834,7 +3930,7 @@ h11 = ">=0.8" httptools = {version = ">=0.5.0", optional = true, markers = "extra == \"standard\""} python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} -uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\" and extra == \"standard\""} +uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\" and extra == \"standard\""} watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} @@ -3845,6 +3941,7 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", name = "uvloop" version = "0.19.0" description = "Fast implementation of asyncio event loop on top of libuv" +category = "main" optional = false python-versions = ">=3.8.0" files = [ @@ -3889,6 +3986,7 @@ test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0)", "aiohttp (>=3.8.1)" name = "virtualenv" version = "20.25.0" description = "Virtual Python Environment builder" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3909,6 +4007,7 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess name = "watchfiles" version = "0.21.0" description = "Simple, modern and high performance file watching and code reload in python." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -3996,6 +4095,7 @@ anyio = ">=3.0.0" name = "websockets" version = "12.0" description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -4077,6 +4177,7 @@ files = [ name = "werkzeug" version = "2.1.2" description = "The comprehensive WSGI web application library." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -4091,6 +4192,7 @@ watchdog = ["watchdog"] name = "wheel" version = "0.42.0" description = "A built-package format for Python" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4105,6 +4207,7 @@ test = ["pytest (>=6.0.0)", "setuptools (>=65)"] name = "xmltodict" version = "0.13.0" description = "Makes working with XML feel like you are working with JSON" +category = "dev" optional = false python-versions = ">=3.4" files = [ @@ -4116,6 +4219,7 @@ files = [ name = "xxhash" version = "3.4.1" description = "Python binding for xxHash" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4233,6 +4337,7 @@ files = [ name = "yarl" version = "1.9.4" description = "Yet another URL library" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4335,4 +4440,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "b436088fa7d16511af2429ebfed4528de9234fe23ea5264636acde4941d47b39" +content-hash = "4ecf6b1ccb1d80fc3125883ca60fc23cd80a294a763bceca22bc5dd929464dfa" diff --git a/pyproject.toml b/pyproject.toml index ae79d707..0d42849b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,6 @@ fastapi-pagination = { extras = ["sqlalchemy"], version = "^0.9.1" } httpx = "^0.22.0" itsdangerous = "^2.1.0" json-logging = "^1.3.0" -opensearch-py = "^1.1.0" pandas = "^1.4.1" passlib = "^1.7.4" psycopg2-binary = "^2.9.3" @@ -64,7 +63,6 @@ env_files = """ markers = [ "cors", "search", - "opensearch", "unit", ] asyncio_mode = "strict" diff --git a/tests/conftest.py b/tests/conftest.py index 3c02f039..af1528fc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,7 +13,6 @@ from app.core import security from app.core.aws import S3Client, get_s3_client -from app.core.search import OpenSearchConfig, OpenSearchConnection from app.db.models.app import AppUser from app.db.session import Base, get_db from app.main import app @@ -50,20 +49,6 @@ def test_s3_client(s3_document_bucket_names): yield s3_client -@pytest.fixture(scope="session") -def test_opensearch(): - """Provide a test OpenSearch DB""" - connection = OpenSearchConnection( - OpenSearchConfig( - url=os.environ["OPENSEARCH_URL"], - username=os.environ["OPENSEARCH_USER"], - password=os.environ["OPENSEARCH_PASSWORD"], - index_prefix=f"{os.environ['OPENSEARCH_INDEX_PREFIX']}_test", - ) - ) - yield connection - - @pytest.fixture(scope="session") def test_vespa(): """Connect to local vespa instance""" diff --git a/tests/routes/test_search.py b/tests/routes/test_search.py deleted file mode 100644 index a7433190..00000000 --- a/tests/routes/test_search.py +++ /dev/null @@ -1,1632 +0,0 @@ -import csv -import json -import random -import time -from datetime import datetime -from io import StringIO -from pathlib import Path -from typing import Any, Mapping, Sequence, cast -import httpx - -import pytest -from sqlalchemy import update -from sqlalchemy.orm import Session - -from app.api.api_v1.routers import search -from app.api.api_v1.schemas.search import ( - FilterField, - IncludedResults, - SortOrder, - SearchRequestBody, -) -from app.core.search import _FILTER_FIELD_MAP, OpenSearchQueryConfig -from tests.core.ingestion.legacy_setup.utils import get_or_create -from app.data_migrations.taxonomy_cclw import get_cclw_taxonomy -from app.db.models.app import Organisation -from app.db.models.law_policy.family import ( - DocumentStatus, - EventStatus, - FamilyCategory, - Family, - FamilyDocument, - FamilyDocumentType, - FamilyEvent, - FamilyEventType, - FamilyOrganisation, - Geography, - Slug, - Variant, -) -from app.db.models.law_policy.metadata import ( - FamilyMetadata, - MetadataTaxonomy, - MetadataOrganisation, -) -from app.db.models.document.physical_document import ( - Language, - LanguageSource, - PhysicalDocument, - PhysicalDocumentLanguage, -) -from app.initial_data import populate_geography, populate_language, populate_taxonomy - -SEARCH_ENDPOINT = "/api/v1/searches" -CSV_DOWNLOAD_ENDPOINT = "/api/v1/searches/download-csv" -_EXPECTED_FAMILY_TITLE = "Decision No 1386/2013/EU" - - -def clean_response(r: httpx.Response) -> dict: - new_r = r.json() - del new_r["query_time_ms"] - del new_r["total_time_ms"] - return new_r - - -def _populate_search_db_families(db: Session) -> None: - documents: dict[str, FamilyDocument] = {} - families: dict[str, Family] = {} - - populate_language(db) - populate_geography(db) - populate_taxonomy(db) - - original = Variant(variant_name="Original Language", description="") - translated = Variant(variant_name="Official Translation", description="") - variants: dict[str, Variant] = { - "translated_True": translated, - "translated_False": original, - } - organisation = Organisation( - name="CCLW", description="CCLW", organisation_type="CCLW Type" - ) - family_event_type = FamilyEventType( - name="Passed/Approved", - description="", - ) - db.add(family_event_type) - db.add(original) - db.add(translated) - db.add(organisation) - db.commit() - db.refresh(organisation) - - cclw_taxonomy_data = get_cclw_taxonomy() - - containing_dir = Path(__file__).parent - data_dir = containing_dir.parent / "data" - for f in data_dir.iterdir(): - if f.is_file() and f.suffixes == [".json"]: - with open(f, "r") as of: - for line in of.readlines(): - search_document = json.loads(line) - _create_family_structures( - db, - search_document, - documents, - families, - variants, - organisation, - cclw_taxonomy_data, - ) - - -def _generate_metadata( - cclw_taxonomy_data: Mapping[str, dict] -) -> Mapping[str, Sequence[str]]: - meta_value = {} - for k in cclw_taxonomy_data: - element_count = random.randint(0, 3) - meta_value[k] = random.sample( - cclw_taxonomy_data[k]["allowed_values"], element_count - ) - return meta_value - - -def _create_family_structures( - db: Session, - doc: dict[str, Any], - documents: dict[str, FamilyDocument], - families: dict[str, Family], - variants: dict[str, Variant], - organisation: Organisation, - cclw_taxonomy_data: Mapping[str, dict], -) -> None: - """Populate a db to match the test search index code""" - doc_details = doc["_source"] - doc_id = doc_details["document_id"] - if doc_id in documents: - return - - doc_type = get_or_create( - db, - FamilyDocumentType, - **{ - "name": doc_details["document_type"], - "description": doc_details["document_type"], - }, - ) - - doc_id_components = doc_id.split(".") - family_id = f"CCLW.family.{doc_id_components[2]}.0" # assume single family - - if family_id not in families: - family = Family( - # Truncate the family name to produce the same "family name" for the example - # data where we have engineered 2 documents into a single family. - title=doc_details["document_name"][:24], - import_id=family_id, - description=doc_details["document_description"], - geography_id=( - db.query(Geography) - .filter(Geography.value == doc_details["document_geography"]) - .one() - .id - ), - family_category=FamilyCategory(doc_details["document_category"]), - ) - family_slug = Slug( - name=family_id, - family_import_id=family_id, - family_document_import_id=None, - ) - family_organisation = FamilyOrganisation( - family_import_id=family_id, - organisation_id=organisation.id, - ) - db.add(family) - db.commit() - db.add(family_slug) - db.add(family_organisation) - db.commit() - db.refresh(family) - families[family_id] = family - - # Make sure we add an event so we can filter by date - family_event = FamilyEvent( - import_id=f"CCLW.event.{doc_id_components[2]}.0", - title=f"CCLW.family.{doc_id_components[2]}.0 Event", - date=datetime.strptime(doc_details["document_date"], "%d/%m/%Y"), - event_type_name="Passed/Approved", - family_import_id=family_id, - family_document_import_id=None, - status=EventStatus.OK, - ) - db.add(family_event) - db.commit() - - metadata_value = _generate_metadata(cclw_taxonomy_data) - - family_metadata = FamilyMetadata( - family_import_id=family.import_id, - taxonomy_id=( - db.query(MetadataTaxonomy) - .join( - MetadataOrganisation, - MetadataOrganisation.taxonomy_id == MetadataTaxonomy.id, - ) - .join( - Organisation, - MetadataOrganisation.organisation_id == Organisation.id, - ) - .filter(Organisation.name == "CCLW") - .one() - .id - ), - value=metadata_value, - ) - db.add(family_metadata) - db.commit() - - physical_document = PhysicalDocument( - title=doc_details["document_name"], - cdn_object=doc_details["document_cdn_object"], - md5_sum=doc_details["document_md5_sum"], - source_url=doc_details["document_source_url"], - content_type=doc_details["document_content_type"], - ) - db.add(physical_document) - db.commit() - db.refresh(physical_document) - # TODO: better handling of document language! - existing_language = db.query(Language).filter(Language.name == "English").one() - physical_document_language = PhysicalDocumentLanguage( - language_id=existing_language.id, - document_id=physical_document.id, - source=LanguageSource.USER, - visible=True, - ) - db.add(physical_document_language) - db.commit() - db.refresh(physical_document_language) - db.refresh(physical_document) - family_document = FamilyDocument( - family_import_id=family_id, - physical_document_id=physical_document.id, - import_id=doc_id, - variant_name=variants[f"translated_{doc_details['translated']}"].variant_name, - document_status=DocumentStatus.PUBLISHED, - document_type=doc_type.name, - ) - family_document_slug = Slug( - name=f"fd_{doc_id}", - family_import_id=None, - family_document_import_id=doc_id, - ) - db.add(family_document) - db.commit() - db.add(family_document_slug) - db.commit() - db.refresh(family_document) - documents[doc_id] = family_document - - -@pytest.mark.opensearch -def test_slug_is_from_family_document(test_opensearch, client, test_db, monkeypatch): - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - page1_response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "and", - "exact_match": False, - "limit": 2, - "offset": 0, - }, - params={"use_vespa": False}, - ) - assert page1_response.status_code == 200 - - page1_response_body = page1_response.json() - fam1 = page1_response_body["families"][0] - doc1 = fam1["family_documents"][0] - assert doc1["document_slug"].startswith("fd_") and "should be from FamilyDocument" - - -@pytest.mark.opensearch -def test_simple_pagination_families(test_opensearch, client, test_db, monkeypatch): - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - page1_response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "and", - "exact_match": False, - "limit": 2, - "offset": 0, - }, - params={"use_vespa": False}, - ) - assert page1_response.status_code == 200 - - page1_response_body = page1_response.json() - page1_families = page1_response_body["families"] - assert len(page1_families) == 2 - - page2_response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "and", - "exact_match": False, - "limit": 2, - "offset": 2, - }, - params={"use_vespa": False}, - ) - assert page2_response.status_code == 200 - - page2_response_body = page2_response.json() - page2_families = page2_response_body["families"] - assert len(page2_families) == 2 - - # Sanity check that we really do have 4 different documents - family_slugs = {d["family_slug"] for d in page1_families} | { - d["family_slug"] for d in page2_families - } - - assert len(family_slugs) == 4 - - for d in page1_families: - assert d not in page2_families - - -@pytest.mark.opensearch -@pytest.mark.parametrize("exact_match", [True, False]) -def test_search_body_valid(exact_match, test_opensearch, monkeypatch, client, test_db): - """Test a simple known valid search responds with success.""" - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "disaster", - "exact_match": exact_match, - }, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - - -@pytest.mark.opensearch -def test_benchmark_families_search(test_opensearch, monkeypatch, client, test_db): - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - times = [] - for _ in range(1, 10): - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "climate", - "exact_match": True, - }, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - time_taken = response.json()["total_time_ms"] - times.append(str(time_taken)) - - with open("/data/benchmark_search.txt", "w") as out_file: - out_file.write("\n".join(times)) - - -@pytest.mark.opensearch -def test_benchmark_families_browse(test_opensearch, monkeypatch, client, test_db): - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - times = [] - for _ in range(1, 10): - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "", - }, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - time_taken = response.json()["total_time_ms"] - times.append(str(time_taken)) - - with open("/data/benchmark_browse.txt", "w") as out_file: - out_file.write("\n".join(times)) - - -@pytest.mark.opensearch -def test_families_search(test_opensearch, monkeypatch, client, test_db, mocker): - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - expected_config = OpenSearchQueryConfig() - expected_search_body = SearchRequestBody( - query_string="climate", - exact_match=True, - max_passages_per_doc=10, - keyword_filters=None, - year_range=None, - sort_field=None, - sort_order=SortOrder.DESCENDING, - limit=10, - offset=0, - ) - query_spy = mocker.spy(search._OPENSEARCH_CONNECTION, "query_families") - - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "climate", - "exact_match": True, - }, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - # Ensure nothing has/is going on in the background - assert query_spy.call_count == 1 # Called once as not using jit search - - actual_search_body = query_spy.mock_calls[0].kwargs["search_request_body"] - assert actual_search_body == expected_search_body - - # Check default config is used - actual_config = query_spy.mock_calls[0].kwargs["opensearch_internal_config"] - assert actual_config == expected_config - - # Check the correct number of hits is returned - data = response.json() - assert data["hits"] == 3 - assert len(data["families"]) == 3 - - names_returned = [f["family_name"] for f in data["families"]] - assert _EXPECTED_FAMILY_TITLE in names_returned - - -@pytest.mark.opensearch -def test_families_search_with_all_docs_deleted( - test_opensearch, monkeypatch, client, test_db -): - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - # This test is fragile due to _EXPECTED_FAMILY_TITLE being generated in the - # populate db function from imperfect data. Ye be warned! 🏴‍☠️ - family = test_db.query(Family).filter(Family.title == _EXPECTED_FAMILY_TITLE).one() - for doc in family.family_documents: - test_db.execute( - update(FamilyDocument) - .where(FamilyDocument.import_id == doc.import_id) - .values(document_status="Deleted") - ) - - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "climate", - "exact_match": False, - }, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - - response2 = client.get(f"/api/v1/documents/{family.import_id}") - assert response2.status_code == 404 - - # Check the correct number of hits is returned - data = response.json() - assert data["hits"] == 2 - assert len(data["families"]) == 2 - names_returned = [f["family_name"] for f in data["families"]] - assert _EXPECTED_FAMILY_TITLE not in names_returned - - -@pytest.mark.opensearch -def test_families_search_with_one_doc_deleted( - test_opensearch, monkeypatch, client, test_db -): - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - # This test is fragile due to _EXPECTED_FAMILY_TITLE being generated in the - # populate db function from imperfect data. Ye be warned! 🏴‍☠️ - family = test_db.query(Family).filter(Family.title == _EXPECTED_FAMILY_TITLE).one() - doc = family.family_documents[0] - test_db.execute( - update(FamilyDocument) - .where(FamilyDocument.import_id == doc.import_id) - .values(document_status="Deleted") - ) - deleted_title = doc.physical_document.title - - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "climate", - "exact_match": True, - }, - params={"use_vespa": False}, - ) - - assert response.status_code == 200 - - # Check the correct number of hits is returned - data = response.json() - assert data["hits"] == 3 - assert len(data["families"]) == 3 - names_returned = [f["family_name"] for f in data["families"]] - assert _EXPECTED_FAMILY_TITLE in names_returned - - # Check the deleted document is not returned but the non-deleted one is - found = False - for fam in data["families"]: - if fam["family_name"] == _EXPECTED_FAMILY_TITLE: - found = True - doc_titles = [d["document_title"] for d in fam["family_documents"]] - assert len(doc_titles) == 1 - assert deleted_title not in doc_titles - - assert found - - -@pytest.mark.opensearch -def test_keyword_filters(test_opensearch, client, test_db, monkeypatch, mocker): - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - query_spy = mocker.spy(search._OPENSEARCH_CONNECTION, "raw_query") - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "climate", - "exact_match": False, - "keyword_filters": {"countries": ["kenya"]}, - "jit_query": "disabled", - }, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - assert query_spy.call_count == 1 - query_body = query_spy.mock_calls[0].args[0] - - assert { - "terms": {_FILTER_FIELD_MAP[FilterField("countries")]: ["KEN"]} - } in query_body["query"]["bool"]["filter"] - - -@pytest.mark.opensearch -def test_keyword_filters_region(test_opensearch, test_db, monkeypatch, client, mocker): - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - query_spy = mocker.spy(search._OPENSEARCH_CONNECTION, "raw_query") - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "climate", - "exact_match": False, - "keyword_filters": {"regions": ["south-asia"]}, - "jit_query": "disabled", - }, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - assert query_spy.call_count == 1 - query_body = query_spy.mock_calls[0].args[0] - - assert { - "terms": { - _FILTER_FIELD_MAP[FilterField.COUNTRY]: [ - "AFG", - "BGD", - "BTN", - "IND", - "LKA", - "MDV", - "NPL", - "PAK", - ] - } - } in query_body["query"]["bool"]["filter"] - - # Only country filters should be added - query_term_keys = [] - for d in query_body["query"]["bool"]["filter"]: - search_term_dict = d["terms"] - query_term_keys.extend(search_term_dict.keys()) - - assert [_FILTER_FIELD_MAP[FilterField.COUNTRY]] == query_term_keys - - -@pytest.mark.opensearch -def test_keyword_filters_region_invalid( - test_opensearch, monkeypatch, client, test_db, mocker -): - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - query_spy = mocker.spy(search._OPENSEARCH_CONNECTION, "raw_query") - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "climate", - "exact_match": False, - "keyword_filters": {"regions": ["daves-region"]}, - "jit_query": "disabled", - }, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - assert query_spy.call_count == 1 - query_body = query_spy.mock_calls[0].args[0] - - # The region is invalid, so no filters should be applied - assert "filter" not in query_body["query"]["bool"] - - -@pytest.mark.opensearch -def test_invalid_keyword_filters(test_opensearch, test_db, monkeypatch, client): - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "disaster", - "exact_match": False, - "keyword_filters": { - "geographies": ["kenya"], - "unknown_filter_no1": ["BOOM"], - }, - }, - params={"use_vespa": False}, - ) - assert response.status_code == 422 - - -@pytest.mark.opensearch -@pytest.mark.parametrize( - "year_range", [(None, None), (1900, None), (None, 2020), (1900, 2020)] -) -def test_year_range_filters( - year_range, - test_opensearch, - monkeypatch, - client, - test_db, - mocker, -): - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - query_spy = mocker.spy(search._OPENSEARCH_CONNECTION, "raw_query") - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "disaster", - "exact_match": False, - "year_range": year_range, - "jit_query": "disabled", - }, - params={"use_vespa": False}, - ) - query_body = query_spy.mock_calls[0].args[0] - - assert response.status_code == 200 - assert query_spy.call_count == 1 - # Check that search query default order is not modified unless requested - assert query_body["aggs"]["sample"]["aggs"]["top_docs"]["terms"]["order"] == { - "top_hit": "desc" - } - - if year_range[0] or year_range[1]: - expected_range_check = { - "range": { - "document_date": dict( - [ - r - for r in zip( - ["gte", "lte"], - [ - f"01/01/{year_range[0]}" - if year_range[0] is not None - else None, - f"31/12/{year_range[1]}" - if year_range[1] is not None - else None, - ], - ) - if r[1] is not None - ] - ) - } - } - - assert expected_range_check in query_body["query"]["bool"]["filter"] - else: - assert "filter" not in query_body["query"]["bool"] - - -@pytest.mark.opensearch -def test_multiple_filters(test_opensearch, test_db, monkeypatch, client, mocker): - """Check that multiple filters are successfully applied""" - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - query_spy = mocker.spy(search._OPENSEARCH_CONNECTION, "raw_query") - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "greenhouse", - "exact_match": False, - "keyword_filters": { - "countries": ["south-korea"], - "sources": ["CCLW"], - "categories": ["Legislative"], - }, - "year_range": (1900, 2020), - "jit_query": "disabled", - }, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - assert query_spy.call_count == 1 - query_body = query_spy.mock_calls[0].args[0] - - assert { - "terms": {_FILTER_FIELD_MAP[FilterField("countries")]: ["KOR"]} - } in query_body["query"]["bool"]["filter"] - assert { - "terms": {_FILTER_FIELD_MAP[FilterField("sources")]: ["CCLW"]} - } in query_body["query"]["bool"]["filter"] - assert { - "terms": {_FILTER_FIELD_MAP[FilterField("categories")]: ["Legislative"]} - } in query_body["query"]["bool"]["filter"] - assert { - "range": {"document_date": {"gte": "01/01/1900", "lte": "31/12/2020"}} - } in query_body["query"]["bool"]["filter"] - - response_content = response.json() - assert response_content["hits"] > 0 - assert len(response.json()["families"]) > 0 - families = response_content["families"] - for family in families: - assert family["family_category"] == "Legislative" - - -@pytest.mark.opensearch -def test_result_order_score(test_opensearch, monkeypatch, client, test_db, mocker): - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - query_spy = mocker.spy(search._OPENSEARCH_CONNECTION, "raw_query") - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "disaster", - "exact_match": False, - }, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - query_response = query_spy.spy_return.raw_response - result_docs = query_response["aggregations"]["sample"]["top_docs"]["buckets"] - - s = None - for d in result_docs: - new_s = d["top_hit"]["value"] - if s is not None: - assert new_s <= s - s = new_s - - -@pytest.mark.opensearch -@pytest.mark.parametrize("order", [SortOrder.ASCENDING, SortOrder.DESCENDING]) -def test_result_order_date(test_opensearch, monkeypatch, client, test_db, order): - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "climate", - "exact_match": False, - "sort_field": "date", - "sort_order": order.value, - }, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - - response_body = response.json() - elements = response_body["families"] - assert len(elements) > 1 - - dt = None - for e in elements: - new_dt = datetime.fromisoformat(e["family_date"]) - if dt is not None: - if order == SortOrder.DESCENDING: - assert new_dt <= dt - if order == SortOrder.ASCENDING: - assert new_dt >= dt - dt = new_dt - - -@pytest.mark.opensearch -@pytest.mark.parametrize("order", [SortOrder.ASCENDING, SortOrder.DESCENDING]) -def test_result_order_title(test_opensearch, monkeypatch, client, test_db, order): - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "climate", - "exact_match": False, - "sort_field": "title", - "sort_order": order.value, - }, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - - response_body = response.json() - elements = response_body["families"] - assert len(elements) > 1 - - t = None - for e in elements: - new_t = e["family_name"] - if t is not None: - if order == SortOrder.DESCENDING: - assert new_t <= t - if order == SortOrder.ASCENDING: - assert new_t >= t - t = new_t - - -@pytest.mark.opensearch -def test_invalid_request(test_opensearch, monkeypatch, client, test_db): - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - response = client.post( - SEARCH_ENDPOINT, - json={"exact_match": False}, - ) - assert response.status_code == 422 - - response = client.post( - SEARCH_ENDPOINT, - json={"limit": 1, "offset": 2}, - ) - assert response.status_code == 422 - - response = client.post( - SEARCH_ENDPOINT, - json={}, - ) - assert response.status_code == 422 - - -@pytest.mark.opensearch -def test_case_insensitivity(test_opensearch, monkeypatch, client, test_db): - """Make sure that query string results are not affected by case.""" - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - response1 = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "climate", - "exact_match": False, - }, - params={"use_vespa": False}, - ) - response2 = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "climate", - "exact_match": False, - }, - params={"use_vespa": False}, - ) - response3 = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "climate", - "exact_match": False, - }, - params={"use_vespa": False}, - ) - - response1_json = clean_response(response1) - response2_json = clean_response(response2) - response3_json = clean_response(response3) - - assert response1_json["families"] - assert response1_json == response2_json == response3_json - - -@pytest.mark.opensearch -def test_punctuation_ignored(test_opensearch, monkeypatch, client, test_db): - """Make sure that punctuation in query strings is ignored.""" - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - response1 = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "climate.", - "exact_match": False, - }, - params={"use_vespa": False}, - ) - response2 = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "climate, ", - "exact_match": False, - }, - params={"use_vespa": False}, - ) - response3 = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": ";climate", - "exact_match": False, - }, - params={"use_vespa": False}, - ) - - response1_json = clean_response(response1) - response2_json = clean_response(response2) - response3_json = clean_response(response3) - - assert response1_json["families"] - assert response1_json == response2_json == response3_json - - -@pytest.mark.opensearch -def test_sensitive_queries(test_db, test_opensearch, monkeypatch, client): - """Make sure that queries in the list of sensitive queries only return results containing that term, and not KNN results.""" - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - response1 = client.post( - SEARCH_ENDPOINT, - json={"query_string": "spain", "exact_match": False}, - params={"use_vespa": False}, - ) - - response2 = client.post( - SEARCH_ENDPOINT, - json={"query_string": "clean energy strategy", "exact_match": False}, - params={"use_vespa": False}, - ) - - # In this example the sensitive term is less than half the length of the query, so KNN results should be returned - response3 = client.post( - SEARCH_ENDPOINT, - json={"query_string": "spanish ghg emissions", "exact_match": False}, - params={"use_vespa": False}, - ) - - response1_json = response1.json() - response2_json = response2.json() - response3_json = response3.json() - - # If the queries above return no results then the tests below are meaningless - assert len(response1_json["families"]) > 0 - assert len(response2_json["families"]) > 0 - assert len(response3_json["families"]) > 0 - - assert all( - [ - "spain" in passage_match["text"].lower() - for family in response1_json["families"] - for document in family["family_documents"] - for passage_match in document["document_passage_matches"] - ] - ) - assert not all( - [ - "clean energy strategy" in passage_match["text"].lower() - for family in response1_json["families"] - for document in family["family_documents"] - for passage_match in document["document_passage_matches"] - ] - ) - assert not all( - [ - "spanish ghg emissions" in passage_match["text"].lower() - for family in response1_json["families"] - for document in family["family_documents"] - for passage_match in document["document_passage_matches"] - ] - ) - - -@pytest.mark.opensearch -def test_accents_ignored(test_db, test_opensearch, monkeypatch, client): - """Make sure that accents in query strings are ignored.""" - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - response1 = client.post( - SEARCH_ENDPOINT, - json={"query_string": "climàte", "exact_match": False}, - params={"use_vespa": False}, - ) - response2 = client.post( - SEARCH_ENDPOINT, - json={"query_string": "climatë", "exact_match": False}, - params={"use_vespa": False}, - ) - response3 = client.post( - SEARCH_ENDPOINT, - json={"query_string": "climàtë", "exact_match": False}, - params={"use_vespa": False}, - ) - - response1_json = clean_response(response1) - response2_json = clean_response(response2) - response3_json = clean_response(response3) - assert response1_json["families"] - assert response1_json == response2_json == response3_json - - -@pytest.mark.opensearch -def test_time_taken(test_opensearch, monkeypatch, client): - """Make sure that query time taken is sensible.""" - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - - start = time.time() - response = client.post( - SEARCH_ENDPOINT, - json={"query_string": "disaster", "exact_match": False}, - params={"use_vespa": False}, - ) - end = time.time() - - assert response.status_code == 200 - response_json = response.json() - reported_response_time_ms = response_json["query_time_ms"] - expected_response_time_ms_max = 1000 * (end - start) - assert 0 < reported_response_time_ms < expected_response_time_ms_max - - -@pytest.mark.opensearch -def test_empty_search_term_performs_browse(client, test_db): - """Make sure that empty search term returns results in browse mode.""" - _populate_search_db_families(test_db) - - response = client.post( - SEARCH_ENDPOINT, - json={"query_string": ""}, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - assert response.json()["hits"] > 0 - assert len(response.json()["families"]) > 0 - - -@pytest.mark.opensearch -@pytest.mark.parametrize("order", [SortOrder.ASCENDING, SortOrder.DESCENDING]) -def test_browse_order_by_title(client, test_db, order): - """Make sure that empty search terms return no results.""" - _populate_search_db_families(test_db) - - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "", - "sort_field": "title", - "sort_order": order.value, - }, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - - response_body = response.json() - result_elements = response_body["families"] - - assert len(result_elements) > 0 - - t = None - for e in result_elements: - new_t = e["family_name"] - if t is not None: - if order == SortOrder.DESCENDING: - assert new_t <= t - if order == SortOrder.ASCENDING: - assert new_t >= t - t = new_t - - -@pytest.mark.opensearch -@pytest.mark.parametrize("order", [SortOrder.ASCENDING, SortOrder.DESCENDING]) -@pytest.mark.parametrize("start_year", [None, 1999, 2007]) -@pytest.mark.parametrize("end_year", [None, 2011, 2018]) -def test_browse_order_by_date(order, start_year, end_year, client, test_db): - """Make sure that empty search terms return no results.""" - _populate_search_db_families(test_db) - - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "", - "sort_field": "date", - "sort_order": order.value, - "year_range": [start_year, end_year], - }, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - - response_body = response.json() - result_elements = response_body["families"] - assert len(result_elements) > 0 - - dt = None - new_dt = None - for e in result_elements: - if e["family_date"]: - new_dt = datetime.fromisoformat(e["family_date"]).isoformat() - if dt is not None and new_dt is not None: - if order == SortOrder.DESCENDING: - assert new_dt <= dt - if order == SortOrder.ASCENDING: - assert new_dt >= dt - if start_year is not None: - assert new_dt >= datetime(year=start_year, month=1, day=1).isoformat() - if end_year is not None: - assert new_dt <= datetime(year=end_year, month=12, day=31).isoformat() - dt = new_dt - - -@pytest.mark.opensearch -@pytest.mark.parametrize("limit", [1, 4, 7, 10]) -def test_browse_limit_offset(client, test_db, limit): - """Make sure that the offset parameter in browse mode works.""" - _populate_search_db_families(test_db) - - response_offset_0 = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "", - "limit": limit, - "offset": 0, - }, - params={"use_vespa": False}, - ) - response_offset_2 = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "", - "limit": limit, - "offset": 2, - }, - params={"use_vespa": False}, - ) - - assert response_offset_0.status_code == 200 - assert response_offset_2.status_code == 200 - - response_offset_0_body = response_offset_0.json() - result_elements_0 = response_offset_0_body["families"] - assert len(result_elements_0) <= limit - - response_offset_2_body = response_offset_2.json() - result_elements_2 = response_offset_2_body["families"] - assert len(result_elements_2) <= limit - - assert result_elements_0[2 : len(result_elements_2)] == result_elements_2[:-2] - - -@pytest.mark.opensearch -def test_browse_filters(client, test_db): - """Check that multiple filters are successfully applied""" - _populate_search_db_families(test_db) - - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "", - "keyword_filters": { - "countries": ["japan"], - "sources": ["CCLW"], - }, - "year_range": (1900, 2020), - "jit_query": "disabled", - }, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - - response_body = response.json() - result_elements = response_body["families"] - assert len(result_elements) == 1 - - for result in result_elements: - result_date = result["family_date"] - assert result["family_source"] == "CCLW" - assert result["family_geography"] == "JPN" - assert result_date == "2017-01-01T00:00:00+00:00" - - -@pytest.mark.opensearch -def test_browse_filters_region(client, test_db): - """Check that multiple filters are successfully applied""" - _populate_search_db_families(test_db) - - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "", - "keyword_filters": { - "regions": ["east-asia-pacific"], - "sources": ["CCLW"], - }, - "year_range": (1900, 2020), - "jit_query": "disabled", - }, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - - response_body = response.json() - result_elements = response_body["families"] - assert len(result_elements) == 4 - geographies = [family["family_geography"] for family in result_elements] - assert set(geographies) == set(["JPN", "AUS", "IDN", "KOR"]) - - -@pytest.mark.opensearch -def test_browse_filters_region_and_geography(client, test_db): - """Check that multiple filters are successfully applied""" - _populate_search_db_families(test_db) - - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "", - "keyword_filters": { - "regions": ["east-asia-pacific"], - "countries": ["japan"], - "sources": ["CCLW"], - }, - "year_range": (1900, 2020), - "jit_query": "disabled", - }, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - - response_body = response.json() - result_elements = response_body["families"] - assert len(result_elements) == 4 - geographies = [family["family_geography"] for family in result_elements] - # TODO: I think it should behave like this: - # assert set(geographies) == set(['JPN']) - assert set(geographies) == set(["JPN", "AUS", "IDN", "KOR"]) - - -# TODO: This test will fail - as the countries expects a slug not an ISO -# value - this is in contrast to Opensearch which uses the same files but -# in this case the value will be an ISO. -# -# @pytest.mark.opensearch -# def test_browse_filters_geography_iso(client, test_db): -# """Check that multiple filters are successfully applied""" -# _populate_search_db_families(test_db) - -# response = client.post( -# SEARCH_ENDPOINT, -# json={ -# "query_string": "", -# "keyword_filters": { -# "countries": ["JPN"], -# "sources": ["CCLW"], -# }, -# "year_range": (1900, 2020), -# "jit_query": "disabled", -# }, -# ) -# assert response.status_code == 200 - -# response_body = response.json() -# result_elements = response_body["families"] -# assert len(result_elements) == 1 - -# geographies = [ -# family["family_geography"] -# for family in result_elements -# ] -# assert set(geographies) == set(['JPN']) - - -@pytest.mark.opensearch -def test_browse_filters_geography_slug(client, test_db): - """Check that multiple filters are successfully applied""" - _populate_search_db_families(test_db) - - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "", - "keyword_filters": { - "countries": ["japan"], - "sources": ["CCLW"], - }, - "year_range": (1900, 2020), - "jit_query": "disabled", - }, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - - response_body = response.json() - result_elements = response_body["families"] - assert len(result_elements) == 1 - geographies = [family["family_geography"] for family in result_elements] - assert set(geographies) == set(["JPN"]) - - -@pytest.mark.opensearch -def test_browse_filter_category(client, test_db): - """Make sure that empty search term returns results in browse mode.""" - _populate_search_db_families(test_db) - - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "", - "keyword_filters": {"categories": ["Executive"]}, - }, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - response_content = response.json() - assert response_content["hits"] > 0 - assert len(response.json()["families"]) > 0 - families = response_content["families"] - for family in families: - assert family["family_category"] == "Executive" - - -def _get_docs_for_family(db: Session, slug: str) -> Sequence[FamilyDocument]: - slug_object: Slug = db.query(Slug).filter(Slug.name == slug).one() - family: Family = ( - db.query(Family).filter(Family.import_id == slug_object.family_import_id).one() - ) - documents: Sequence[FamilyDocument] = ( - db.query(FamilyDocument) - .filter(FamilyDocument.family_import_id == family.import_id) - .all() - ) - - return documents - - -def _get_validation_data(db: Session, families: Sequence[dict]) -> dict[str, Any]: - return { - family["family_name"]: { - "family": family, - "documents": { - doc["document_title"]: doc for doc in family["family_documents"] - }, - "all_docs": { - doc.physical_document.title: doc - for doc in _get_docs_for_family(db, family["family_slug"]) - if doc.physical_document is not None - }, - "metadata": ( - db.query(FamilyMetadata) - .join(Slug, FamilyMetadata.family_import_id == Slug.family_import_id) - .filter(Slug.family_import_id == family["family_slug"]) - .one() - .value - ), - } - for family in families - } - - -@pytest.mark.opensearch -@pytest.mark.parametrize("exact_match", [True, False]) -@pytest.mark.parametrize("query_string", ["", "carbon"]) -def test_csv_content( - exact_match, - query_string, - client, - test_db, - test_opensearch, - monkeypatch, -): - """Make sure that downloaded CSV content matches a given search""" - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - search_response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": query_string, - "exact_match": exact_match, - }, - params={"use_vespa": False}, - ) - assert search_response.status_code == 200 - search_response_content = search_response.json() - assert search_response_content["hits"] > 0 - assert len(search_response.json()["families"]) > 0 - families = search_response_content["families"] - - validation_data = _get_validation_data(test_db, families) - expected_csv_row_count = len(validation_data) - for f in validation_data: - len_all_family_documents = len(validation_data[f]["documents"]) + len( - [ - None - for d in validation_data[f]["all_docs"] - if d not in validation_data[f]["documents"] - ] - ) - if len_all_family_documents > 1: - # Extra rows only exist for multi-doc families - expected_csv_row_count += len_all_family_documents - 1 - - search_response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": query_string, - "exact_match": exact_match, - }, - params={"use_vespa": False}, - ) - assert search_response.status_code == 200 - search_content = search_response.json() - all_matching_titles = { - d["document_title"] - for f in search_content["families"] - for d in f["family_documents"] - if d["document_passage_matches"] - } - - download_response = client.post( - CSV_DOWNLOAD_ENDPOINT, - json={ - "query_string": query_string, - "exact_match": exact_match, - }, - params={"use_vespa": False}, - ) - assert download_response.status_code == 200 - csv_content = csv.DictReader(StringIO(download_response.content.decode("utf8"))) - - row_count = 0 - doc_match_count = 0 - for row in csv_content: - row_count += 1 - family_name = row["Family Name"] - assert family_name in validation_data - family = validation_data[family_name]["family"] - assert family["family_name"] == row["Family Name"] - assert family["family_description"] == row["Family Summary"] - assert family["family_date"] == row["Family Publication Date"] - assert family["family_source"] == row["Source"] - assert family["family_category"] == row["Category"] - assert row["Family URL"].endswith(family["family_slug"]) - assert family["family_geography"] == row["Geography"] - - # TODO: Test family metadata - need improved test_db setup - if doc_title := row["Document Title"]: - if doc_title in validation_data[family_name]["documents"]: - # The result is in search results directly, so use those details - document = validation_data[family_name]["documents"][doc_title] - assert document["document_title"] == row["Document Title"] - assert row["Document URL"].endswith(document["document_slug"]) - # Check that if the content type is pdf, we include a CDN URL for - # the document, otherwise we send the document source URL. - if document["document_content_type"] == "application/pdf": - assert row["Document Content URL"].startswith( - "https://cdn.climatepolicyradar.org/" - ) - else: - # Deal with the fact that our document model allows `None` for URL - validation_source_url = document["document_source_url"] or "" - assert validation_source_url == row["Document Content URL"] - assert document["document_type"] == row["Document Type"] - else: - # The result is an extra document retrieved from the database - assert doc_title in validation_data[family_name]["all_docs"] - db_document: FamilyDocument = validation_data[family_name]["all_docs"][ - doc_title - ] - assert db_document.physical_document is not None - assert db_document.physical_document.title == row["Document Title"] - assert row["Document URL"].endswith( - cast(str, db_document.slugs[-1].name) - ) - if db_document.physical_document.content_type == "application/pdf": - assert row["Document Content URL"].startswith( - "https://cdn.climatepolicyradar.org/" - ) - else: - assert ( - db_document.physical_document.source_url - or "" == row["Document Content URL"] - ) - assert db_document.document_type == row["Document Type"] - if query_string: - should_match_document = row["Document Title"] in all_matching_titles - if should_match_document: - doc_match_count += 1 - assert row["Document Content Matches Search Phrase"] == "Yes" - else: - assert row["Document Content Matches Search Phrase"] == "No" - else: - assert row["Document Content Matches Search Phrase"] == "n/a" - assert row["Languages"] == "English" - else: - assert row["Document URL"] == "" - assert row["Document Content URL"] == "" - assert row["Document Type"] == "" - assert row["Document Content Matches Search Phrase"] == "n/a" - assert row["Languages"] == "" - - expected_metadata = validation_data[family_name]["metadata"] - for k in expected_metadata: - assert k.title() in row - assert row[k.title()] == ";".join(expected_metadata[k]) - - if query_string: - # Make sure that we have tested some rows, that we have some "Yes" the document - # matches the search term, and some "No" values too! - assert doc_match_count > 0 - assert doc_match_count < row_count - assert row_count == expected_csv_row_count - - -@pytest.mark.opensearch -@pytest.mark.parametrize("query_string", ["", "greenhouse"]) -@pytest.mark.parametrize("limit", [1, 10, 35, 150]) -@pytest.mark.parametrize("offset", [0, 5, 10, 80]) -def test_csv_download_no_limit( - query_string, - limit, - offset, - client, - test_db, - test_opensearch, - monkeypatch, - mocker, -): - """Make sure that downloaded CSV is not limited to a single page of results.""" - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - if query_string: - query_spy = mocker.spy(search._OPENSEARCH_CONNECTION, "query_families") - else: - query_spy = mocker.spy(search, "browse_rds_families") - - download_response = client.post( - CSV_DOWNLOAD_ENDPOINT, - json={ - "query_string": query_string, - "limit": limit, - "offset": offset, - }, - params={"use_vespa": False}, - ) - assert download_response.status_code == 200 - - if query_string: - actual_search_req = query_spy.mock_calls[0].kwargs["search_request_body"] - else: - actual_search_req = query_spy.mock_calls[0].kwargs["req"] - - # Make sure we overrode the search request content to produce the CSV download - assert actual_search_req.limit == max(limit, 100) - assert actual_search_req.offset == 0 - - -@pytest.mark.opensearch -def test_extra_indices_with_html_search( - test_opensearch, monkeypatch, client, test_db, mocker -): - monkeypatch.setattr(search, "_OPENSEARCH_CONNECTION", test_opensearch) - _populate_search_db_families(test_db) - - expected_config = OpenSearchQueryConfig() - expected_search_body = SearchRequestBody( - query_string="climate", - exact_match=False, - max_passages_per_doc=10, - keyword_filters=None, - year_range=None, - sort_field=None, - sort_order=SortOrder.DESCENDING, - limit=10, - offset=0, - include_results=[IncludedResults.HTMLS_NON_TRANSLATED], - ) - query_spy = mocker.spy(search._OPENSEARCH_CONNECTION, "query_families") - - response = client.post( - SEARCH_ENDPOINT, - json={ - "query_string": "climate", - "exact_match": False, - "include_results": ["htmlsNonTranslated"], - }, - params={"use_vespa": False}, - ) - assert response.status_code == 200 - # Ensure nothing has/is going on in the background - assert query_spy.call_count == 1 # Called once as not using jit search - - actual_search_body = query_spy.mock_calls[0].kwargs["search_request_body"] - assert actual_search_body == expected_search_body - - # Check default config is used - actual_config = query_spy.mock_calls[0].kwargs["opensearch_internal_config"] - assert actual_config == expected_config