Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove deprecated features from search #2618

Merged
merged 6 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions nucliadb/tests/nucliadb/integration/search/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from nucliadb.common.context import ApplicationContext
from nucliadb.tests.vectors import V1, V2, Q
from nucliadb_models.labels import Label, LabelSetKind
-from nucliadb_models.search import MinScore
+from nucliadb_models.search import MinScore, SearchOptions
from nucliadb_protos.resources_pb2 import (
Classification,
ExtractedTextWrapper,
Expand Down Expand Up @@ -351,7 +351,7 @@ async def _test_filtering(nucliadb_reader: AsyncClient, kbid: str, filters):
json=dict(
query="",
filters=filters,
-features=["paragraph", "vector"],
+features=[SearchOptions.KEYWORD, SearchOptions.SEMANTIC],
vector=Q,
min_score=MinScore(semantic=-1).model_dump(),
),
Expand Down
11 changes: 6 additions & 5 deletions nucliadb/tests/nucliadb/integration/search/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from nucliadb.ingest.consumer import shard_creator
from nucliadb.search.predict import SendToPredictError
from nucliadb.tests.vectors import V1
+from nucliadb_models.search import SearchOptions
from nucliadb_protos import resources_pb2 as rpb
from nucliadb_protos.audit_pb2 import AuditRequest, ClientType
from nucliadb_protos.utils_pb2 import RelationNode
Expand Down Expand Up @@ -973,8 +974,8 @@ async def test_search_pagination(
page_size = 5

for feature, result_key in [
-("paragraph", "paragraphs"),
-("document", "fulltext"),
+(SearchOptions.KEYWORD.value, "paragraphs"),
+(SearchOptions.FULLTEXT.value, "fulltext"),
]:
total_pages = math.floor(total / page_size)
for page_number in range(0, total_pages):
Expand Down Expand Up @@ -1069,7 +1070,7 @@ async def test_resource_search_pagination(
f"/kb/{kbid}/resource/{rid}/search",
params={
"query": query,
-"features": ["paragraph"],
+"features": [SearchOptions.KEYWORD],
"page_number": page_number,
"page_size": page_size,
},
Expand All @@ -1083,7 +1084,7 @@ async def test_resource_search_pagination(
f"/kb/{kbid}/resource/{rid}/search",
params={
"query": query,
-"features": ["paragraph"],
+"features": [SearchOptions.KEYWORD],
"page_number": page_number + 1,
"page_size": page_size,
},
Expand All @@ -1109,7 +1110,7 @@ async def test_search_endpoints_handle_predict_errors(
resp = await nucliadb_reader.post(
f"/kb/{kbid}/{endpoint}",
json={
-"features": ["vector"],
+"features": [SearchOptions.SEMANTIC],
"query": "something",
},
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from httpx import AsyncClient

from nucliadb.tests.vectors import V1
+from nucliadb_models.search import SearchOptions
from tests.nucliadb.integration.search.test_search import get_resource_with_a_sentence
from tests.utils import inject_message

Expand Down Expand Up @@ -76,8 +77,8 @@ async def resource(nucliadb_grpc, knowledgebox):
@pytest.mark.parametrize(
"feature",
[
-"paragraph",
-"vector",
+SearchOptions.KEYWORD,
+SearchOptions.SEMANTIC,
],
)
async def test_search_with_date_range_filters_nucliadb_dates(
Expand Down Expand Up @@ -133,8 +134,8 @@ async def test_search_with_date_range_filters_nucliadb_dates(
@pytest.mark.parametrize(
"feature",
[
-"paragraph",
-"vector",
+SearchOptions.KEYWORD,
+SearchOptions.SEMANTIC,
],
)
async def test_search_with_date_range_filters_origin_dates(
Expand Down Expand Up @@ -188,7 +189,7 @@ async def _test_find_date_ranges(
found,
):
payload = {"query": "Ramon", "features": features}
-if "vector" in features:
+if SearchOptions.SEMANTIC in features:
payload["vector"] = V1
if creation_start is not None:
payload["range_creation_start"] = creation_start.isoformat()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import pytest
from httpx import AsyncClient

+from nucliadb_models.search import SearchOptions
+

@pytest.mark.asyncio
async def test_search_sort_by_score(
Expand Down Expand Up @@ -201,7 +203,7 @@ async def test_list_all_resources_by_creation_and_modification_dates_with_empty_
f"/kb/{kbid}/search",
params={
"query": "",
-"features": ["document"],
+"features": [SearchOptions.FULLTEXT.value],
"fields": ["a/title"],
"page_number": page_number,
"page_size": page_size,
Expand Down
5 changes: 3 additions & 2 deletions nucliadb/tests/nucliadb/integration/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
)
from nucliadb_models import common, metadata
from nucliadb_models.resource import Resource
+from nucliadb_models.search import SearchOptions
from nucliadb_protos import resources_pb2 as rpb
from nucliadb_protos import writer_pb2 as wpb
from nucliadb_protos.dataset_pb2 import TaskType, TrainSet
Expand Down Expand Up @@ -920,7 +921,7 @@ async def test_pagination_limits(
f"/kb/kbid/find",
json={
"query": "foo",
-"features": ["vector"],
+"features": [SearchOptions.SEMANTIC],
"page_size": 1000,
},
)
Expand All @@ -933,7 +934,7 @@ async def test_pagination_limits(
f"/kb/kbid/find",
json={
"query": "foo",
-"features": ["vector"],
+"features": [SearchOptions.SEMANTIC],
"page_number": 30,
"page_size": 100,
},
Expand Down
9 changes: 5 additions & 4 deletions nucliadb/tests/nucliadb/integration/test_deletion.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import pytest
from httpx import AsyncClient

+from nucliadb_models.search import SearchOptions
from nucliadb_protos.resources_pb2 import (
ExtractedTextWrapper,
ExtractedVectorsWrapper,
Expand Down Expand Up @@ -141,7 +142,7 @@ class FieldData:
f"/kb/{knowledgebox}/find",
json={
"query": "Original",
-"features": ["paragraph"],
+"features": [SearchOptions.KEYWORD],
"min_score": {"bm25": 0.0},
},
timeout=None,
Expand All @@ -155,7 +156,7 @@ class FieldData:
f"/kb/{knowledgebox}/find",
json={
"query": "Extracted",
-"features": ["paragraph"],
+"features": [SearchOptions.KEYWORD],
},
timeout=None,
)
Expand Down Expand Up @@ -233,7 +234,7 @@ class FieldData:
f"/kb/{knowledgebox}/find",
json={
"query": "Extracted",
-"features": ["paragraph"],
+"features": [SearchOptions.KEYWORD],
"min_score": {"bm25": 0.0},
},
timeout=None,
Expand All @@ -252,7 +253,7 @@ class FieldData:
f"/kb/{knowledgebox}/find",
json={
"query": "Modified",
-"features": ["paragraph"],
+"features": [SearchOptions.KEYWORD],
},
timeout=None,
)
Expand Down
7 changes: 4 additions & 3 deletions nucliadb/tests/nucliadb/integration/test_find.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import pytest
from httpx import AsyncClient

+from nucliadb_models.search import SearchOptions
from nucliadb_protos.writer_pb2_grpc import WriterStub
from nucliadb_utils.exceptions import LimitsExceededError

Expand Down Expand Up @@ -105,14 +106,14 @@ async def test_find_does_not_support_fulltext_search(
knowledgebox,
):
resp = await nucliadb_reader.get(
-f"/kb/{knowledgebox}/find?query=title&features=document&features=paragraph",
+f"/kb/{knowledgebox}/find?query=title&features=fulltext&features=keyword",
)
assert resp.status_code == 422
assert "fulltext search not supported" in resp.json()["detail"][0]["msg"]

resp = await nucliadb_reader.post(
f"/kb/{knowledgebox}/find",
-json={"query": "title", "features": ["document", "paragraph"]},
+json={"query": "title", "features": [SearchOptions.FULLTEXT, SearchOptions.KEYWORD]},
)
assert resp.status_code == 422
assert "fulltext search not supported" in resp.json()["detail"][0]["msg"]
Expand Down Expand Up @@ -244,7 +245,7 @@ async def test_story_7286(
f"/kb/{knowledgebox}/find",
json={
"query": "title",
-"features": ["paragraph", "vector", "relations"],
+"features": [SearchOptions.KEYWORD, SearchOptions.SEMANTIC, SearchOptions.RELATIONS],
"shards": [],
"highlight": True,
"autofilter": False,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

from nucliadb.common.maindb.driver import Driver
from nucliadb.learning_proxy import LearningConfiguration
+from nucliadb_models.search import SearchOptions
from nucliadb_protos import knowledgebox_pb2, resources_pb2, utils_pb2, writer_pb2
from nucliadb_protos.writer_pb2_grpc import WriterStub
from tests.utils import inject_message
Expand Down Expand Up @@ -127,7 +128,7 @@ async def test_matryoshka_embeddings(
f"/kb/{kbid}/search",
params={
"query": "matryoshka",
-"features": ["vector"],
+"features": [SearchOptions.SEMANTIC.value],
"min_score": 0.99999,
"with_duplicates": True,
},
Expand Down
4 changes: 3 additions & 1 deletion nucliadb/tests/nucliadb/integration/test_synonyms.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#
import pytest

+from nucliadb_models.search import SearchOptions
+

@pytest.mark.asyncio
async def test_custom_synonyms_api(
Expand Down Expand Up @@ -197,7 +199,7 @@ async def test_search_errors_if_vectors_or_relations_requested(
resp = await nucliadb_reader.post(
f"/kb/{kbid}/search",
json=dict(
-features=["paragraph", "vector", "relations"],
+features=[SearchOptions.KEYWORD, SearchOptions.SEMANTIC, SearchOptions.RELATIONS],
query="planet",
with_synonyms=True,
),
Expand Down
Loading
Loading