Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support missing/empty values in search #3231

Merged
merged 5 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 34 additions & 33 deletions redis/commands/search/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from ._util import to_string
from .aggregation import AggregateRequest, AggregateResult, Cursor
from .document import Document
from .field import Field
from .indexDefinition import IndexDefinition
from .query import Query
from .result import Result
from .suggestion import SuggestionParser
Expand Down Expand Up @@ -151,44 +153,43 @@ def batch_indexer(self, chunk_size=100):

def create_index(
self,
fields,
no_term_offsets=False,
no_field_flags=False,
stopwords=None,
definition=None,
fields: List[Field],
no_term_offsets: bool = False,
no_field_flags: bool = False,
stopwords: Optional[List[str]] = None,
definition: Optional[IndexDefinition] = None,
max_text_fields=False,
temporary=None,
no_highlight=False,
no_term_frequencies=False,
skip_initial_scan=False,
no_highlight: bool = False,
no_term_frequencies: bool = False,
skip_initial_scan: bool = False,
):
"""
Create the search index. The index must not already exist.

### Parameters:

- **fields**: a list of TextField or NumericField objects
- **no_term_offsets**: If true, we will not save term offsets in
the index
- **no_field_flags**: If true, we will not save field flags that
allow searching in specific fields
- **stopwords**: If not None, we create the index with this custom
stopword list. The list can be empty
- **max_text_fields**: If true, we will encode indexes as if there
were more than 32 text fields which allows you to add additional
fields (beyond 32).
- **temporary**: Create a lightweight temporary index which will
expire after the specified period of inactivity (in seconds). The
internal idle timer is reset whenever the index is searched or added to.
- **no_highlight**: If true, disabling highlighting support.
Also implied by no_term_offsets.
- **no_term_frequencies**: If true, we avoid saving the term frequencies
in the index.
- **skip_initial_scan**: If true, we do not scan and index.

For more information see `FT.CREATE <https://redis.io/commands/ft.create>`_.
""" # noqa
Creates the search index. The index must not already exist.

For more information, see https://redis.io/commands/ft.create/

Args:
fields: A list of Field objects.
no_term_offsets: If true, term offsets will not be saved in the index.
no_field_flags: If true, field flags that allow searching in specific fields
will not be saved.
stopwords: If provided, the index will be created with this custom stopword
list. The list can be empty.
definition: If provided, the index will be created with this custom index
definition.
max_text_fields: If true, indexes will be encoded as if there were more than
32 text fields, allowing for additional fields beyond 32.
temporary: Creates a lightweight temporary index which will expire after the
specified period of inactivity (in seconds). The internal idle timer is reset
whenever the index is searched or added to.
no_highlight: If true, disables highlighting support. Also implied by
`no_term_offsets`.
no_term_frequencies: If true, term frequencies will not be saved in the
index.
skip_initial_scan: If true, the initial scan and indexing will be skipped.

"""
args = [CREATE_CMD, self.index_name]
if definition is not None:
args += definition.args
Expand Down
26 changes: 26 additions & 0 deletions redis/commands/search/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@


class Field:
"""
A class representing a field in a document.
"""

NUMERIC = "NUMERIC"
TEXT = "TEXT"
WEIGHT = "WEIGHT"
Expand All @@ -14,15 +18,33 @@ class Field:
NOINDEX = "NOINDEX"
AS = "AS"
GEOSHAPE = "GEOSHAPE"
INDEX_MISSING = "INDEXMISSING"
INDEX_EMPTY = "INDEXEMPTY"

def __init__(
self,
name: str,
args: List[str] = None,
sortable: bool = False,
no_index: bool = False,
index_missing: bool = False,
index_empty: bool = False,
as_name: str = None,
):
"""
Create a new field object.

Args:
name: The name of the field.
args: Extra arguments for the field definition. Defaults to an empty list.
sortable: If `True`, the field will be sortable.
no_index: If `True`, the field will not be indexed.
index_missing: If `True`, it will be possible to search for documents that
have this field missing.
index_empty: If `True`, it will be possible to search for documents that
have this field empty.
as_name: If provided, this alias will be used for the field.
"""
if args is None:
args = []
self.name = name
Expand All @@ -34,6 +56,10 @@ def __init__(
self.args_suffix.append(Field.SORTABLE)
if no_index:
self.args_suffix.append(Field.NOINDEX)
if index_missing:
self.args_suffix.append(Field.INDEX_MISSING)
if index_empty:
self.args_suffix.append(Field.INDEX_EMPTY)

if no_index and not sortable:
raise ValueError("Non-Sortable non-Indexable fields are ignored")
Expand Down
156 changes: 145 additions & 11 deletions tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -2105,7 +2105,7 @@ def test_geo_params(client):
params_dict = {"lat": "34.95126", "lon": "29.69465", "radius": 1000, "units": "km"}
q = Query("@g:[$lon $lat $radius $units]").dialect(2)
res = client.ft().search(q, query_params=params_dict)
_assert_geosearch_result(client, res, ["doc1", "doc2", "doc3"])
_assert_search_result(client, res, ["doc1", "doc2", "doc3"])


@pytest.mark.redismod
Expand All @@ -2122,13 +2122,13 @@ def test_geoshapes_query_intersects_and_disjoint(client):
Query("@g:[intersects $shape]").dialect(3),
query_params={"shape": "POLYGON((15 15, 75 15, 50 70, 20 40, 15 15))"},
)
_assert_geosearch_result(client, intersection, ["doc_point2", "doc_polygon1"])
_assert_search_result(client, intersection, ["doc_point2", "doc_polygon1"])

disjunction = client.ft().search(
Query("@g:[disjoint $shape]").dialect(3),
query_params={"shape": "POLYGON((15 15, 75 15, 50 70, 20 40, 15 15))"},
)
_assert_geosearch_result(client, disjunction, ["doc_point1", "doc_polygon2"])
_assert_search_result(client, disjunction, ["doc_point1", "doc_polygon2"])


@pytest.mark.redismod
Expand All @@ -2146,19 +2146,19 @@ def test_geoshapes_query_contains_and_within(client):
Query("@g:[contains $shape]").dialect(3),
query_params={"shape": "POINT(25 25)"},
)
_assert_geosearch_result(client, contains_a, ["doc_polygon1"])
_assert_search_result(client, contains_a, ["doc_polygon1"])

contains_b = client.ft().search(
Query("@g:[contains $shape]").dialect(3),
query_params={"shape": "POLYGON((24 24, 24 26, 25 25, 24 24))"},
)
_assert_geosearch_result(client, contains_b, ["doc_polygon1"])
_assert_search_result(client, contains_b, ["doc_polygon1"])

within = client.ft().search(
Query("@g:[within $shape]").dialect(3),
query_params={"shape": "POLYGON((15 15, 75 15, 50 70, 20 40, 15 15))"},
)
_assert_geosearch_result(client, within, ["doc_point2", "doc_polygon1"])
_assert_search_result(client, within, ["doc_point2", "doc_polygon1"])


@pytest.mark.redismod
Expand Down Expand Up @@ -2322,19 +2322,153 @@ def test_geoshape(client: redis.Redis):
q2 = Query("@geom:[CONTAINS $poly]").dialect(3)
qp2 = {"poly": "POLYGON((2 2, 2 50, 50 50, 50 2, 2 2))"}
result = client.ft().search(q1, query_params=qp1)
_assert_geosearch_result(client, result, ["small"])
_assert_search_result(client, result, ["small"])
result = client.ft().search(q2, query_params=qp2)
_assert_geosearch_result(client, result, ["small", "large"])
_assert_search_result(client, result, ["small", "large"])


def _assert_geosearch_result(client, result, expected_doc_ids):
@pytest.mark.redismod
def test_search_missing_fields(client):
    """
    Exercise `ismissing()` queries on fields created with `index_missing=True`.

    Three hashes are stored under the `property:` prefix: one with every field
    set, one without `features`, and one without `description`. `title` is
    deliberately created without `index_missing`.
    """
    index_definition = IndexDefinition(prefix=["property:"], index_type=IndexType.HASH)
    schema = [
        TextField("title", sortable=True),
        TagField("features", index_missing=True),
        TextField("description", index_missing=True),
    ]
    client.ft().create_index(schema, definition=index_definition)

    documents = {
        # All fields present
        "property:1": {
            "title": "Luxury Villa in Malibu",
            "features": "pool,sea view,modern",
            "description": "A stunning modern villa overlooking the Pacific Ocean.",
        },
        # Missing features
        "property:2": {
            "title": "Downtown Flat",
            "description": "Modern flat in central Paris with easy access to metro.",
        },
        # Missing description
        "property:3": {
            "title": "Beachfront Bungalow",
            "features": "beachfront,sun deck",
        },
    }
    for key, values in documents.items():
        client.hset(key, mapping=values)

    def build_query(expression):
        # Every query in this test shares the same dialect and return options.
        return Query(expression).dialect(5).return_field("id").no_content()

    # `title` was not created with index_missing, so the query must be rejected.
    with pytest.raises(redis.exceptions.ResponseError) as e:
        client.ft().search(build_query("ismissing(@title)"))
    assert "to be defined with 'INDEXMISSING'" in e.value.args[0]

    # (query expression, ids of the documents expected to match)
    expectations = [
        ("ismissing(@features)", ["property:2"]),
        ("-ismissing(@features)", ["property:1", "property:3"]),
        ("ismissing(@description)", ["property:3"]),
        ("-ismissing(@description)", ["property:1", "property:2"]),
    ]
    for expression, expected_ids in expectations:
        res = client.ft().search(build_query(expression))
        _assert_search_result(client, res, expected_ids)


@pytest.mark.redismod
def test_search_empty_fields(client):
    """
    Exercise empty-value queries on fields created with `index_empty=True`.

    Three hashes are stored under the `property:` prefix: one with every field
    populated, one with an empty `features`, and one with an empty
    `description`. `title` is deliberately created without `index_empty`.
    """
    index_definition = IndexDefinition(prefix=["property:"], index_type=IndexType.HASH)
    schema = [
        TextField("title", sortable=True),
        TagField("features", index_empty=True),
        TextField("description", index_empty=True),
    ]
    client.ft().create_index(schema, definition=index_definition)

    documents = {
        # All fields present
        "property:1": {
            "title": "Luxury Villa in Malibu",
            "features": "pool,sea view,modern",
            "description": "A stunning modern villa overlooking the Pacific Ocean.",
        },
        # Empty features
        "property:2": {
            "title": "Downtown Flat",
            "features": "",
            "description": "Modern flat in central Paris with easy access to metro.",
        },
        # Empty description
        "property:3": {
            "title": "Beachfront Bungalow",
            "features": "beachfront,sun deck",
            "description": "",
        },
    }
    for key, values in documents.items():
        client.hset(key, mapping=values)

    def build_query(expression):
        # Every query in this test shares the same dialect and return options.
        return Query(expression).dialect(5).return_field("id").no_content()

    # `title` was not created with index_empty, so the query must be rejected.
    with pytest.raises(redis.exceptions.ResponseError) as e:
        client.ft().search(build_query("@title:''"))
    assert "to be defined with `INDEXEMPTY`" in e.value.args[0]

    # (query expression, ids of the documents expected to match)
    expectations = [
        ("@features:{ }", ["property:2"]),
        ("-@features:{ }", ["property:1", "property:3"]),
        ("@description:''", ["property:3"]),
        ("-@description:''", ["property:1", "property:2"]),
    ]
    for expression, expected_ids in expectations:
        res = client.ft().search(build_query(expression))
        _assert_search_result(client, res, expected_ids)


def _assert_search_result(client, result, expected_doc_ids):
    """
    Make sure the result of a search is as expected, taking into account the RESP
    version being used.

    Args:
        client: The client the search was issued with; decides which result
            layout is checked.
        result: The search result. Read through `.docs` / `.total` on a RESP2
            connection, and through `["results"]` / `["total_results"]`
            otherwise.
        expected_doc_ids: The ids of the documents expected in the result.
            Order is not significant.
    """
    expected = set(expected_doc_ids)
    if is_resp2_connection(client):
        assert {doc.id for doc in result.docs} == expected
        assert result.total == len(expected_doc_ids)
    else:
        assert {doc["id"] for doc in result["results"]} == expected
        assert result["total_results"] == len(expected_doc_ids)
Loading