Skip to content
This repository has been archived by the owner on Feb 22, 2023. It is now read-only.

Commit

Permalink
Make quoted queries behave as described in the API documentation (return exact matches only) (#1012)
Browse files Browse the repository at this point in the history

* Fix quoted audio search example escaping

* Make quoted queries behave as described in API documentation

* Undo change breaking title match boosting

* Fix and future proof tests against additional test data
  • Loading branch information
sarayourfriend authored Nov 29, 2022
1 parent 55f55de commit 4edae23
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 7 deletions.
17 changes: 12 additions & 5 deletions api/catalog/api/controllers/search_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,18 +340,25 @@ def search(
search_fields = ["tags.name", "title", "description"]
if "q" in search_params.data:
query = _quote_escape(search_params.data["q"])
base_query_kwargs = {
"query": query,
"fields": search_fields,
"default_operator": "AND",
}

if '"' in query:
base_query_kwargs["quote_field_suffix"] = ".exact"

s = s.query(
"simple_query_string",
query=query,
fields=search_fields,
default_operator="AND",
**base_query_kwargs,
)
# Boost exact matches
# Boost exact matches on the title
quotes_stripped = query.replace('"', "")
exact_match_boost = Q(
"simple_query_string",
fields=["title"],
query=f'"{quotes_stripped}"',
query=f"{quotes_stripped}",
boost=10000,
)
s = search_client.query(Q("bool", must=s.query, should=exact_match_boost))
Expand Down
2 changes: 1 addition & 1 deletion api/catalog/api/examples/audio_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
syntax_examples = {
"using single query parameter": "test",
"using multiple query parameters": "test&license=pdm,by&categories=illustration&page_size=1&page=1", # noqa: E501
"that is an exact match of Giacomo Puccini": '"Giacomo Puccini"',
"that is an exact match of Giacomo Puccini": r"%22Giacomo%20Puccini%22",
"related to both dog and cat": "dog+cat",
"related to dog or cat, but not necessarily both": "dog|cat",
"related to dog but won't include results related to 'pug'": "dog -pug",
Expand Down
2 changes: 1 addition & 1 deletion api/catalog/api/examples/image_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
syntax_examples = {
"using single query parameter": "test",
"using multiple query parameters": "test&license=pdm,by&categories=illustration&page_size=1&page=1", # noqa: E501
"that are an exact match of Claude Monet": '"Claude Monet"',
"that are an exact match of Claude Monet": "%22Claude%20Monet%22",
"related to both dog and cat": "dog+cat",
"related to dog or cat, but not necessarily both": "dog|cat",
"related to dog but won't include results related to 'pug'": "dog -pug",
Expand Down
6 changes: 6 additions & 0 deletions api/test/audio_integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
search_by_category,
search_consistency,
search_quotes,
search_quotes_exact,
search_source_and_excluded,
search_special_chars,
stats,
Expand Down Expand Up @@ -101,6 +102,11 @@ def test_search_quotes():
search_quotes("audio", "love")


def test_search_quotes_exact():
    """Quoted audio searches must be stricter than unquoted ones."""
    # ``water running`` is used because it returns different results when
    # quoted vs unquoted.
    search_quotes_exact(media_path="audio", q="water running")


def test_search_with_special_characters():
search_special_chars("audio", "love")

Expand Down
6 changes: 6 additions & 0 deletions api/test/image_integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
search_all_excluded,
search_consistency,
search_quotes,
search_quotes_exact,
search_source_and_excluded,
search_special_chars,
stats,
Expand Down Expand Up @@ -53,6 +54,11 @@ def test_search_quotes():
search_quotes("images", "dog")


def test_search_quotes_exact():
    """Quoted image searches must be stricter than unquoted ones."""
    # ``bird perched`` is used because it returns different results when
    # quoted vs unquoted.
    search_quotes_exact(media_path="images", q="bird perched")


def test_search_with_special_characters():
search_special_chars("images", "dog")

Expand Down
19 changes: 19 additions & 0 deletions api/test/media_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,25 @@ def search_quotes(media_path, q="test"):
assert response.status_code == 200


def search_quotes_exact(media_path, q):
    """Check that quoting a query narrows the search to exact matches.

    Runs the search for ``q`` twice against ``{API_URL}/v1/{media_path}``:
    once as given and once wrapped in double quotes. Both requests must
    respond with HTTP 200 and report a non-zero ``result_count``, and the
    quoted request must report strictly fewer results than the unquoted one.

    :param media_path: path segment of the media endpoint, e.g. ``"images"``
        or ``"audio"``
    :param q: query expected to match fewer records when quoted
    """

    def result_count(query):
        # Hand the query to ``requests`` via ``params`` so the double quotes,
        # spaces and any reserved characters (``&``, ``#``, ...) are
        # percent-encoded instead of being interpolated raw into the URL.
        response = requests.get(
            f"{API_URL}/v1/{media_path}",
            params={"q": query},
            verify=False,
        )
        assert response.status_code == 200
        return response.json()["result_count"]

    unquoted_result_count = result_count(q)
    assert unquoted_result_count > 0

    quoted_result_count = result_count(f'"{q}"')
    # Guard against the comparison below passing trivially because the
    # quoted query matched nothing at all.
    assert quoted_result_count > 0

    # The rationale here is that the unquoted results will match more records
    # due to the query being overall less strict. Quoting the query makes it
    # more strict, causing it to return fewer results.
    assert quoted_result_count < unquoted_result_count


def search_special_chars(media_path, q="test"):
"""Returns a response when query includes special characters."""
response = requests.get(f"{API_URL}/v1/{media_path}?q={q}!", verify=False)
Expand Down

0 comments on commit 4edae23

Please sign in to comment.