Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove unused simple query string features #3360

Merged
merged 9 commits into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions api/api/controllers/search_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import logging
import logging as log
import re
from math import ceil
from typing import TYPE_CHECKING

Expand Down Expand Up @@ -58,6 +59,7 @@
QUERY_SPECIAL_CHARACTER_ERROR = "Unescaped special characters are not allowed."
DEFAULT_BOOST = 10000
DEFAULT_SEARCH_FIELDS = ["title", "description", "tags.name"]
DEFAULT_SQS_FLAGS = "AND|NOT|PHRASE|WHITESPACE"


def _quote_escape(query_string):
Expand Down Expand Up @@ -287,8 +289,11 @@ def build_search_query(
# individual field-level queries specified.
if "q" in search_params.data:
query = _quote_escape(search_params.data["q"])
log_query_features(query, query_name="q")

base_query_kwargs = {
"query": query,
"flags": DEFAULT_SQS_FLAGS,
"fields": DEFAULT_SEARCH_FIELDS,
"default_operator": "AND",
}
Expand All @@ -301,6 +306,7 @@ def build_search_query(
quotes_stripped = query.replace('"', "")
exact_match_boost = Q(
"simple_query_string",
flags=DEFAULT_SQS_FLAGS,
fields=["title"],
query=f"{quotes_stripped}",
boost=10000,
Expand All @@ -313,9 +319,11 @@ def build_search_query(
("tags", "tags.name"),
]:
if field_value := search_params.data.get(field):
log_query_features(field_value, query_name="field")
search_queries["must"].append(
Q(
"simple_query_string",
flags=DEFAULT_SQS_FLAGS,
query=_quote_escape(field_value),
fields=[field_name],
)
Expand All @@ -339,6 +347,18 @@ def build_search_query(
)


def log_query_features(query: str, query_name) -> None:
flags = [
("PRECEDENCE", r"\(.*\)"),
("ESCAPE", r"\\"),
("FUZZY|SLOP", r"~\d"),
("PREFIX", r"\*"),
]
for flag, pattern in flags:
if bool(re.search(pattern, query)):
log.info(f"Special feature in `{query_name}` query string. {flag}: {query}")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a suggestion, but it would produce less logs overall while making it easier to tally them if the logging did something like, counting the instances of each flag and logging all flags at once in JSON like { "log_message": "Special features present in query", "query_name": query_name, "query": query} | {flag: count_flag(flag, query) for flag in flags } (one-liner just for commenting, not to suggest that's the way to do it).

No big deal, though. This way works too, it just produces slightly more logs (not many, because we know these are used so rarely and mostly following the frontend examples) and requires parsing the message string rather than using Logs Insights discovered fields feature.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for this suggestion, I was actually not certain that the logging pattern would be good, and I didn't even realize that there could be several flags used in a single search. I'll update the logging

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated the logging, now if you go to http://0.0.0.0:50280/v1/images/?q=cat+net*+(dog)+%5C(+someth~2, you get the following message logged:
[2023-11-30 08:32:53,795 - root - 362][INFO] [3419191a256b4058b2e8ffd5066b1ec2] {'log_message': 'Special features present in query', 'query_name': 'q', 'query': 'cat net* (dog) \\( someth~2', 'flags': ['PRECEDENCE', 'ESCAPE', 'FUZZY|SLOP', 'PREFIX']}

I don't think we need to log the flag cound separately, do you, @sarayourfriend ?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess the raw count isn't that important, is it! Good call 👍 The change LGTM.



def build_collection_query(
search_params: MediaListRequestSerializer,
collection_params: dict[str, str],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from api.controllers import search_controller
from api.controllers.search_controller import (
DEFAULT_SQS_FLAGS,
FILTERED_PROVIDERS_CACHE_KEY,
FILTERED_PROVIDERS_CACHE_VERSION,
)
Expand Down Expand Up @@ -71,6 +72,7 @@ def test_create_search_query_q_search_no_filters(media_type_config):
"default_operator": "AND",
"fields": ["title", "description", "tags.name"],
"query": "cat",
"flags": DEFAULT_SQS_FLAGS,
}
}
],
Expand All @@ -80,14 +82,15 @@ def test_create_search_query_q_search_no_filters(media_type_config):
"boost": 10000,
"fields": ["title"],
"query": "cat",
"flags": DEFAULT_SQS_FLAGS,
}
},
{"rank_feature": {"boost": 10000, "field": "standardized_popularity"}},
],
}


def test_create_search_query_q_search_with_quotes_adds_exact_suffix(media_type_config):
def test_create_search_query_q_search_with_quotes_adds_raw_suffix(media_type_config):
serializer = media_type_config.search_request_serializer(
data={"q": '"The cutest cat"'}
)
Expand All @@ -104,6 +107,7 @@ def test_create_search_query_q_search_with_quotes_adds_exact_suffix(media_type_c
"fields": ["title", "description", "tags.name"],
"query": '"The cutest cat"',
"quote_field_suffix": ".raw",
"flags": DEFAULT_SQS_FLAGS,
}
}
],
Expand All @@ -113,6 +117,7 @@ def test_create_search_query_q_search_with_quotes_adds_exact_suffix(media_type_c
"boost": 10000,
"fields": ["title"],
"query": "The cutest cat",
"flags": DEFAULT_SQS_FLAGS,
}
},
{"rank_feature": {"boost": 10000, "field": "standardized_popularity"}},
Expand Down Expand Up @@ -152,6 +157,7 @@ def test_create_search_query_q_search_with_filters(image_media_type_config):
"default_operator": "AND",
"fields": ["title", "description", "tags.name"],
"query": "cat",
"flags": DEFAULT_SQS_FLAGS,
}
}
],
Expand All @@ -161,6 +167,7 @@ def test_create_search_query_q_search_with_filters(image_media_type_config):
"boost": 10000,
"fields": ["title"],
"query": "cat",
"flags": DEFAULT_SQS_FLAGS,
}
},
{"rank_feature": {"boost": 10000, "field": "standardized_popularity"}},
Expand Down Expand Up @@ -188,10 +195,23 @@ def test_create_search_query_non_q_query(image_media_type_config):
"simple_query_string": {
"fields": ["creator"],
"query": "Artist From Openverse",
"flags": DEFAULT_SQS_FLAGS,
}
},
{
"simple_query_string": {
"fields": ["title"],
"query": "kitten🐱",
"flags": DEFAULT_SQS_FLAGS,
}
},
{
"simple_query_string": {
"fields": ["tags.name"],
"query": "cute",
"flags": DEFAULT_SQS_FLAGS,
}
},
{"simple_query_string": {"fields": ["title"], "query": "kitten🐱"}},
{"simple_query_string": {"fields": ["tags.name"], "query": "cute"}},
],
"should": [
{"rank_feature": {"boost": 10000, "field": "standardized_popularity"}},
Expand Down
72 changes: 8 additions & 64 deletions frontend/src/locales/scripts/en.json5
Original file line number Diff line number Diff line change
Expand Up @@ -177,70 +177,14 @@
title: "Search for an exact match",
ariaLabel: "quote unquote Claude Monet",
claudeMonet: '"Claude Monet"',
content: "Put a word or phrase inside quotes. For example, {link}.",
},
combine: {
title: "Combining terms",
description: "If you want to combine terms, you can use the following operators to perform more complex queries",
and: "{symbol} signifies AND operation",
or: "{symbol} signifies OR operation",
not: "{symbol} negates a single token",
prefix: "{symbol} at the end of a term signifies a prefix query",
precedence: "{symbol} signify precedence",
fuzziness: "{symbol} after a word signifies edit distance (fuzziness)",
ariaLabels: {
fuzziness: "tilde N",
open: "open parenthesis",
close: "close parenthesis",
star: "star symbol",
not: "minus symbol",
and: "plus symbol",
or: "vertical bar symbol",
prefix: "asterisk symbol",
verticalBar: "vertical bar symbol",
precedence: "parentheses",
},
},
example: {
and: {
description: "Example: {link}{br} This will search for images related to both dog and cat.",
ariaLabel: "dog plus cat",
example: "dog+cat",
},
or: {
description: "Example: {link}{br} This will search for images related to dog or cat, but not necessarily both.",
ariaLabel: "dog vertical bar cat",
example: "dog|cat",
},
negate: {
description: "You can use the {operator} to exclude a search term from the results.",
operatorName: "operator (signifies NOT)",
operatorAriaLabel: "minus operator (signifies NOT)",
ariaLabel: "dog minus pug",
example: "dog -pug",
content: "Example: {link}{br} This will search for images related to dog but won't include results related to 'pug'",
},
prefix: {
description: "You can use the {operatorName} to mark a prefix term. This will match anything after the *.",
operatorName: "operator (wildcard)",
operatorAriaLabel: "star operator (wildcard)",
ariaLabel: "net star symbol",
example: "net*",
content: "Example: {link}{br} This will search for images matching anything with 'net'. This might include 'network', 'Netflix', 'Netherlands', etc.",
},
precedence: {
description: "You can use parentheses {highlight} to specify precedence of terms or combine more complex queries.",
ariaLabel: "dogs plus open parenthesis corgis vertical bar labrador close parenthesis",
example: "dogs + (corgis | labrador)",
content: "Example: {link}{br} This will search for images that match dogs that are either corgis or labrador.",
},
fuzziness: {
description: "You can use {highlight} to specify some fuzzy logic to the term according to the {link} — the number of one character changes that need to be made to one string to make it the same as another string.",
linkText: "Levenshtein Edit Distance",
ariaLabel: "theatre tilde 1",
example: "theatre~1",
content: "Example: {link}{br} This will search for images that match strings close to the term 'theatre' with a difference of one character. Results might include terms with different spellings like 'theater'.",
},
content: "To search for an exact word or phrase, put it inside quotes. For example, {link}.",
},
negate: {
title: "Excluding terms",
operatorName: "minus operator",
ariaLabel: "dog minus pug",
example: "dog -pug",
content: 'To exclude a term from your results, put the {operator} in front of it. Example: {link}{br} This will search for media related to "dog" but won\'t include results related to "pug".',
},
},
feedback: {
Expand Down
Loading
Loading