Skip to content

Commit

Permalink
Use top-level keyword fields instead of subfields (#3161)
Browse files: browse the repository at this point in the history
  • Loading branch information
dhruvkb authored Oct 23, 2023
1 parent: a46a7e4; commit: df51293
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 18 deletions.
18 changes: 8 additions & 10 deletions api/api/controllers/search_controller.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -350,12 +350,14 @@ def search(
("extension", None),
("category", None),
("categories", "category"),
("source", None),
("license", None),
("license_type", "license"),
# Audio-specific filters
("length", None),
# Image-specific filters
("aspect_ratio", None),
("size", None),
("source", None),
("license", "license__keyword"),
("license_type", "license__keyword"),
]
for serializer_field, es_field in filters:
if serializer_field in search_params.data:
Expand Down Expand Up @@ -512,9 +514,7 @@ def related_media(uuid: str, index: str, filter_dead: bool) -> list[Hit]:

# Search the default index for the item itself as it might be sensitive.
item_search = Search(index=index)
# TODO: remove `__keyword` after
# https://github.com/WordPress/openverse/pull/3143 is merged.
item_hit = item_search.query(Term(identifier__keyword=uuid)).execute().hits[0]
item_hit = item_search.query(Term(identifier=uuid)).execute().hits[0]

# Match related using title.
title = getattr(item_hit, "title", None)
Expand All @@ -539,9 +539,7 @@ def related_media(uuid: str, index: str, filter_dead: bool) -> list[Hit]:
s = Search(index=f"{index}-filtered")

# Exclude the current item and mature content.
# TODO: remove `__keyword` after
# https://github.com/WordPress/openverse/pull/3143 is merged.
s = s.query(related_query & ~Term(identifier__keyword=uuid) & ~Term(mature=True))
s = s.query(related_query & ~Term(identifier=uuid) & ~Term(mature=True))
# Exclude the dynamically disabled sources.
s = _exclude_filtered(s)

Expand Down Expand Up @@ -579,7 +577,7 @@ def get_sources(index):
aggs = {
"unique_sources": {
"terms": {
"field": "source.keyword",
"field": "source",
"size": size,
"order": {"_key": "desc"},
}
Expand Down
8 changes: 1 addition & 7 deletions api/api/utils/search_context.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -35,13 +35,7 @@ def build(
# Use `identifier` rather than the document `id` due to
# `id` instability between refreshes:
# https://github.com/WordPress/openverse/issues/2306
# `identifier` is mapped as `text` which will match fuzzily.
# Use `identifier.keyword` to match _exactly_
# cf: https://github.com/WordPress/openverse/issues/2154
Q(
"terms",
**{"identifier.keyword": all_result_identifiers},
)
Q("terms", identifier=all_result_identifiers)
)

# The default query size is 10, so we need to slice the query
Expand Down
2 changes: 1 addition & 1 deletion ingestion_server/ingestion_server/elasticsearch_models.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -199,7 +199,7 @@ def get_popularity(raw):
@staticmethod
def parse_detailed_tags(json_tags):
if not json_tags:
return None
return []
parsed_tags = []
for tag in json_tags:
if "name" in tag:
Expand Down

0 comments on commit df51293

Please sign in to comment.