From d9f83e53761dd502c93384f0aa6e2f7b711151e2 Mon Sep 17 00:00:00 2001 From: Olga Bulat Date: Tue, 17 May 2022 18:21:03 +0300 Subject: [PATCH] Fix `length` query for audio (#661) * Add `length` as the field for audio duration categories * Update api/catalog/api/serializers/audio_serializers.py * Add `shortest` length, fix other length ranges * Make `duration` query param work for `length` ES field Signed-off-by: Olga Bulat * Final renaming of 'duration' query to 'length' --- api/catalog/api/constants/field_values.py | 4 +++- api/catalog/api/controllers/search_controller.py | 1 + api/catalog/api/serializers/audio_serializers.py | 14 +++++++------- .../ingestion_server/elasticsearch_models.py | 12 ++++++++---- ingestion_server/ingestion_server/es_mapping.py | 1 + 5 files changed, 20 insertions(+), 12 deletions(-) diff --git a/api/catalog/api/constants/field_values.py b/api/catalog/api/constants/field_values.py index 23b1bc977..9f90144c5 100644 --- a/api/catalog/api/constants/field_values.py +++ b/api/catalog/api/constants/field_values.py @@ -25,7 +25,9 @@ "large", } -DURATION = { +LENGTHS = { + "shortest", "short", + "medium", "long", } diff --git a/api/catalog/api/controllers/search_controller.py b/api/catalog/api/controllers/search_controller.py index d825fad72..0a9fb9801 100644 --- a/api/catalog/api/controllers/search_controller.py +++ b/api/catalog/api/controllers/search_controller.py @@ -240,6 +240,7 @@ def search( ("extension", None), ("category", None), ("categories", "category"), + ("length", None), ("aspect_ratio", None), ("size", None), ("source", None), diff --git a/api/catalog/api/serializers/audio_serializers.py b/api/catalog/api/serializers/audio_serializers.py index d6be6feef..5b769a24b 100644 --- a/api/catalog/api/serializers/audio_serializers.py +++ b/api/catalog/api/serializers/audio_serializers.py @@ -2,7 +2,7 @@ from elasticsearch_dsl.response import Hit -from catalog.api.constants.field_values import AUDIO_CATEGORIES, DURATION +from catalog.api.constants.field_values import AUDIO_CATEGORIES, LENGTHS from catalog.api.docs.media_docs import fields_to_md from catalog.api.models import AudioReport from catalog.api.models.audio import Audio @@ -57,7 +57,7 @@ class AudioSearchRequestSerializer( *MediaSearchRequestSerializer.fields_names, *AudioSearchRequestSourceSerializer.field_names, "category", - "duration", + "length", ] """ Keep the fields names in sync with the actual fields below as this list is @@ -69,9 +69,9 @@ class AudioSearchRequestSerializer( help_text=make_comma_separated_help_text(AUDIO_CATEGORIES, "categories"), required=False, ) - duration = serializers.CharField( - label="duration", - help_text=make_comma_separated_help_text(DURATION, "audio lengths"), + length = serializers.CharField( + label="length", + help_text=make_comma_separated_help_text(LENGTHS, "audio lengths"), required=False, ) @@ -81,8 +81,8 @@ def validate_category(value): return value.lower() @staticmethod - def validate_duration(value): - _validate_enum("duration", DURATION, value) + def validate_length(value): + _validate_enum("length", LENGTHS, value) return value.lower() diff --git a/ingestion_server/ingestion_server/elasticsearch_models.py b/ingestion_server/ingestion_server/elasticsearch_models.py index d8616885b..3efe60cb8 100644 --- a/ingestion_server/ingestion_server/elasticsearch_models.py +++ b/ingestion_server/ingestion_server/elasticsearch_models.py @@ -304,9 +304,10 @@ class Durations(Enum): api/catalog/api/serializers/audio_serializers.py. """ - SHORT = 4 * 60 * 1e3 # under 4 minutes - MEDIUM = 20 * 60 * 1e3 # 4 - 20 minutes - LONG = float("inf") # longer than 20 minutes + SHORTEST = 30 * 1e3 # under 30 seconds + SHORT = 2 * 60 * 1e3 # 30 seconds - 2 minutes + MEDIUM = 10 * 60 * 1e3 # 2 - 10 minutes + LONG = float("inf") # longer than 10 minutes class Index: name = "audio" @@ -320,12 +321,15 @@ def database_row_to_elasticsearch_doc(row, schema): attrs = Audio.get_instance_attrs(row, schema) popularity = attrs["standardized_popularity"] + length = Audio.get_length(row[schema["duration"]]) + return Audio( bit_rate=row[schema["bit_rate"]], sample_rate=row[schema["sample_rate"]], genres=row[schema["genres"]], category=row[schema["category"]], duration=row[schema["duration"]], + length=length, authority_boost=authority_boost, max_boost=max(popularity or 1, authority_boost or 1), min_boost=min(popularity or 1, authority_boost or 1), @@ -333,7 +337,7 @@ def database_row_to_elasticsearch_doc(row, schema): ) @staticmethod - def get_duration(duration): + def get_length(duration): if not duration: return None for length in Audio.Durations: diff --git a/ingestion_server/ingestion_server/es_mapping.py b/ingestion_server/ingestion_server/es_mapping.py index e6058e990..0b8d6c7c4 100644 --- a/ingestion_server/ingestion_server/es_mapping.py +++ b/ingestion_server/ingestion_server/es_mapping.py @@ -137,6 +137,7 @@ def index_settings(table_name): "sample_rate": {"type": "integer"}, "genres": {"fields": {"keyword": {"type": "keyword"}}, "type": "text"}, "duration": {"type": "integer"}, + "length": {"type": "keyword"}, }, } media_mappings = common_mappings.copy()