From b1c4b6762dd163302233657f6d361c91fd01cee0 Mon Sep 17 00:00:00 2001 From: sarayourfriend <24264157+sarayourfriend@users.noreply.github.com> Date: Mon, 7 Feb 2022 09:18:29 -0500 Subject: [PATCH 01/10] Allow running specific tests --- justfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/justfile b/justfile index 9d350a18b..3e3cc6742 100644 --- a/justfile +++ b/justfile @@ -165,12 +165,12 @@ _api-install: "Waiting for the API to be healthy..." # Run API tests inside Docker -api-test args="": up wait-for-es wait-for-ing wait-for-web - docker-compose exec {{ args }} web ./test/run_test.sh +api-test docker_args="" tests="": up wait-for-es wait-for-ing wait-for-web + docker-compose exec {{ docker_args }} web ./test/run_test.sh {{ tests }} # Run API tests locally -api-testlocal: - cd api && pipenv run ./test/run_test.sh +api-testlocal args="": + cd api && pipenv run ./test/run_test.sh {{ args }} # Run Django administrative commands dj args="": From f53f3e8284e89e0cda11d9f6f9637caa9eb84167 Mon Sep 17 00:00:00 2001 From: sarayourfriend <24264157+sarayourfriend@users.noreply.github.com> Date: Mon, 7 Feb 2022 09:18:50 -0500 Subject: [PATCH 02/10] Document external binary and library dependencies --- CONTRIBUTING.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1a68d429c..09493db6e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -46,6 +46,12 @@ cd openverse_api pipenv install ``` +Note: The following non-Python libraries or binaries are transient dependencies which will also need to be present on your computer for the project to run and install as expected: + +* `librdkafka` +* `exempi` +* `audiowaveform` + 3. Launch a new shell session ``` From 32765cce0c49010c9f022d436e15e45331063b2b Mon Sep 17 00:00:00 2001 From: sarayourfriend <24264157+sarayourfriend@users.noreply.github.com> Date: Mon, 7 Feb 2022 09:20:19 -0500 Subject: [PATCH 03/10] Cache audio waveform in database --- .../api/migrations/0045_audiowaveformaddon.py | 28 ++++++++++++++ api/catalog/api/models/audio.py | 37 +++++++++++++++++++ api/catalog/api/models/media.py | 22 +++++++++++ api/catalog/api/utils/waveform.py | 12 ++++++ api/catalog/api/views/audio_views.py | 16 +------- api/test/unit/__init__.py | 0 api/test/unit/models/__init__.py | 0 api/test/unit/models/audio_test.py | 36 ++++++++++++++++++ 8 files changed, 136 insertions(+), 15 deletions(-) create mode 100644 api/catalog/api/migrations/0045_audiowaveformaddon.py create mode 100644 api/test/unit/__init__.py create mode 100644 api/test/unit/models/__init__.py create mode 100644 api/test/unit/models/audio_test.py diff --git a/api/catalog/api/migrations/0045_audiowaveformaddon.py b/api/catalog/api/migrations/0045_audiowaveformaddon.py new file mode 100644 index 000000000..03d7b2542 --- /dev/null +++ b/api/catalog/api/migrations/0045_audiowaveformaddon.py @@ -0,0 +1,28 @@ +# Generated by Django 3.2.9 on 2022-02-07 14:05 + +import django.contrib.postgres.fields +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('api', '0044_singular_category'), + ] + + operations = [ + migrations.CreateModel( + name='AudioWaveformAddOn', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('created_on', models.DateTimeField(auto_now_add=True)), + ('updated_on', models.DateTimeField(auto_now=True)), + ('peaks', django.contrib.postgres.fields.ArrayField(base_field=models.FloatField(), help_text='The waveform peaks. A list of floats in the range of 0 -> 1 inclusively.', size=1500)), + ('audio', models.OneToOneField(help_text='The foreign key of the audio.', on_delete=django.db.models.deletion.CASCADE, related_name='waveform', to='api.audio')), + ], + options={ + 'abstract': False, + }, + ), + ] diff --git a/api/catalog/api/models/audio.py b/api/catalog/api/models/audio.py index ca3bdb779..a9caa9392 100644 --- a/api/catalog/api/models/audio.py +++ b/api/catalog/api/models/audio.py @@ -5,10 +5,12 @@ AbstractDeletedMedia, AbstractMatureMedia, AbstractMedia, + AbstractMediaAddOn, AbstractMediaList, AbstractMediaReport, ) from catalog.api.models.mixins import FileMixin, ForeignIdentifierMixin, MediaMixin +from catalog.api.utils.waveform import generate_peaks from django.contrib.postgres.fields import ArrayField from django.db import models from uuslug import uuslug @@ -157,6 +159,18 @@ def audio_set(self): except AudioSet.DoesNotExist: return None + def get_or_create_waveform(self): + if hasattr(self, "waveform"): + return self.waveform.peaks + + peaks = generate_peaks(self) + + self.waveform = AudioWaveformAddOn(audio=self, peaks=peaks) + + self.waveform.save() + + return self.waveform.peaks + class Meta(AbstractMedia.Meta): db_table = "audio" @@ -209,3 +223,26 @@ class Meta: def save(self, *args, **kwargs): self.slug = uuslug(self.title, instance=self) super(AudioList, self).save(*args, **kwargs) + + +class AudioWaveformAddOn(AbstractMediaAddOn): + audio = models.OneToOneField( + to=Audio, + on_delete=models.CASCADE, + related_name="waveform", + help_text=("The foreign key of the audio."), + ) + + peaks = ArrayField( + base_field=models.FloatField(), + # The approximate resolution of waveform generation + # results in _about_ 1000 peaks. We use 1500 to give + # sufficient wiggle room should we have any outlier + # files pop up. + # https://github.com/WordPress/openverse-api/blob/a7955c86d43bff504e8d41454f68717d79dd3a44/api/catalog/api/utils/waveform.py#L71 + size=1500, + help_text=( + "The waveform peaks. A list of floats in" + " the range of 0 -> 1 inclusively." + ), + ) diff --git a/api/catalog/api/models/media.py b/api/catalog/api/models/media.py index 5a8ad9449..f90fb1503 100644 --- a/api/catalog/api/models/media.py +++ b/api/catalog/api/models/media.py @@ -1,4 +1,5 @@ import mimetypes +from abc import abstractmethod import catalog.api.controllers.search_controller as search_controller from catalog.api.licenses import ATTRIBUTION, get_license_url @@ -23,6 +24,27 @@ OTHER = "other" +class AbstractMediaAddOn(OpenLedgerModel): + """ + Base class for "add-on" data for a particular media entry. + + These models are appropriate places to store generated and infinitely + cacheable data that is not retrieved or generated at ingestion time. + + For example, audio waveforms are generated by the API, but can be + cached indefinitely. + """ + + @property + @abstractmethod + def identifier(): + """A foreign key to the media having data added-on""" + ... + + class Meta: + abstract = True + + class AbstractMedia( IdentifierMixin, ForeignIdentifierMixin, MediaMixin, OpenLedgerModel ): diff --git a/api/catalog/api/utils/waveform.py b/api/catalog/api/utils/waveform.py index 4f2dd79bb..821fd3c99 100644 --- a/api/catalog/api/utils/waveform.py +++ b/api/catalog/api/utils/waveform.py @@ -6,6 +6,7 @@ import pathlib import shutil import subprocess +from typing import List import requests @@ -128,3 +129,14 @@ def cleanup(file_name): os.remove(file_path) else: log.info("File not found, nothing deleted") + + +def generate_peaks(audio) -> List[float]: + file_name = None + try: + file_name = download_audio(audio.url, audio.identifier) + awf_out = generate_waveform(file_name, audio.duration) + return process_waveform_output(awf_out) + finally: + if file_name is not None: + cleanup(file_name) diff --git a/api/catalog/api/views/audio_views.py b/api/catalog/api/views/audio_views.py index 61ecc0bde..9bf2b7021 100644 --- a/api/catalog/api/views/audio_views.py +++ b/api/catalog/api/views/audio_views.py @@ -16,12 +16,6 @@ ) from catalog.api.utils.exceptions import get_api_exception from catalog.api.utils.throttle import OneThousandPerMinute -from catalog.api.utils.waveform import ( - cleanup, - download_audio, - generate_waveform, - process_waveform_output, -) from catalog.api.views.media_views import MediaViewSet from django.utils.decorators import method_decorator from drf_yasg.utils import swagger_auto_schema @@ -84,21 +78,13 @@ def thumbnail(self, request, *_, **__): def waveform(self, *_, **__): audio = self.get_object() - file_name = None try: - file_name = download_audio(audio.url, audio.identifier) - awf_out = generate_waveform(file_name, audio.duration) - data = process_waveform_output(awf_out) - - obj = {"points": data} + obj = {"points": audio.get_or_create_waveform()} serializer = self.get_serializer(obj) return Response(status=200, data=serializer.data) except Exception as e: raise get_api_exception(getattr(e, "message", str(e))) - finally: - if file_name is not None: - cleanup(file_name) @action(detail=True, methods=["post"], serializer_class=AudioReportSerializer) def report(self, *args, **kwargs): diff --git a/api/test/unit/__init__.py b/api/test/unit/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/api/test/unit/models/__init__.py b/api/test/unit/models/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/api/test/unit/models/audio_test.py b/api/test/unit/models/audio_test.py new file mode 100644 index 000000000..97e0f2995 --- /dev/null +++ b/api/test/unit/models/audio_test.py @@ -0,0 +1,36 @@ +import uuid +from unittest import mock + +import pytest +from catalog.api.models.audio import Audio, AudioWaveformAddOn + + +@pytest.fixture +@pytest.mark.django_db +def audio_fixture(): + audio = Audio( + identifier=uuid.uuid4(), + ) + + audio.save() + + return audio + + +@pytest.mark.django_db +@mock.patch("catalog.api.models.audio.generate_peaks") +def test_audio_waveform_caches(generate_peaks_mock, audio_fixture): + mock_peaks = [0.4, 0.3, 0.1, 0, 1, 0.6] + generate_peaks_mock.return_value = mock_peaks + + assert not hasattr(audio_fixture, "waveform") + assert audio_fixture.get_or_create_waveform() == mock_peaks + assert hasattr(audio_fixture, "waveform") + assert audio_fixture.waveform.peaks == mock_peaks + assert audio_fixture.get_or_create_waveform() == mock_peaks + # Should only be called once if Audio.get_or_create_waveform is using the DB value on subsequent calls + generate_peaks_mock.assert_called_once() + + # Ensure the waveform addon was saved + waveform = AudioWaveformAddOn.objects.get(audio=audio_fixture) + assert waveform.peaks == mock_peaks From 049a0835a3b69c0c743e96dec2cdd175fc69ee79 Mon Sep 17 00:00:00 2001 From: sarayourfriend <24264157+sarayourfriend@users.noreply.github.com> Date: Mon, 7 Feb 2022 09:26:47 -0500 Subject: [PATCH 04/10] Remove unnecessary abstractmethod --- api/catalog/api/models/media.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/api/catalog/api/models/media.py b/api/catalog/api/models/media.py index f90fb1503..b653a2249 100644 --- a/api/catalog/api/models/media.py +++ b/api/catalog/api/models/media.py @@ -1,5 +1,4 @@ import mimetypes -from abc import abstractmethod import catalog.api.controllers.search_controller as search_controller from catalog.api.licenses import ATTRIBUTION, get_license_url @@ -35,12 +34,6 @@ class AbstractMediaAddOn(OpenLedgerModel): cached indefinitely. """ - @property - @abstractmethod - def identifier(): - """A foreign key to the media having data added-on""" - ... - class Meta: abstract = True From 6e0b589bf2827fe0cf55479ec1bbbae78088b525 Mon Sep 17 00:00:00 2001 From: sarayourfriend <24264157+sarayourfriend@users.noreply.github.com> Date: Wed, 9 Feb 2022 11:47:47 -0500 Subject: [PATCH 05/10] Add waveform directly to Audio model --- .../api/migrations/0045_audio_waveform.py | 19 +++++++ .../api/migrations/0045_audiowaveformaddon.py | 28 ---------- api/catalog/api/models/audio.py | 52 ++++++++----------- api/catalog/api/models/media.py | 15 ------ api/test/unit/models/audio_test.py | 24 ++++----- 5 files changed, 52 insertions(+), 86 deletions(-) create mode 100644 api/catalog/api/migrations/0045_audio_waveform.py delete mode 100644 api/catalog/api/migrations/0045_audiowaveformaddon.py diff --git a/api/catalog/api/migrations/0045_audio_waveform.py b/api/catalog/api/migrations/0045_audio_waveform.py new file mode 100644 index 000000000..7a3913ec6 --- /dev/null +++ b/api/catalog/api/migrations/0045_audio_waveform.py @@ -0,0 +1,19 @@ +# Generated by Django 3.2.9 on 2022-02-09 16:45 + +import django.contrib.postgres.fields +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('api', '0044_singular_category'), + ] + + operations = [ + migrations.AddField( + model_name='audio', + name='waveform', + field=django.contrib.postgres.fields.ArrayField(base_field=models.FloatField(), blank=True, help_text='The waveform peaks. A list of floats in the range of 0 -> 1 inclusively.', null=True, size=1500), + ), + ] diff --git a/api/catalog/api/migrations/0045_audiowaveformaddon.py b/api/catalog/api/migrations/0045_audiowaveformaddon.py deleted file mode 100644 index 03d7b2542..000000000 --- a/api/catalog/api/migrations/0045_audiowaveformaddon.py +++ /dev/null @@ -1,28 +0,0 @@ -# Generated by Django 3.2.9 on 2022-02-07 14:05 - -import django.contrib.postgres.fields -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - dependencies = [ - ('api', '0044_singular_category'), - ] - - operations = [ - migrations.CreateModel( - name='AudioWaveformAddOn', - fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('created_on', models.DateTimeField(auto_now_add=True)), - ('updated_on', models.DateTimeField(auto_now=True)), - ('peaks', django.contrib.postgres.fields.ArrayField(base_field=models.FloatField(), help_text='The waveform peaks. A list of floats in the range of 0 -> 1 inclusively.', size=1500)), - ('audio', models.OneToOneField(help_text='The foreign key of the audio.', on_delete=django.db.models.deletion.CASCADE, related_name='waveform', to='api.audio')), - ], - options={ - 'abstract': False, - }, - ), - ] diff --git a/api/catalog/api/models/audio.py b/api/catalog/api/models/audio.py index a9caa9392..0ffd4eb1e 100644 --- a/api/catalog/api/models/audio.py +++ b/api/catalog/api/models/audio.py @@ -5,7 +5,6 @@ AbstractDeletedMedia, AbstractMatureMedia, AbstractMedia, - AbstractMediaAddOn, AbstractMediaList, AbstractMediaReport, ) @@ -159,17 +158,31 @@ def audio_set(self): except AudioSet.DoesNotExist: return None - def get_or_create_waveform(self): - if hasattr(self, "waveform"): - return self.waveform.peaks + waveform = ArrayField( + base_field=models.FloatField(), + # The approximate resolution of waveform generation + # results in _about_ 1000 peaks. We use 1500 to give + # sufficient wiggle room should we have any outlier + # files pop up. + # https://github.com/WordPress/openverse-api/blob/a7955c86d43bff504e8d41454f68717d79dd3a44/api/catalog/api/utils/waveform.py#L71 + size=1500, + help_text=( + "The waveform peaks. A list of floats in" + " the range of 0 -> 1 inclusively." + ), + blank=True, + null=True, + ) - peaks = generate_peaks(self) + def get_or_create_waveform(self): + if self.waveform is not None: + return self.waveform - self.waveform = AudioWaveformAddOn(audio=self, peaks=peaks) + self.waveform = generate_peaks(self) - self.waveform.save() + self.save() - return self.waveform.peaks + return self.waveform class Meta(AbstractMedia.Meta): db_table = "audio" @@ -223,26 +236,3 @@ class Meta: def save(self, *args, **kwargs): self.slug = uuslug(self.title, instance=self) super(AudioList, self).save(*args, **kwargs) - - -class AudioWaveformAddOn(AbstractMediaAddOn): - audio = models.OneToOneField( - to=Audio, - on_delete=models.CASCADE, - related_name="waveform", - help_text=("The foreign key of the audio."), - ) - - peaks = ArrayField( - base_field=models.FloatField(), - # The approximate resolution of waveform generation - # results in _about_ 1000 peaks. We use 1500 to give - # sufficient wiggle room should we have any outlier - # files pop up. - # https://github.com/WordPress/openverse-api/blob/a7955c86d43bff504e8d41454f68717d79dd3a44/api/catalog/api/utils/waveform.py#L71 - size=1500, - help_text=( - "The waveform peaks. A list of floats in" - " the range of 0 -> 1 inclusively." - ), - ) diff --git a/api/catalog/api/models/media.py b/api/catalog/api/models/media.py index b653a2249..5a8ad9449 100644 --- a/api/catalog/api/models/media.py +++ b/api/catalog/api/models/media.py @@ -23,21 +23,6 @@ OTHER = "other" -class AbstractMediaAddOn(OpenLedgerModel): - """ - Base class for "add-on" data for a particular media entry. - - These models are appropriate places to store generated and infinitely - cacheable data that is not retrieved or generated at ingestion time. - - For example, audio waveforms are generated by the API, but can be - cached indefinitely. - """ - - class Meta: - abstract = True - - class AbstractMedia( IdentifierMixin, ForeignIdentifierMixin, MediaMixin, OpenLedgerModel ): diff --git a/api/test/unit/models/audio_test.py b/api/test/unit/models/audio_test.py index 97e0f2995..a9108ccec 100644 --- a/api/test/unit/models/audio_test.py +++ b/api/test/unit/models/audio_test.py @@ -2,7 +2,7 @@ from unittest import mock import pytest -from catalog.api.models.audio import Audio, AudioWaveformAddOn +from catalog.api.models.audio import Audio @pytest.fixture @@ -20,17 +20,17 @@ def audio_fixture(): @pytest.mark.django_db @mock.patch("catalog.api.models.audio.generate_peaks") def test_audio_waveform_caches(generate_peaks_mock, audio_fixture): - mock_peaks = [0.4, 0.3, 0.1, 0, 1, 0.6] - generate_peaks_mock.return_value = mock_peaks - - assert not hasattr(audio_fixture, "waveform") - assert audio_fixture.get_or_create_waveform() == mock_peaks - assert hasattr(audio_fixture, "waveform") - assert audio_fixture.waveform.peaks == mock_peaks - assert audio_fixture.get_or_create_waveform() == mock_peaks + mock_waveform = [0.4, 0.3, 0.1, 0, 1, 0.6] + generate_peaks_mock.return_value = mock_waveform + + assert audio_fixture.waveform is None + assert audio_fixture.get_or_create_waveform() == mock_waveform + assert audio_fixture.waveform is not None + assert audio_fixture.waveform == mock_waveform + assert audio_fixture.get_or_create_waveform() == mock_waveform # Should only be called once if Audio.get_or_create_waveform is using the DB value on subsequent calls generate_peaks_mock.assert_called_once() - # Ensure the waveform addon was saved - waveform = AudioWaveformAddOn.objects.get(audio=audio_fixture) - assert waveform.peaks == mock_peaks + # Ensure the waveform was saved + saved_audio = Audio.objects.get(pk=audio_fixture.pk, waveform__isnull=False) + assert saved_audio == audio_fixture From 9d471f5d94e4076a0ab60f6395db8dce32863c8f Mon Sep 17 00:00:00 2001 From: sarayourfriend <24264157+sarayourfriend@users.noreply.github.com> Date: Mon, 14 Feb 2022 18:32:57 -0500 Subject: [PATCH 06/10] Enable not following logs --- justfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/justfile b/justfile index 3e3cc6742..7a89759e7 100644 --- a/justfile +++ b/justfile @@ -44,8 +44,8 @@ recreate: @just up "--force-recreate --build" # Show logs of all, or named, Docker services -logs services="": - docker-compose {{ DOCKER_FILE }} logs -f {{ services }} +logs services="" args="-f": + docker-compose {{ DOCKER_FILE }} logs {{ args }} {{ services }} ######## From 7a0e29d7bc3f5314b54b8b2e395048005547305b Mon Sep 17 00:00:00 2001 From: sarayourfriend <24264157+sarayourfriend@users.noreply.github.com> Date: Mon, 14 Feb 2022 18:33:16 -0500 Subject: [PATCH 07/10] Switch back to side-table for audio waveform data --- .../api/migrations/0045_audio_waveform.py | 19 ------ api/catalog/api/migrations/0045_audioaddon.py | 27 ++++++++ api/catalog/api/models/audio.py | 68 +++++++++++++------ api/test/unit/models/audio_test.py | 13 ++-- 4 files changed, 79 insertions(+), 48 deletions(-) delete mode 100644 api/catalog/api/migrations/0045_audio_waveform.py create mode 100644 api/catalog/api/migrations/0045_audioaddon.py diff --git a/api/catalog/api/migrations/0045_audio_waveform.py b/api/catalog/api/migrations/0045_audio_waveform.py deleted file mode 100644 index 7a3913ec6..000000000 --- a/api/catalog/api/migrations/0045_audio_waveform.py +++ /dev/null @@ -1,19 +0,0 @@ -# Generated by Django 3.2.9 on 2022-02-09 16:45 - -import django.contrib.postgres.fields -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('api', '0044_singular_category'), - ] - - operations = [ - migrations.AddField( - model_name='audio', - name='waveform', - field=django.contrib.postgres.fields.ArrayField(base_field=models.FloatField(), blank=True, help_text='The waveform peaks. A list of floats in the range of 0 -> 1 inclusively.', null=True, size=1500), - ), - ] diff --git a/api/catalog/api/migrations/0045_audioaddon.py b/api/catalog/api/migrations/0045_audioaddon.py new file mode 100644 index 000000000..807ab3e35 --- /dev/null +++ b/api/catalog/api/migrations/0045_audioaddon.py @@ -0,0 +1,27 @@ +# Generated by Django 3.2.9 on 2022-02-14 23:22 + +import django.contrib.postgres.fields +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('api', '0044_singular_category'), + ] + + operations = [ + migrations.CreateModel( + name='AudioAddOn', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('created_on', models.DateTimeField(auto_now_add=True)), + ('updated_on', models.DateTimeField(auto_now=True)), + ('audio_identifier', models.UUIDField(db_index=True, help_text='The identifier of the audio object.', unique=True)), + ('waveform_peaks', django.contrib.postgres.fields.ArrayField(base_field=models.FloatField(), blank=True, help_text='The waveform peaks. A list of floats in the range of 0 -> 1 inclusively.', null=True, size=1500)), + ], + options={ + 'abstract': False, + }, + ), + ] diff --git a/api/catalog/api/models/audio.py b/api/catalog/api/models/audio.py index 0ffd4eb1e..0097e259d 100644 --- a/api/catalog/api/models/audio.py +++ b/api/catalog/api/models/audio.py @@ -97,6 +97,46 @@ class Meta: abstract = True +class AudioAddOn(OpenLedgerModel): + audio_identifier = models.UUIDField( + db_index=True, + unique=True, + blank=False, + null=False, + help_text=( + "The identifier of the audio object." + ) + ) + """ + This cannot be a "ForeignKey" or "OneToOneRel" because the refresh process + wipes out the Audio table completely and recreates it. If we made these a FK + or OneToOneRel there'd be foreign key constraint added that would be violated + when the Audio table is recreated. + + The index is necessary as this column is used by the Audio object to query + for the relevant add on. + + The refresh process will also eventually include cleaning up any potentially + dangling audio_add_on rows. + """ + + waveform_peaks = ArrayField( + base_field=models.FloatField(), + # The approximate resolution of waveform generation + # results in _about_ 1000 peaks. We use 1500 to give + # sufficient wiggle room should we have any outlier + # files pop up. + # https://github.com/WordPress/openverse-api/blob/a7955c86d43bff504e8d41454f68717d79dd3a44/api/catalog/api/utils/waveform.py#L71 + size=1500, + help_text=( + "The waveform peaks. A list of floats in" + " the range of 0 -> 1 inclusively." + ), + blank=True, + null=True, + ) + + class Audio(AudioFileMixin, AbstractMedia): """ Inherited fields @@ -158,31 +198,17 @@ def audio_set(self): except AudioSet.DoesNotExist: return None - waveform = ArrayField( - base_field=models.FloatField(), - # The approximate resolution of waveform generation - # results in _about_ 1000 peaks. We use 1500 to give - # sufficient wiggle room should we have any outlier - # files pop up. - # https://github.com/WordPress/openverse-api/blob/a7955c86d43bff504e8d41454f68717d79dd3a44/api/catalog/api/utils/waveform.py#L71 - size=1500, - help_text=( - "The waveform peaks. A list of floats in" - " the range of 0 -> 1 inclusively." - ), - blank=True, - null=True, - ) - def get_or_create_waveform(self): - if self.waveform is not None: - return self.waveform + add_on, _ = AudioAddOn.objects.get_or_create(audio_identifier=self.identifier) + + if add_on.waveform_peaks is not None: + return add_on.waveform_peaks - self.waveform = generate_peaks(self) + add_on.waveform_peaks = generate_peaks(self) - self.save() + add_on.save() - return self.waveform + return add_on.waveform_peaks class Meta(AbstractMedia.Meta): db_table = "audio" diff --git a/api/test/unit/models/audio_test.py b/api/test/unit/models/audio_test.py index a9108ccec..5fbdaafc2 100644 --- a/api/test/unit/models/audio_test.py +++ b/api/test/unit/models/audio_test.py @@ -2,7 +2,7 @@ from unittest import mock import pytest -from catalog.api.models.audio import Audio +from catalog.api.models.audio import Audio, AudioAddOn @pytest.fixture @@ -23,14 +23,11 @@ def test_audio_waveform_caches(generate_peaks_mock, audio_fixture): mock_waveform = [0.4, 0.3, 0.1, 0, 1, 0.6] generate_peaks_mock.return_value = mock_waveform - assert audio_fixture.waveform is None + assert AudioAddOn.objects.count() == 0 assert audio_fixture.get_or_create_waveform() == mock_waveform - assert audio_fixture.waveform is not None - assert audio_fixture.waveform == mock_waveform + assert AudioAddOn.objects.count() == 1 + # Ensure the waveform was saved + assert AudioAddOn.objects.get(audio_identifier=audio_fixture.identifier).waveform_peaks == mock_waveform assert audio_fixture.get_or_create_waveform() == mock_waveform # Should only be called once if Audio.get_or_create_waveform is using the DB value on subsequent calls generate_peaks_mock.assert_called_once() - - # Ensure the waveform was saved - saved_audio = Audio.objects.get(pk=audio_fixture.pk, waveform__isnull=False) - assert saved_audio == audio_fixture From 50006f7656145c53fbcafd59c919cec02c64963d Mon Sep 17 00:00:00 2001 From: sarayourfriend <24264157+sarayourfriend@users.noreply.github.com> Date: Mon, 14 Feb 2022 18:35:33 -0500 Subject: [PATCH 08/10] Ensure refresh will not cause constraint errors --- api/test/unit/models/audio_test.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/api/test/unit/models/audio_test.py b/api/test/unit/models/audio_test.py index 5fbdaafc2..16d0bd6d7 100644 --- a/api/test/unit/models/audio_test.py +++ b/api/test/unit/models/audio_test.py @@ -31,3 +31,8 @@ def test_audio_waveform_caches(generate_peaks_mock, audio_fixture): assert audio_fixture.get_or_create_waveform() == mock_waveform # Should only be called once if Audio.get_or_create_waveform is using the DB value on subsequent calls generate_peaks_mock.assert_called_once() + + # Ensure there are no foreign constraints on the AudioAddOn that would cause failures during refresh + audio_fixture.delete() + + assert AudioAddOn.objects.count() == 1 From 1731209665a9a63d809abf58df96dec145b5017c Mon Sep 17 00:00:00 2001 From: sarayourfriend <24264157+sarayourfriend@users.noreply.github.com> Date: Tue, 15 Feb 2022 11:48:17 -0500 Subject: [PATCH 09/10] Apply linting changes --- api/catalog/api/models/audio.py | 4 +--- api/test/unit/models/audio_test.py | 7 +++++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/api/catalog/api/models/audio.py b/api/catalog/api/models/audio.py index 0097e259d..9560c0d98 100644 --- a/api/catalog/api/models/audio.py +++ b/api/catalog/api/models/audio.py @@ -103,9 +103,7 @@ class AudioAddOn(OpenLedgerModel): unique=True, blank=False, null=False, - help_text=( - "The identifier of the audio object." - ) + help_text=("The identifier of the audio object."), ) """ This cannot be a "ForeignKey" or "OneToOneRel" because the refresh process diff --git a/api/test/unit/models/audio_test.py b/api/test/unit/models/audio_test.py index 16d0bd6d7..a32e2bcdd 100644 --- a/api/test/unit/models/audio_test.py +++ b/api/test/unit/models/audio_test.py @@ -27,7 +27,10 @@ def test_audio_waveform_caches(generate_peaks_mock, audio_fixture): assert audio_fixture.get_or_create_waveform() == mock_waveform assert AudioAddOn.objects.count() == 1 # Ensure the waveform was saved - assert AudioAddOn.objects.get(audio_identifier=audio_fixture.identifier).waveform_peaks == mock_waveform + assert ( + AudioAddOn.objects.get(audio_identifier=audio_fixture.identifier).waveform_peaks + == mock_waveform + ) assert audio_fixture.get_or_create_waveform() == mock_waveform # Should only be called once if Audio.get_or_create_waveform is using the DB value on subsequent calls generate_peaks_mock.assert_called_once() @@ -35,4 +38,4 @@ def test_audio_waveform_caches(generate_peaks_mock, audio_fixture): # Ensure there are no foreign constraints on the AudioAddOn that would cause failures during refresh audio_fixture.delete() - assert AudioAddOn.objects.count() == 1 + assert AudioAddOn.objects.count() == 1 From 8423ab89bf1c688f987c6fde0f60b937c663f2fa Mon Sep 17 00:00:00 2001 From: sarayourfriend <24264157+sarayourfriend@users.noreply.github.com> Date: Wed, 16 Feb 2022 14:30:20 -0500 Subject: [PATCH 10/10] Use primary_key flag for audio id field on the add on --- api/catalog/api/migrations/0045_audioaddon.py | 7 +++---- api/catalog/api/models/audio.py | 7 +------ 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/api/catalog/api/migrations/0045_audioaddon.py b/api/catalog/api/migrations/0045_audioaddon.py index 807ab3e35..e34286a79 100644 --- a/api/catalog/api/migrations/0045_audioaddon.py +++ b/api/catalog/api/migrations/0045_audioaddon.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.9 on 2022-02-14 23:22 +# Generated by Django 3.2.12 on 2022-02-16 19:28 import django.contrib.postgres.fields from django.db import migrations, models @@ -14,11 +14,10 @@ class Migration(migrations.Migration): migrations.CreateModel( name='AudioAddOn', fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('created_on', models.DateTimeField(auto_now_add=True)), ('updated_on', models.DateTimeField(auto_now=True)), - ('audio_identifier', models.UUIDField(db_index=True, help_text='The identifier of the audio object.', unique=True)), - ('waveform_peaks', django.contrib.postgres.fields.ArrayField(base_field=models.FloatField(), blank=True, help_text='The waveform peaks. A list of floats in the range of 0 -> 1 inclusively.', null=True, size=1500)), + ('audio_identifier', models.UUIDField(help_text='The identifier of the audio object.', primary_key=True, serialize=False)), + ('waveform_peaks', django.contrib.postgres.fields.ArrayField(base_field=models.FloatField(), help_text='The waveform peaks. A list of floats in the range of 0 -> 1 inclusively.', null=True, size=1500)), ], options={ 'abstract': False, diff --git a/api/catalog/api/models/audio.py b/api/catalog/api/models/audio.py index 9560c0d98..3b8d8ffd4 100644 --- a/api/catalog/api/models/audio.py +++ b/api/catalog/api/models/audio.py @@ -99,10 +99,7 @@ class Meta: class AudioAddOn(OpenLedgerModel): audio_identifier = models.UUIDField( - db_index=True, - unique=True, - blank=False, - null=False, + primary_key=True, help_text=("The identifier of the audio object."), ) """ @@ -130,7 +127,6 @@ class AudioAddOn(OpenLedgerModel): "The waveform peaks. A list of floats in" " the range of 0 -> 1 inclusively." ), - blank=True, null=True, ) @@ -203,7 +199,6 @@ def get_or_create_waveform(self): return add_on.waveform_peaks add_on.waveform_peaks = generate_peaks(self) - add_on.save() return add_on.waveform_peaks