diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1a68d429c..09493db6e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -46,6 +46,12 @@ cd openverse_api pipenv install ``` +Note: The following non-Python libraries or binaries are transient dependencies which will also need to be present on your computer for the project to run and install as expected: + +* `librdkafka` +* `exempi` +* `audiowaveform` + 3. Launch a new shell session ``` diff --git a/api/catalog/api/migrations/0045_audioaddon.py b/api/catalog/api/migrations/0045_audioaddon.py new file mode 100644 index 000000000..e34286a79 --- /dev/null +++ b/api/catalog/api/migrations/0045_audioaddon.py @@ -0,0 +1,26 @@ +# Generated by Django 3.2.12 on 2022-02-16 19:28 + +import django.contrib.postgres.fields +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('api', '0044_singular_category'), + ] + + operations = [ + migrations.CreateModel( + name='AudioAddOn', + fields=[ + ('created_on', models.DateTimeField(auto_now_add=True)), + ('updated_on', models.DateTimeField(auto_now=True)), + ('audio_identifier', models.UUIDField(help_text='The identifier of the audio object.', primary_key=True, serialize=False)), + ('waveform_peaks', django.contrib.postgres.fields.ArrayField(base_field=models.FloatField(), help_text='The waveform peaks. A list of floats in the range of 0 -> 1 inclusively.', null=True, size=1500)), + ], + options={ + 'abstract': False, + }, + ), + ] diff --git a/api/catalog/api/models/audio.py b/api/catalog/api/models/audio.py index ca3bdb779..3b8d8ffd4 100644 --- a/api/catalog/api/models/audio.py +++ b/api/catalog/api/models/audio.py @@ -9,6 +9,7 @@ AbstractMediaReport, ) from catalog.api.models.mixins import FileMixin, ForeignIdentifierMixin, MediaMixin +from catalog.api.utils.waveform import generate_peaks from django.contrib.postgres.fields import ArrayField from django.db import models from uuslug import uuslug @@ -96,6 +97,40 @@ class Meta: abstract = True +class AudioAddOn(OpenLedgerModel): + audio_identifier = models.UUIDField( + primary_key=True, + help_text=("The identifier of the audio object."), + ) + """ + This cannot be a "ForeignKey" or "OneToOneRel" because the refresh process + wipes out the Audio table completely and recreates it. If we made these a FK + or OneToOneRel there'd be foreign key constraint added that would be violated + when the Audio table is recreated. + + The index is necessary as this column is used by the Audio object to query + for the relevant add on. + + The refresh process will also eventually include cleaning up any potentially + dangling audio_add_on rows. + """ + + waveform_peaks = ArrayField( + base_field=models.FloatField(), + # The approximate resolution of waveform generation + # results in _about_ 1000 peaks. We use 1500 to give + # sufficient wiggle room should we have any outlier + # files pop up. + # https://github.com/WordPress/openverse-api/blob/a7955c86d43bff504e8d41454f68717d79dd3a44/api/catalog/api/utils/waveform.py#L71 + size=1500, + help_text=( + "The waveform peaks. A list of floats in" + " the range of 0 -> 1 inclusively." + ), + null=True, + ) + + class Audio(AudioFileMixin, AbstractMedia): """ Inherited fields @@ -157,6 +192,17 @@ def audio_set(self): except AudioSet.DoesNotExist: return None + def get_or_create_waveform(self): + add_on, _ = AudioAddOn.objects.get_or_create(audio_identifier=self.identifier) + + if add_on.waveform_peaks is not None: + return add_on.waveform_peaks + + add_on.waveform_peaks = generate_peaks(self) + add_on.save() + + return add_on.waveform_peaks + class Meta(AbstractMedia.Meta): db_table = "audio" diff --git a/api/catalog/api/utils/waveform.py b/api/catalog/api/utils/waveform.py index 4f2dd79bb..821fd3c99 100644 --- a/api/catalog/api/utils/waveform.py +++ b/api/catalog/api/utils/waveform.py @@ -6,6 +6,7 @@ import pathlib import shutil import subprocess +from typing import List import requests @@ -128,3 +129,14 @@ def cleanup(file_name): os.remove(file_path) else: log.info("File not found, nothing deleted") + + +def generate_peaks(audio) -> List[float]: + file_name = None + try: + file_name = download_audio(audio.url, audio.identifier) + awf_out = generate_waveform(file_name, audio.duration) + return process_waveform_output(awf_out) + finally: + if file_name is not None: + cleanup(file_name) diff --git a/api/catalog/api/views/audio_views.py b/api/catalog/api/views/audio_views.py index 61ecc0bde..9bf2b7021 100644 --- a/api/catalog/api/views/audio_views.py +++ b/api/catalog/api/views/audio_views.py @@ -16,12 +16,6 @@ ) from catalog.api.utils.exceptions import get_api_exception from catalog.api.utils.throttle import OneThousandPerMinute -from catalog.api.utils.waveform import ( - cleanup, - download_audio, - generate_waveform, - process_waveform_output, -) from catalog.api.views.media_views import MediaViewSet from django.utils.decorators import method_decorator from drf_yasg.utils import swagger_auto_schema @@ -84,21 +78,13 @@ def thumbnail(self, request, *_, **__): def waveform(self, *_, **__): audio = self.get_object() - file_name = None try: - file_name = download_audio(audio.url, audio.identifier) - awf_out = generate_waveform(file_name, audio.duration) - data = process_waveform_output(awf_out) - - obj = {"points": data} + obj = {"points": audio.get_or_create_waveform()} serializer = self.get_serializer(obj) return Response(status=200, data=serializer.data) except Exception as e: raise get_api_exception(getattr(e, "message", str(e))) - finally: - if file_name is not None: - cleanup(file_name) @action(detail=True, methods=["post"], serializer_class=AudioReportSerializer) def report(self, *args, **kwargs): diff --git a/api/test/unit/__init__.py b/api/test/unit/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/api/test/unit/models/__init__.py b/api/test/unit/models/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/api/test/unit/models/audio_test.py b/api/test/unit/models/audio_test.py new file mode 100644 index 000000000..a32e2bcdd --- /dev/null +++ b/api/test/unit/models/audio_test.py @@ -0,0 +1,41 @@ +import uuid +from unittest import mock + +import pytest +from catalog.api.models.audio import Audio, AudioAddOn + + +@pytest.fixture +@pytest.mark.django_db +def audio_fixture(): + audio = Audio( + identifier=uuid.uuid4(), + ) + + audio.save() + + return audio + + +@pytest.mark.django_db +@mock.patch("catalog.api.models.audio.generate_peaks") +def test_audio_waveform_caches(generate_peaks_mock, audio_fixture): + mock_waveform = [0.4, 0.3, 0.1, 0, 1, 0.6] + generate_peaks_mock.return_value = mock_waveform + + assert AudioAddOn.objects.count() == 0 + assert audio_fixture.get_or_create_waveform() == mock_waveform + assert AudioAddOn.objects.count() == 1 + # Ensure the waveform was saved + assert ( + AudioAddOn.objects.get(audio_identifier=audio_fixture.identifier).waveform_peaks + == mock_waveform + ) + assert audio_fixture.get_or_create_waveform() == mock_waveform + # Should only be called once if Audio.get_or_create_waveform is using the DB value on subsequent calls + generate_peaks_mock.assert_called_once() + + # Ensure there are no foreign constraints on the AudioAddOn that would cause failures during refresh + audio_fixture.delete() + + assert AudioAddOn.objects.count() == 1 diff --git a/justfile b/justfile index 9d350a18b..7a89759e7 100644 --- a/justfile +++ b/justfile @@ -44,8 +44,8 @@ recreate: @just up "--force-recreate --build" # Show logs of all, or named, Docker services -logs services="": - docker-compose {{ DOCKER_FILE }} logs -f {{ services }} +logs services="" args="-f": + docker-compose {{ DOCKER_FILE }} logs {{ args }} {{ services }} ######## @@ -165,12 +165,12 @@ _api-install: "Waiting for the API to be healthy..." # Run API tests inside Docker -api-test args="": up wait-for-es wait-for-ing wait-for-web - docker-compose exec {{ args }} web ./test/run_test.sh +api-test docker_args="" tests="": up wait-for-es wait-for-ing wait-for-web + docker-compose exec {{ docker_args }} web ./test/run_test.sh {{ tests }} # Run API tests locally -api-testlocal: - cd api && pipenv run ./test/run_test.sh +api-testlocal args="": + cd api && pipenv run ./test/run_test.sh {{ args }} # Run Django administrative commands dj args="":