Skip to content
This repository has been archived by the owner on Feb 22, 2023. It is now read-only.

Commit

Permalink
Cache waveform data in database (#510)
Browse files Browse the repository at this point in the history
* Allow running specific tests

* Document external binary and library dependencies

* Cache audio waveform in database

* Remove unnecessary abstractmethod

* Add waveform directly to Audio model

* Enable not following logs

* Switch back to side-table for audio waveform data

* Ensure refresh will not cause constraint errors

* Apply linting changes

* Use primary_key flag for audio id field on the add on
  • Loading branch information
sarayourfriend authored Feb 17, 2022
1 parent 0fa3575 commit 24a9bf1
Show file tree
Hide file tree
Showing 9 changed files with 138 additions and 21 deletions.
6 changes: 6 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ cd openverse_api
pipenv install
```

Note: The following non-Python libraries or binaries are transient dependencies which will also need to be present on your computer for the project to run and install as expected:

* `librdkafka`
* `exempi`
* `audiowaveform`

3. Launch a new shell session

```
Expand Down
26 changes: 26 additions & 0 deletions api/catalog/api/migrations/0045_audioaddon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Generated by Django 3.2.12 on 2022-02-16 19:28

import django.contrib.postgres.fields
from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('api', '0044_singular_category'),
]

operations = [
migrations.CreateModel(
name='AudioAddOn',
fields=[
('created_on', models.DateTimeField(auto_now_add=True)),
('updated_on', models.DateTimeField(auto_now=True)),
('audio_identifier', models.UUIDField(help_text='The identifier of the audio object.', primary_key=True, serialize=False)),
('waveform_peaks', django.contrib.postgres.fields.ArrayField(base_field=models.FloatField(), help_text='The waveform peaks. A list of floats in the range of 0 -> 1 inclusively.', null=True, size=1500)),
],
options={
'abstract': False,
},
),
]
46 changes: 46 additions & 0 deletions api/catalog/api/models/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
AbstractMediaReport,
)
from catalog.api.models.mixins import FileMixin, ForeignIdentifierMixin, MediaMixin
from catalog.api.utils.waveform import generate_peaks
from django.contrib.postgres.fields import ArrayField
from django.db import models
from uuslug import uuslug
Expand Down Expand Up @@ -96,6 +97,40 @@ class Meta:
abstract = True


class AudioAddOn(OpenLedgerModel):
audio_identifier = models.UUIDField(
primary_key=True,
help_text=("The identifier of the audio object."),
)
"""
This cannot be a "ForeignKey" or "OneToOneRel" because the refresh process
wipes out the Audio table completely and recreates it. If we made these a FK
or OneToOneRel there'd be foreign key constraint added that would be violated
when the Audio table is recreated.
The index is necessary as this column is used by the Audio object to query
for the relevant add on.
The refresh process will also eventually include cleaning up any potentially
dangling audio_add_on rows.
"""

waveform_peaks = ArrayField(
base_field=models.FloatField(),
# The approximate resolution of waveform generation
# results in _about_ 1000 peaks. We use 1500 to give
# sufficient wiggle room should we have any outlier
# files pop up.
# https://github.com/WordPress/openverse-api/blob/a7955c86d43bff504e8d41454f68717d79dd3a44/api/catalog/api/utils/waveform.py#L71
size=1500,
help_text=(
"The waveform peaks. A list of floats in"
" the range of 0 -> 1 inclusively."
),
null=True,
)


class Audio(AudioFileMixin, AbstractMedia):
"""
Inherited fields
Expand Down Expand Up @@ -157,6 +192,17 @@ def audio_set(self):
except AudioSet.DoesNotExist:
return None

def get_or_create_waveform(self):
add_on, _ = AudioAddOn.objects.get_or_create(audio_identifier=self.identifier)

if add_on.waveform_peaks is not None:
return add_on.waveform_peaks

add_on.waveform_peaks = generate_peaks(self)
add_on.save()

return add_on.waveform_peaks

class Meta(AbstractMedia.Meta):
db_table = "audio"

Expand Down
12 changes: 12 additions & 0 deletions api/catalog/api/utils/waveform.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pathlib
import shutil
import subprocess
from typing import List

import requests

Expand Down Expand Up @@ -128,3 +129,14 @@ def cleanup(file_name):
os.remove(file_path)
else:
log.info("File not found, nothing deleted")


def generate_peaks(audio) -> List[float]:
file_name = None
try:
file_name = download_audio(audio.url, audio.identifier)
awf_out = generate_waveform(file_name, audio.duration)
return process_waveform_output(awf_out)
finally:
if file_name is not None:
cleanup(file_name)
16 changes: 1 addition & 15 deletions api/catalog/api/views/audio_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,6 @@
)
from catalog.api.utils.exceptions import get_api_exception
from catalog.api.utils.throttle import OneThousandPerMinute
from catalog.api.utils.waveform import (
cleanup,
download_audio,
generate_waveform,
process_waveform_output,
)
from catalog.api.views.media_views import MediaViewSet
from django.utils.decorators import method_decorator
from drf_yasg.utils import swagger_auto_schema
Expand Down Expand Up @@ -84,21 +78,13 @@ def thumbnail(self, request, *_, **__):
def waveform(self, *_, **__):
audio = self.get_object()

file_name = None
try:
file_name = download_audio(audio.url, audio.identifier)
awf_out = generate_waveform(file_name, audio.duration)
data = process_waveform_output(awf_out)

obj = {"points": data}
obj = {"points": audio.get_or_create_waveform()}
serializer = self.get_serializer(obj)

return Response(status=200, data=serializer.data)
except Exception as e:
raise get_api_exception(getattr(e, "message", str(e)))
finally:
if file_name is not None:
cleanup(file_name)

@action(detail=True, methods=["post"], serializer_class=AudioReportSerializer)
def report(self, *args, **kwargs):
Expand Down
Empty file added api/test/unit/__init__.py
Empty file.
Empty file.
41 changes: 41 additions & 0 deletions api/test/unit/models/audio_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import uuid
from unittest import mock

import pytest
from catalog.api.models.audio import Audio, AudioAddOn


@pytest.fixture
@pytest.mark.django_db
def audio_fixture():
audio = Audio(
identifier=uuid.uuid4(),
)

audio.save()

return audio


@pytest.mark.django_db
@mock.patch("catalog.api.models.audio.generate_peaks")
def test_audio_waveform_caches(generate_peaks_mock, audio_fixture):
mock_waveform = [0.4, 0.3, 0.1, 0, 1, 0.6]
generate_peaks_mock.return_value = mock_waveform

assert AudioAddOn.objects.count() == 0
assert audio_fixture.get_or_create_waveform() == mock_waveform
assert AudioAddOn.objects.count() == 1
# Ensure the waveform was saved
assert (
AudioAddOn.objects.get(audio_identifier=audio_fixture.identifier).waveform_peaks
== mock_waveform
)
assert audio_fixture.get_or_create_waveform() == mock_waveform
# Should only be called once if Audio.get_or_create_waveform is using the DB value on subsequent calls
generate_peaks_mock.assert_called_once()

# Ensure there are no foreign constraints on the AudioAddOn that would cause failures during refresh
audio_fixture.delete()

assert AudioAddOn.objects.count() == 1
12 changes: 6 additions & 6 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ recreate:
@just up "--force-recreate --build"

# Show logs of all, or named, Docker services
logs services="":
docker-compose {{ DOCKER_FILE }} logs -f {{ services }}
logs services="" args="-f":
docker-compose {{ DOCKER_FILE }} logs {{ args }} {{ services }}


########
Expand Down Expand Up @@ -165,12 +165,12 @@ _api-install:
"Waiting for the API to be healthy..."

# Run API tests inside Docker
api-test args="": up wait-for-es wait-for-ing wait-for-web
docker-compose exec {{ args }} web ./test/run_test.sh
api-test docker_args="" tests="": up wait-for-es wait-for-ing wait-for-web
docker-compose exec {{ docker_args }} web ./test/run_test.sh {{ tests }}

# Run API tests locally
api-testlocal:
cd api && pipenv run ./test/run_test.sh
api-testlocal args="":
cd api && pipenv run ./test/run_test.sh {{ args }}

# Run Django administrative commands
dj args="":
Expand Down

0 comments on commit 24a9bf1

Please sign in to comment.