-
Notifications
You must be signed in to change notification settings - Fork 50
Cache waveform data in database #510
Changes from all commits
b1c4b67
f53f3e8
32765cc
049a083
6e0b589
9d471f5
7a0e29d
50006f7
1731209
8423ab8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Generated by Django 3.2.12 on 2022-02-16 19:28 | ||
|
||
import django.contrib.postgres.fields | ||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):
    """Create the ``AudioAddOn`` table, which stores waveform peaks per audio."""

    dependencies = [
        ("api", "0044_singular_category"),
    ]

    operations = [
        migrations.CreateModel(
            name="AudioAddOn",
            fields=[
                ("created_on", models.DateTimeField(auto_now_add=True)),
                ("updated_on", models.DateTimeField(auto_now=True)),
                (
                    "audio_identifier",
                    # Acts as the primary key; a plain UUID column, not a
                    # relation to the audio table.
                    models.UUIDField(
                        help_text="The identifier of the audio object.",
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                (
                    "waveform_peaks",
                    # Nullable, bounded array of floats.
                    django.contrib.postgres.fields.ArrayField(
                        base_field=models.FloatField(),
                        help_text=(
                            "The waveform peaks. A list of floats in"
                            " the range of 0 -> 1 inclusively."
                        ),
                        null=True,
                        size=1500,
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
        ),
    ]
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ | |
AbstractMediaReport, | ||
) | ||
from catalog.api.models.mixins import FileMixin, ForeignIdentifierMixin, MediaMixin | ||
from catalog.api.utils.waveform import generate_peaks | ||
from django.contrib.postgres.fields import ArrayField | ||
from django.db import models | ||
from uuslug import uuslug | ||
|
@@ -96,6 +97,40 @@ class Meta: | |
abstract = True | ||
|
||
|
||
class AudioAddOn(OpenLedgerModel):
    """
    Extra data attached to an audio record, looked up by the audio's identifier.

    Currently holds the waveform peaks for the audio.
    """

    # This cannot be a "ForeignKey" or "OneToOneRel" because the refresh process
    # wipes out the Audio table completely and recreates it. If we made these a
    # FK or OneToOneRel there'd be foreign key constraint added that would be
    # violated when the Audio table is recreated.
    #
    # The index is necessary as this column is used by the Audio object to query
    # for the relevant add on.
    #
    # The refresh process will also eventually include cleaning up any
    # potentially dangling audio_add_on rows.
    #
    # NOTE(review): the ``audio_`` prefix is kept on purpose even though this is
    # the primary key; the value identifies the related Audio instance rather
    # than the add-on itself.
    audio_identifier = models.UUIDField(
        help_text="The identifier of the audio object.",
        primary_key=True,
    )

    waveform_peaks = ArrayField(
        base_field=models.FloatField(),
        help_text=(
            "The waveform peaks. A list of floats in"
            " the range of 0 -> 1 inclusively."
        ),
        null=True,
        # The approximate resolution of waveform generation
        # results in _about_ 1000 peaks. We use 1500 to give
        # sufficient wiggle room should we have any outlier
        # files pop up.
        # https://github.com/WordPress/openverse-api/blob/a7955c86d43bff504e8d41454f68717d79dd3a44/api/catalog/api/utils/waveform.py#L71
        size=1500,
    )
|
||
|
||
class Audio(AudioFileMixin, AbstractMedia): | ||
""" | ||
Inherited fields | ||
|
@@ -157,6 +192,17 @@ def audio_set(self): | |
except AudioSet.DoesNotExist: | ||
return None | ||
|
||
def get_or_create_waveform(self):
    """
    Return the waveform peaks for this audio, generating them on first use.

    The peaks live on the ``AudioAddOn`` row whose ``audio_identifier`` equals
    this audio's ``identifier``; the row is created if it does not exist.
    ``generate_peaks`` is only called when the row has no peaks stored, and its
    result is saved to the row before being returned.
    """
    cache_row, _created = AudioAddOn.objects.get_or_create(
        audio_identifier=self.identifier
    )

    # ``None`` means "never computed"; anything else is a stored result.
    if cache_row.waveform_peaks is None:
        cache_row.waveform_peaks = generate_peaks(self)
        cache_row.save()

    return cache_row.waveform_peaks
|
||
class Meta(AbstractMedia.Meta): | ||
db_table = "audio" | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
import pathlib | ||
import shutil | ||
import subprocess | ||
from typing import List | ||
|
||
import requests | ||
|
||
|
@@ -128,3 +129,14 @@ def cleanup(file_name): | |
os.remove(file_path) | ||
else: | ||
log.info("File not found, nothing deleted") | ||
|
||
|
||
def generate_peaks(audio) -> List[float]:
    """
    Download an audio file and compute its waveform peaks.

    :param audio: object exposing ``url``, ``identifier`` and ``duration``
    :return: the peaks produced by ``process_waveform_output``

    The downloaded file is removed via ``cleanup`` whether or not waveform
    generation succeeds. If the download itself fails, nothing is cleaned up.
    """
    downloaded = None
    try:
        downloaded = download_audio(audio.url, audio.identifier)
        return process_waveform_output(
            generate_waveform(downloaded, audio.duration)
        )
    finally:
        # Only clean up once the download has actually produced a file name.
        if downloaded is not None:
            cleanup(downloaded)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
import uuid | ||
from unittest import mock | ||
|
||
import pytest | ||
from catalog.api.models.audio import Audio, AudioAddOn | ||
|
||
|
||
@pytest.fixture
def audio_fixture(db):
    """
    Save and return an ``Audio`` row that has only a random identifier set.

    Database access comes from requesting pytest-django's ``db`` fixture.
    ``@pytest.mark.django_db`` is deliberately not used here: pytest marks
    have no effect on fixture functions, and newer pytest versions reject
    them outright.
    """
    audio = Audio(
        identifier=uuid.uuid4(),
    )
    audio.save()

    return audio
|
||
|
||
@pytest.mark.django_db
@mock.patch("catalog.api.models.audio.generate_peaks")
def test_audio_waveform_caches(generate_peaks_mock, audio_fixture):
    """Waveform peaks are generated once, stored, and outlive the Audio row."""
    expected_peaks = [0.4, 0.3, 0.1, 0, 1, 0.6]
    generate_peaks_mock.return_value = expected_peaks

    # Nothing is cached before the first request.
    assert AudioAddOn.objects.count() == 0

    first_result = audio_fixture.get_or_create_waveform()
    assert first_result == expected_peaks
    assert AudioAddOn.objects.count() == 1

    # Ensure the waveform was saved
    stored_row = AudioAddOn.objects.get(audio_identifier=audio_fixture.identifier)
    assert stored_row.waveform_peaks == expected_peaks

    second_result = audio_fixture.get_or_create_waveform()
    assert second_result == expected_peaks
    # Should only be called once if Audio.get_or_create_waveform is using the
    # DB value on subsequent calls
    generate_peaks_mock.assert_called_once()

    # Ensure there are no foreign constraints on the AudioAddOn that would
    # cause failures during refresh
    audio_fixture.delete()

    assert AudioAddOn.objects.count() == 1
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍