-
Notifications
You must be signed in to change notification settings - Fork 50
Cache waveform data in database #510
Changes from 4 commits
b1c4b67
f53f3e8
32765cc
049a083
6e0b589
9d471f5
7a0e29d
50006f7
1731209
8423ab8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Generated by Django 3.2.9 on 2022-02-07 14:05 | ||
|
||
import django.contrib.postgres.fields | ||
from django.db import migrations, models | ||
import django.db.models.deletion | ||
|
||
|
||
class Migration(migrations.Migration):
    """Create the table backing the ``AudioWaveformAddOn`` model.

    Each row stores the waveform peaks of one audio record and is tied to
    it through a one-to-one relation declared with ``CASCADE`` deletion.
    """

    dependencies = [("api", "0044_singular_category")]

    operations = [
        migrations.CreateModel(
            name="AudioWaveformAddOn",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                # Bookkeeping timestamps: set on insert / refreshed on save.
                ("created_on", models.DateTimeField(auto_now_add=True)),
                ("updated_on", models.DateTimeField(auto_now=True)),
                (
                    "peaks",
                    django.contrib.postgres.fields.ArrayField(
                        base_field=models.FloatField(),
                        help_text=(
                            "The waveform peaks. A list of floats in the "
                            "range of 0 -> 1 inclusively."
                        ),
                        size=1500,
                    ),
                ),
                (
                    "audio",
                    models.OneToOneField(
                        help_text="The foreign key of the audio.",
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="waveform",
                        to="api.audio",
                    ),
                ),
            ],
            options={"abstract": False},
        ),
    ]
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,10 +5,12 @@ | |
AbstractDeletedMedia, | ||
AbstractMatureMedia, | ||
AbstractMedia, | ||
AbstractMediaAddOn, | ||
AbstractMediaList, | ||
AbstractMediaReport, | ||
) | ||
from catalog.api.models.mixins import FileMixin, ForeignIdentifierMixin, MediaMixin | ||
from catalog.api.utils.waveform import generate_peaks | ||
from django.contrib.postgres.fields import ArrayField | ||
from django.db import models | ||
from uuslug import uuslug | ||
|
@@ -157,6 +159,18 @@ def audio_set(self): | |
except AudioSet.DoesNotExist: | ||
return None | ||
|
||
def get_or_create_waveform(self):
    """Return the waveform peaks for this audio, generating them if needed.

    When no ``waveform`` add-on is attached yet, the peaks are computed with
    ``generate_peaks``, wrapped in a new ``AudioWaveformAddOn`` and saved, so
    that later calls can reuse the stored value.

    :return: the ``peaks`` value of the attached waveform add-on
    """
    if not hasattr(self, "waveform"):
        # First request for this audio: compute once and persist.
        add_on = AudioWaveformAddOn(audio=self, peaks=generate_peaks(self))
        self.waveform = add_on
        add_on.save()

    return self.waveform.peaks
|
||
class Meta(AbstractMedia.Meta): | ||
db_table = "audio" | ||
|
||
|
@@ -209,3 +223,26 @@ class Meta: | |
def save(self, *args, **kwargs): | ||
self.slug = uuslug(self.title, instance=self) | ||
super(AudioList, self).save(*args, **kwargs) | ||
|
||
|
||
class AudioWaveformAddOn(AbstractMediaAddOn):
    """Add-on record holding the waveform peaks of a single audio.

    The one-to-one relation's ``related_name`` exposes the record from the
    audio side as ``audio.waveform``.
    """

    audio = models.OneToOneField(
        Audio,
        related_name="waveform",
        on_delete=models.CASCADE,
        help_text="The foreign key of the audio.",
    )

    # NOTE(review): reviewers on the pull request debated replacing this
    # strictly typed float array with a looser or compressed representation
    # (e.g. compressed JSON in a text field) to gain performance at the cost
    # of validation; no decision is visible in this view.
    peaks = ArrayField(
        models.FloatField(),
        help_text=(
            "The waveform peaks. "
            "A list of floats in the range of 0 -> 1 inclusively."
        ),
        # The approximate resolution of waveform generation
        # results in _about_ 1000 peaks. We use 1500 to give
        # sufficient wiggle room should we have any outlier
        # files pop up.
        # https://github.com/WordPress/openverse-api/blob/a7955c86d43bff504e8d41454f68717d79dd3a44/api/catalog/api/utils/waveform.py#L71
        size=1500,
    )
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
import pathlib | ||
import shutil | ||
import subprocess | ||
from typing import List | ||
|
||
import requests | ||
|
||
|
@@ -128,3 +129,14 @@ def cleanup(file_name): | |
os.remove(file_path) | ||
else: | ||
log.info("File not found, nothing deleted") | ||
|
||
|
||
def generate_peaks(audio) -> List[float]:
    """Produce the waveform peaks for ``audio``.

    The audio is fetched with ``download_audio``, passed through
    ``generate_waveform`` and the output handed to
    ``process_waveform_output``. The downloaded file is given to ``cleanup``
    afterwards whether or not the processing succeeded.

    :param audio: object exposing ``url``, ``identifier`` and ``duration``
    :return: the result of ``process_waveform_output``
    """
    # NOTE(review): a reviewer attached a follow-up suggestion (possibly for
    # a separate issue) to the download call below; the specifics were lost
    # from the captured thread.
    downloaded = download_audio(audio.url, audio.identifier)
    try:
        waveform_output = generate_waveform(downloaded, audio.duration)
        return process_waveform_output(waveform_output)
    finally:
        # Only clean up when the download actually handed back a file name.
        if downloaded is not None:
            cleanup(downloaded)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import uuid | ||
from unittest import mock | ||
|
||
import pytest | ||
from catalog.api.models.audio import Audio, AudioWaveformAddOn | ||
|
||
|
||
@pytest.fixture
def audio_fixture(db):
    """Persist and return a minimal ``Audio`` row with a random identifier.

    Database access is requested through pytest-django's ``db`` fixture
    rather than a ``django_db`` mark: marks have no effect when applied to
    a fixture function, and pytest has deprecated doing so.

    :param db: pytest-django fixture enabling database access
    :return: the saved ``Audio`` instance
    """
    audio = Audio(identifier=uuid.uuid4())
    audio.save()
    return audio
|
||
|
||
@pytest.mark.django_db
@mock.patch("catalog.api.models.audio.generate_peaks")
def test_audio_waveform_caches(generate_peaks_mock, audio_fixture):
    """Peaks are generated once, stored, and then served from the add-on."""
    expected_peaks = [0.4, 0.3, 0.1, 0, 1, 0.6]
    generate_peaks_mock.return_value = expected_peaks

    # Nothing is attached before the first request.
    assert not hasattr(audio_fixture, "waveform")

    # The first call generates the peaks and attaches the add-on.
    assert audio_fixture.get_or_create_waveform() == expected_peaks
    assert hasattr(audio_fixture, "waveform")
    assert audio_fixture.waveform.peaks == expected_peaks

    # The second call must be answered from the stored value, so the
    # generator is expected to have run exactly once overall.
    assert audio_fixture.get_or_create_waveform() == expected_peaks
    generate_peaks_mock.assert_called_once()

    # Ensure the waveform add-on was saved to the database.
    stored_add_on = AudioWaveformAddOn.objects.get(audio=audio_fixture)
    assert stored_add_on.peaks == expected_peaks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍