Skip to content
This repository has been archived by the owner on Feb 22, 2023. It is now read-only.

Cache waveform data in database #510

Merged
merged 10 commits into from
Feb 17, 2022
19 changes: 0 additions & 19 deletions api/catalog/api/migrations/0045_audio_waveform.py

This file was deleted.

27 changes: 27 additions & 0 deletions api/catalog/api/migrations/0045_audioaddon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Generated by Django 3.2.9 on 2022-02-14 23:22

import django.contrib.postgres.fields
from django.db import migrations, models


class Migration(migrations.Migration):
    """Create the ``AudioAddOn`` model.

    The model carries timestamp columns, a unique and indexed
    ``audio_identifier`` UUID, and a nullable ``waveform_peaks`` float
    array capped at 1500 entries.
    """

    # Must run after the migration that precedes it in the ``api`` app.
    dependencies = [("api", "0044_singular_category")]

    operations = [
        migrations.CreateModel(
            name="AudioAddOn",
            options={"abstract": False},
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "created_on",
                    models.DateTimeField(auto_now_add=True),
                ),
                (
                    "updated_on",
                    models.DateTimeField(auto_now=True),
                ),
                (
                    "audio_identifier",
                    models.UUIDField(
                        db_index=True,
                        help_text="The identifier of the audio object.",
                        unique=True,
                    ),
                ),
                (
                    "waveform_peaks",
                    django.contrib.postgres.fields.ArrayField(
                        base_field=models.FloatField(),
                        blank=True,
                        help_text="The waveform peaks. A list of floats in the range of 0 -> 1 inclusively.",
                        null=True,
                        size=1500,
                    ),
                ),
            ],
        ),
    ]
68 changes: 47 additions & 21 deletions api/catalog/api/models/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,46 @@ class Meta:
abstract = True


class AudioAddOn(OpenLedgerModel):
audio_identifier = models.UUIDField(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is the primary key of this model we should drop the audio_ prefix.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should also note that leaving it as it is wouldn't be wrong either, as it refers to the Audio model instance.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think we should keep it, as it's not exactly the identifier for the add-on... it just happens to act as one, and we're throwing away the actual canonical PK for the add-on table because we never use it anyway.

db_index=True,
unique=True,
blank=False,
null=False,
sarayourfriend marked this conversation as resolved.
Show resolved Hide resolved
help_text=(
"The identifier of the audio object."
)
)
"""
This cannot be a "ForeignKey" or "OneToOneField" because the refresh process
wipes out the Audio table completely and recreates it. If we made this a FK
or OneToOneField there'd be a foreign key constraint added that would be violated
when the Audio table is recreated.

The index is necessary as this column is used by the Audio object to query
for the relevant add on.

The refresh process will also eventually include cleaning up any potentially
dangling audio_add_on rows.
Comment on lines +106 to +115
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a great piece of documentation!

"""

waveform_peaks = ArrayField(
base_field=models.FloatField(),
# The approximate resolution of waveform generation
# results in _about_ 1000 peaks. We use 1500 to give
# sufficient wiggle room should we have any outlier
# files pop up.
# https://github.com/WordPress/openverse-api/blob/a7955c86d43bff504e8d41454f68717d79dd3a44/api/catalog/api/utils/waveform.py#L71
size=1500,
help_text=(
"The waveform peaks. A list of floats in"
" the range of 0 -> 1 inclusively."
),
blank=True,
null=True,
)


class Audio(AudioFileMixin, AbstractMedia):
"""
Inherited fields
Expand Down Expand Up @@ -158,31 +198,17 @@ def audio_set(self):
except AudioSet.DoesNotExist:
return None

waveform = ArrayField(
base_field=models.FloatField(),
# The approximate resolution of waveform generation
# results in _about_ 1000 peaks. We use 1500 to give
# sufficient wiggle room should we have any outlier
# files pop up.
# https://github.com/WordPress/openverse-api/blob/a7955c86d43bff504e8d41454f68717d79dd3a44/api/catalog/api/utils/waveform.py#L71
size=1500,
help_text=(
"The waveform peaks. A list of floats in"
" the range of 0 -> 1 inclusively."
),
blank=True,
null=True,
)

def get_or_create_waveform(self):
if self.waveform is not None:
return self.waveform
add_on, _ = AudioAddOn.objects.get_or_create(audio_identifier=self.identifier)

if add_on.waveform_peaks is not None:
return add_on.waveform_peaks

self.waveform = generate_peaks(self)
add_on.waveform_peaks = generate_peaks(self)

self.save()
add_on.save()

return self.waveform
return add_on.waveform_peaks

class Meta(AbstractMedia.Meta):
db_table = "audio"
Expand Down
16 changes: 9 additions & 7 deletions api/test/unit/models/audio_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from unittest import mock

import pytest
from catalog.api.models.audio import Audio
from catalog.api.models.audio import Audio, AudioAddOn


@pytest.fixture
Expand All @@ -23,14 +23,16 @@ def test_audio_waveform_caches(generate_peaks_mock, audio_fixture):
mock_waveform = [0.4, 0.3, 0.1, 0, 1, 0.6]
generate_peaks_mock.return_value = mock_waveform

assert audio_fixture.waveform is None
assert AudioAddOn.objects.count() == 0
assert audio_fixture.get_or_create_waveform() == mock_waveform
assert audio_fixture.waveform is not None
assert audio_fixture.waveform == mock_waveform
assert AudioAddOn.objects.count() == 1
# Ensure the waveform was saved
assert AudioAddOn.objects.get(audio_identifier=audio_fixture.identifier).waveform_peaks == mock_waveform
assert audio_fixture.get_or_create_waveform() == mock_waveform
# Should only be called once if Audio.get_or_create_waveform is using the DB value on subsequent calls
generate_peaks_mock.assert_called_once()

# Ensure the waveform was saved
saved_audio = Audio.objects.get(pk=audio_fixture.pk, waveform__isnull=False)
assert saved_audio == audio_fixture
# Ensure there are no foreign constraints on the AudioAddOn that would cause failures during refresh
audio_fixture.delete()

assert AudioAddOn.objects.count() == 1
4 changes: 2 additions & 2 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ recreate:
@just up "--force-recreate --build"

# Show logs of all, or named, Docker services
logs services="":
docker-compose {{ DOCKER_FILE }} logs -f {{ services }}
logs services="" args="-f":
sarayourfriend marked this conversation as resolved.
Show resolved Hide resolved
docker-compose {{ DOCKER_FILE }} logs {{ args }} {{ services }}


########
Expand Down