From 4a1d2aad2316c3c15720c0eea6165f9cf0e11814 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Tue, 5 Oct 2021 18:15:27 +0400 Subject: [PATCH 1/5] Add file type and properties to the file classes --- openverse_api/catalog/api/models/media.py | 23 +++++++++++++++- openverse_api/catalog/api/models/mixins.py | 32 +++++++++++++++++++++- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/openverse_api/catalog/api/models/media.py b/openverse_api/catalog/api/models/media.py index 5132817b7..7422e8e52 100644 --- a/openverse_api/catalog/api/models/media.py +++ b/openverse_api/catalog/api/models/media.py @@ -1,3 +1,5 @@ +import mimetypes + import catalog.api.controllers.search_controller as search_controller from catalog.api.licenses import ATTRIBUTION, get_license_url from catalog.api.models.base import OpenLedgerModel @@ -259,9 +261,28 @@ class AbstractAltFile: provides alternative qualities, formats and resolutions that are available from the provider that are not canonical. - The schema of the class must correspond to that of the ``FileMixin`` class. + The schema of the class must correspond to that of the + :py:class:`catalog.api.models.mixins.FileMixin` class. """ def __init__(self, attrs): self.url = attrs.get("url") self.filesize = attrs.get("filesize") + self.filetype = attrs.get("filetype") + + @property + def size_in_mib(self): # ~ MiB or mebibytes + return self.filesize / 2 ** 20 + + @property + def size_in_mb(self): # ~ MB or megabytes + return self.filesize / 1e6 + + @property + def mime_type(self): + """ + Get the MIME type of the file inferred from the extension of the file. 
+ :return: the inferred MIME type of the file + """ + + return mimetypes.types_map[f".{self.filetype}"] diff --git a/openverse_api/catalog/api/models/mixins.py b/openverse_api/catalog/api/models/mixins.py index 85beb6e0b..898d2094e 100644 --- a/openverse_api/catalog/api/models/mixins.py +++ b/openverse_api/catalog/api/models/mixins.py @@ -1,3 +1,5 @@ +import mimetypes + from django.db import models @@ -64,7 +66,35 @@ class FileMixin(models.Model): url = models.URLField( unique=True, max_length=1000, help_text="The actual URL to the media file." ) - filesize = models.IntegerField(blank=True, null=True) + filesize = models.IntegerField( + blank=True, + null=True, + help_text="Number in bytes, e.g. 1024.", + # Bytes for parity with the HTTP Content-Length header + ) + filetype = models.CharField( + max_length=80, + blank=True, + null=True, + help_text="The type of the file, related to the file extension.", + ) + + @property + def size_in_mib(self): # ~ MiB or mebibytes + return self.filesize / 2 ** 20 + + @property + def size_in_mbs(self): # ~ MB or megabytes + return self.filesize / 1e6 + + @property + def mime_type(self): + """ + Get the MIME type of the file inferred from the extension of the file. 
+ :return: the inferred MIME type of the file + """ + + return mimetypes.types_map[f".{self.filetype}"] class Meta: abstract = True From d5c5827508d290cc090592315cd645be0ba977f3 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Tue, 5 Oct 2021 18:15:57 +0400 Subject: [PATCH 2/5] Remove direct inheritance of `FileMixin` in media --- openverse_api/catalog/api/models/media.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openverse_api/catalog/api/models/media.py b/openverse_api/catalog/api/models/media.py index 7422e8e52..415abbb46 100644 --- a/openverse_api/catalog/api/models/media.py +++ b/openverse_api/catalog/api/models/media.py @@ -3,7 +3,7 @@ import catalog.api.controllers.search_controller as search_controller from catalog.api.licenses import ATTRIBUTION, get_license_url from catalog.api.models.base import OpenLedgerModel -from catalog.api.models.mixins import FileMixin, IdentifierMixin, MediaMixin +from catalog.api.models.mixins import IdentifierMixin, MediaMixin from django.contrib.postgres.fields import ArrayField from django.db import models from django.utils.html import format_html @@ -19,7 +19,7 @@ OTHER = "other" -class AbstractMedia(IdentifierMixin, MediaMixin, FileMixin, OpenLedgerModel): +class AbstractMedia(IdentifierMixin, MediaMixin, OpenLedgerModel): """ Generic model from which to inherit all media classes. This class stores information common to all media types indexed by Openverse. 
From b37dde342404a22f891ec3e5389bdd7bfa488cfb Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Tue, 5 Oct 2021 18:33:09 +0400 Subject: [PATCH 3/5] Extend `FileMixin` for media-specific file information --- openverse_api/catalog/api/models/audio.py | 49 ++++++++++++++--------- openverse_api/catalog/api/models/image.py | 19 ++++++++- 2 files changed, 48 insertions(+), 20 deletions(-) diff --git a/openverse_api/catalog/api/models/audio.py b/openverse_api/catalog/api/models/audio.py index 3b67602e2..b453f4fed 100644 --- a/openverse_api/catalog/api/models/audio.py +++ b/openverse_api/catalog/api/models/audio.py @@ -49,7 +49,36 @@ class AudioSet(IdentifierMixin, MediaMixin, FileMixin, OpenLedgerModel): pass -class Audio(AbstractMedia): +class AudioFileMixin(FileMixin): + """ + This mixin adds fields related to audio quality to the standard file mixin. + Do not use this as the sole base class. + """ + + bit_rate = models.IntegerField( + blank=True, + null=True, + help_text="Number in bits per second, eg. 128000.", + ) + sample_rate = models.IntegerField( + blank=True, + null=True, + help_text="Number in hertz, eg. 44100.", + ) + + @property + def sample_rate_in_khz(self): + return self.sample_rate / 1e3 + + @property + def bit_rate_in_kbps(self): + return self.bit_rate / 1e3 + + class Meta: + abstract = True + + +class Audio(AudioFileMixin, AbstractMedia): audio_set = models.ForeignKey( help_text="Reference to set of which this track is a part.", to=AudioSet, @@ -85,16 +114,6 @@ class Audio(AbstractMedia): null=True, help_text="The time length of the audio file in milliseconds.", ) - bit_rate = models.IntegerField( - blank=True, - null=True, - help_text="Number in bits per second, eg. 128000.", - ) - sample_rate = models.IntegerField( - blank=True, - null=True, - help_text="Number in hertz, eg. 
44100.", - ) alt_files = models.JSONField( blank=True, @@ -112,14 +131,6 @@ def alternative_files(self): def duration_in_s(self): return self.duration / 1e3 - @property - def sample_rate_in_khz(self): - return self.sample_rate / 1e3 - - @property - def bit_rate_in_kbps(self): - return self.bit_rate / 1e3 - class Meta(AbstractMedia.Meta): db_table = "audio" diff --git a/openverse_api/catalog/api/models/image.py b/openverse_api/catalog/api/models/image.py index 47f612aca..291c157ed 100644 --- a/openverse_api/catalog/api/models/image.py +++ b/openverse_api/catalog/api/models/image.py @@ -6,14 +6,31 @@ AbstractMediaList, AbstractMediaReport, ) +from catalog.api.models.mixins import FileMixin from django.db import models from uuslug import uuslug -class Image(AbstractMedia): +class ImageFileMixin(FileMixin): + """ + This mixin adds fields related to image resolution to the standard file + mixin. Do not use this as the sole base class. + """ + width = models.IntegerField(blank=True, null=True) height = models.IntegerField(blank=True, null=True) + @property + def resolution_in_mp(self): # ~ MP or megapixels + return (self.width * self.height) / 1e6 + + class Meta: + abstract = True + + +class Image(ImageFileMixin, AbstractMedia): + pass + class Meta(AbstractMedia.Meta): db_table = "image" From 114d4ea950109085de1c14d7c3e473cb5a5706ea Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Tue, 5 Oct 2021 18:34:07 +0400 Subject: [PATCH 4/5] Make a migration for the changes --- .../catalog/api/migrations/0038_filetypes.py | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 openverse_api/catalog/api/migrations/0038_filetypes.py diff --git a/openverse_api/catalog/api/migrations/0038_filetypes.py b/openverse_api/catalog/api/migrations/0038_filetypes.py new file mode 100644 index 000000000..9235bd948 --- /dev/null +++ b/openverse_api/catalog/api/migrations/0038_filetypes.py @@ -0,0 +1,43 @@ +# Generated by Django 3.2.7 on 2021-10-05 14:29 + +from 
django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('api', '0037_media_thumbnails'), + ] + + operations = [ + migrations.AddField( + model_name='audio', + name='filetype', + field=models.CharField(blank=True, help_text='The type of the file, related to the file extension.', max_length=80, null=True), + ), + migrations.AddField( + model_name='audioset', + name='filetype', + field=models.CharField(blank=True, help_text='The type of the file, related to the file extension.', max_length=80, null=True), + ), + migrations.AddField( + model_name='image', + name='filetype', + field=models.CharField(blank=True, help_text='The type of the file, related to the file extension.', max_length=80, null=True), + ), + migrations.AlterField( + model_name='audio', + name='filesize', + field=models.IntegerField(blank=True, help_text='Number in bytes, e.g. 1024.', null=True), + ), + migrations.AlterField( + model_name='audioset', + name='filesize', + field=models.IntegerField(blank=True, help_text='Number in bytes, e.g. 1024.', null=True), + ), + migrations.AlterField( + model_name='image', + name='filesize', + field=models.IntegerField(blank=True, help_text='Number in bytes, e.g. 
1024.', null=True), + ), + ] From 6cd22b135f44b51cd22efd00bd6929edf609bb34 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Tue, 5 Oct 2021 18:48:44 +0400 Subject: [PATCH 5/5] Remove addition of `filetype` column --- load_sample_data.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/load_sample_data.sh b/load_sample_data.sh index dcd19e7d4..ab2a36d89 100755 --- a/load_sample_data.sh +++ b/load_sample_data.sh @@ -57,7 +57,7 @@ docker-compose exec -T "$UPSTREAM_DB_SERVICE_NAME" /bin/bash -c "psql -U deploy docker-compose exec -T "$UPSTREAM_DB_SERVICE_NAME" /bin/bash -c "PGPASSWORD=deploy pg_dump -s -t audio -U deploy -d openledger -h db | head -n -14 | psql -U deploy -d openledger" docker-compose exec -T "$UPSTREAM_DB_SERVICE_NAME" /bin/bash -c "psql -U deploy -d openledger <<-EOF ALTER TABLE audio RENAME TO audio_view; - ALTER TABLE audio_view ADD COLUMN standardized_popularity double precision, ADD COLUMN ingestion_type varchar(1000), ADD column filetype varchar(80), ADD COLUMN audio_set jsonb; + ALTER TABLE audio_view ADD COLUMN standardized_popularity double precision, ADD COLUMN ingestion_type varchar(1000), ADD COLUMN audio_set jsonb; \copy audio_view (identifier,created_on,updated_on,ingestion_type,provider,source,foreign_identifier,foreign_landing_url,url,thumbnail,filetype,duration,bit_rate,sample_rate,category,genres,audio_set,audio_set_position,alt_files,filesize,license,license_version,creator,creator_url,title,meta_data,tags,watermarked,last_synced_with_source,removed_from_source,standardized_popularity) from './sample_data/sample_audio_data.csv' with (FORMAT csv, HEADER true) EOF"