From 84bb9e3f66f8cf0e1ee20fd7bd6b138b772bb5f5 Mon Sep 17 00:00:00 2001 From: Oliver Roick Date: Wed, 20 Jul 2016 14:08:51 +0200 Subject: [PATCH] Improvements to file upload post processing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fixes #355 — Adds standard thumbnails for various file types - Adds `video/mp4` to the list of accepted mime types for file upload - Improves process to create thumbnails. The file is now only downloaded for post processing when the mime-type indicates an image. - Adds a migration to change the mime_type field maximum length to 100, to make sure that users can add Office2007+ documents. --- cadasta/party/tests/test_views_default.py | 6 +- cadasta/resources/forms.py | 5 +- cadasta/resources/managers.py | 4 - .../migrations/0002_auto_20160720_1259.py | 37 ++++++ cadasta/resources/models.py | 40 ++++++- cadasta/resources/tests/test_forms.py | 3 +- cadasta/resources/tests/test_managers.py | 3 +- cadasta/resources/tests/test_models.py | 105 +++++++++++++++++- cadasta/resources/tests/test_validators.py | 25 +---- cadasta/resources/tests/test_views_default.py | 6 +- cadasta/resources/validators.py | 13 +-- cadasta/spatial/tests/test_views_default.py | 3 +- cadasta/templates/resources/form.html | 1 + cadasta/templates/resources/scripts.html | 4 +- 14 files changed, 201 insertions(+), 54 deletions(-) create mode 100644 cadasta/resources/migrations/0002_auto_20160720_1259.py diff --git a/cadasta/party/tests/test_views_default.py b/cadasta/party/tests/test_views_default.py index f02c4be8e..d15dc84bb 100644 --- a/cadasta/party/tests/test_views_default.py +++ b/cadasta/party/tests/test_views_default.py @@ -552,7 +552,8 @@ def get_post_data(self): 'name': 'Some name', 'description': '', 'file': file_name, - 'original_file': 'image.png' + 'original_file': 'image.png', + 'mime_type': 'image/jpeg' } def test_get_with_authorized_user(self): @@ -990,7 +991,8 @@ def get_post_data(self): 'name': 'Some name', 'description': '', 'file': file_name, - 'original_file': 'image.png' + 'original_file': 'image.png', + 'mime_type': 'image/jpeg' } def test_get_with_authorized_user(self): diff --git a/cadasta/resources/forms.py b/cadasta/resources/forms.py index 588f434a9..d151af660 100644 --- a/cadasta/resources/forms.py +++ b/cadasta/resources/forms.py @@ -13,7 +13,7 @@ class ResourceForm(forms.ModelForm): class Meta: model = Resource - fields = ['file', 'original_file', 'name', 'description'] + fields = ['file', 'original_file', 'name', 'description', 'mime_type'] def __init__(self, data=None, content_object=None, contributor=None, project_id=None, *args, **kwargs): @@ -22,9 +22,6 @@ def __init__(self, data=None, content_object=None, contributor=None, self.contributor = contributor self.project_id = project_id - def clean(self): - pass - def save(self, *args, **kwargs): if self.instance.id: self.instance.contributor = self.contributor diff --git a/cadasta/resources/managers.py b/cadasta/resources/managers.py index 9f3f9d9e1..d8bf10892 100644 --- a/cadasta/resources/managers.py +++ b/cadasta/resources/managers.py @@ -1,4 +1,3 @@ -import magic from django.db import models, transaction from django.apps import apps @@ -7,9 +6,6 @@ class ResourceManager(models.Manager): def create(self, content_object=None, *args, **kwargs): with transaction.atomic(): resource = self.model(**kwargs) - file = resource.file.open().name - - resource.mime_type = magic.from_file(file, mime=True).decode() resource.save() if content_object: diff --git a/cadasta/resources/migrations/0002_auto_20160720_1259.py b/cadasta/resources/migrations/0002_auto_20160720_1259.py new file mode 100644 index 000000000..d5c071688 --- /dev/null +++ b/cadasta/resources/migrations/0002_auto_20160720_1259.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.9.6 on 2016-07-20 12:59 +from __future__ import unicode_literals + +import buckets.fields +from django.db import migrations, models +import resources.validators + + +class Migration(migrations.Migration): + + dependencies = [ + ('resources', '0001_initial'), + ] + + operations = [ + migrations.AlterField( + model_name='historicalresource', + name='file', + field=buckets.fields.S3FileField(upload_to='resources'), + ), + migrations.AlterField( + model_name='historicalresource', + name='mime_type', + field=models.CharField(max_length=100, validators=[resources.validators.validate_file_type]), + ), + migrations.AlterField( + model_name='resource', + name='file', + field=buckets.fields.S3FileField(upload_to='resources'), + ), + migrations.AlterField( + model_name='resource', + name='mime_type', + field=models.CharField(max_length=100, validators=[resources.validators.validate_file_type]), + ), + ] diff --git a/cadasta/resources/models.py b/cadasta/resources/models.py index 4c038028c..d2bf89df6 100644 --- a/cadasta/resources/models.py +++ b/cadasta/resources/models.py @@ -1,5 +1,4 @@ import os -import magic from datetime import datetime from django.db import models from django.contrib.contenttypes.fields import GenericForeignKey @@ -26,10 +25,11 @@ class Resource(RandomIDModel): name = models.CharField(max_length=200) description = models.TextField(null=True, blank=True) - file = S3FileField(upload_to='resources', validators=[validate_file_type]) + file = S3FileField(upload_to='resources') original_file = models.CharField(max_length=200) file_versions = JSONField(null=True, blank=True) - mime_type = models.CharField(max_length=50) + mime_type = models.CharField(max_length=100, + validators=[validate_file_type]) archived = models.BooleanField(default=False) last_updated = models.DateTimeField(auto_now=True) contributor = models.ForeignKey('accounts.User') @@ -82,13 +82,43 @@ def file_type(self): @property def thumbnail(self): + icon_url = ('https://s3-us-west-2.amazonaws.com/cadasta-platformprod' + '-bucket/icons/{}.png') + if not hasattr(self, '_thumbnail'): if 'image' in self.mime_type: ext = self.file_name.split('.')[-1] base_url = self.file.url[:self.file.url.rfind('.')] self._thumbnail = base_url + '-128x128.' + ext + + elif self.mime_type == 'application/pdf': + self._thumbnail = icon_url.format('pdf') + + elif self.mime_type in ['audio/mpeg3', 'audio/x-mpeg-3', + 'video/mpeg', 'video/x-mpeg']: + self._thumbnail = icon_url.format('mp3') + + elif self.mime_type == 'video/mp4': + self._thumbnail = icon_url.format('mp4') + + elif self.mime_type == 'application/msword': + self._thumbnail = icon_url.format('doc') + + elif self.mime_type == ('application/vnd.openxmlformats-office' + 'document.wordprocessingml.document'): + self._thumbnail = icon_url.format('docx') + + elif self.mime_type in ['application/msexcel', + 'application/vnd.ms-excel']: + self._thumbnail = icon_url.format('xls') + + elif self.mime_type == ('application/vnd.openxmlformats-' + 'officedocument.spreadsheetml.sheet'): + self._thumbnail = icon_url.format('xlsx') + else: self._thumbnail = '' + return self._thumbnail @property @@ -108,8 +138,7 @@ def archive_file(sender, instance, **kwargs): @receiver(models.signals.post_save, sender=Resource) def create_thumbnails(sender, instance, created, **kwargs): if created or instance._orginial_url != instance.file.url: - file = instance.file.open() - if 'image' in magic.from_file(file.name, mime=True).decode(): + if 'image' in instance.mime_type: io.ensure_dirs() file_name = instance.file.url.split('/')[-1] name = file_name[:file_name.rfind('.')] @@ -120,6 +149,7 @@ def create_thumbnails(sender, instance, created, **kwargs): size = 128, 128 + file = instance.file.open() thumb = thumbnail.make(file, size) thumb.save(write_path) if instance.file.field.upload_to: diff --git a/cadasta/resources/tests/test_forms.py b/cadasta/resources/tests/test_forms.py index 821b06d3b..0e35ca613 100644 --- a/cadasta/resources/tests/test_forms.py +++ b/cadasta/resources/tests/test_forms.py @@ -24,7 +24,8 @@ def setUp(self): 'name': 'Some name', 'description': '', 'file': file_name, - 'original_file': 'image.jpg' + 'original_file': 'image.jpg', + 'mime_type': 'image/jpeg' } self.project = ProjectFactory.create() diff --git a/cadasta/resources/tests/test_managers.py b/cadasta/resources/tests/test_managers.py index d85e08dba..fd0b8f337 100644 --- a/cadasta/resources/tests/test_managers.py +++ b/cadasta/resources/tests/test_managers.py @@ -27,7 +27,8 @@ def test_project_resource(self): file=file_name, content_object=project, contributor=user, - project=project) + project=project, + mime_type='image/jpeg') assert resource.name == 'Re' assert resource.content_objects.count() == 1 assert resource.mime_type == 'image/jpeg' diff --git a/cadasta/resources/tests/test_models.py b/cadasta/resources/tests/test_models.py index 8768fe74a..d9002ff34 100644 --- a/cadasta/resources/tests/test_models.py +++ b/cadasta/resources/tests/test_models.py @@ -22,7 +22,7 @@ def test_file_type_property(self): resource = Resource(file='http://example.com/dir/filename.txt') assert resource.file_type == 'txt' - def test_thumbnail(self): + def test_thumbnail_img(self): ensure_dirs(add='s3/uploads/resources') resource = ResourceFactory.build( file='http://example.com/dir/filename.jpg', @@ -31,11 +31,109 @@ def test_thumbnail(self): assert (resource.thumbnail == 'http://example.com/dir/filename-128x128.jpg') + def test_thumbnail_pdf(self): resource = ResourceFactory.build( file='http://example.com/dir/filename.pdf', mime_type='application/pdf' ) - assert (resource.thumbnail == '') + assert (resource.thumbnail == + 'https://s3-us-west-2.amazonaws.com/cadasta-platformprod-' + 'bucket/icons/pdf.png') + + def test_thumbnail_mp3(self): + resource = ResourceFactory.build( + file='http://example.com/dir/filename.mp3', + mime_type='audio/mpeg3' + ) + assert (resource.thumbnail == + 'https://s3-us-west-2.amazonaws.com/cadasta-platformprod-' + 'bucket/icons/mp3.png') + + resource = ResourceFactory.build( + file='http://example.com/dir/filename.mp3', + mime_type='audio/x-mpeg-3' + ) + assert (resource.thumbnail == + 'https://s3-us-west-2.amazonaws.com/cadasta-platformprod-' + 'bucket/icons/mp3.png') + + resource = ResourceFactory.build( + file='http://example.com/dir/filename.mp3', + mime_type='video/mpeg' + ) + assert (resource.thumbnail == + 'https://s3-us-west-2.amazonaws.com/cadasta-platformprod-' + 'bucket/icons/mp3.png') + + resource = ResourceFactory.build( + file='http://example.com/dir/filename.mp3', + mime_type='video/x-mpeg' + ) + assert (resource.thumbnail == + 'https://s3-us-west-2.amazonaws.com/cadasta-platformprod-' + 'bucket/icons/mp3.png') + + def test_thumbnail_mp4(self): + resource = ResourceFactory.build( + file='http://example.com/dir/filename.mp4', + mime_type='video/mp4' + ) + assert (resource.thumbnail == + 'https://s3-us-west-2.amazonaws.com/cadasta-platformprod-' + 'bucket/icons/mp4.png') + + def test_thumbnail_doc(self): + resource = ResourceFactory.build( + file='http://example.com/dir/filename.doc', + mime_type='application/msword' + ) + assert (resource.thumbnail == + 'https://s3-us-west-2.amazonaws.com/cadasta-platformprod-' + 'bucket/icons/doc.png') + + def test_thumbnail_docx(self): + resource = ResourceFactory.build( + file='http://example.com/dir/filename.doc', + mime_type=('application/vnd.openxmlformats-officedocument.' + 'wordprocessingml.document') + ) + assert (resource.thumbnail == + 'https://s3-us-west-2.amazonaws.com/cadasta-platformprod-' + 'bucket/icons/docx.png') + + def test_thumbnail_xls(self): + resource = ResourceFactory.build( + file='http://example.com/dir/filename.doc', + mime_type='application/msexcel' + ) + assert (resource.thumbnail == + 'https://s3-us-west-2.amazonaws.com/cadasta-platformprod-' + 'bucket/icons/xls.png') + + resource = ResourceFactory.build( + file='http://example.com/dir/filename.doc', + mime_type='application/vnd.ms-excel' + ) + assert (resource.thumbnail == + 'https://s3-us-west-2.amazonaws.com/cadasta-platformprod-' + 'bucket/icons/xls.png') + + def test_thumbnail_xlsx(self): + resource = ResourceFactory.build( + file='http://example.com/dir/filename.doc', + mime_type=('application/vnd.openxmlformats-officedocument' + '.spreadsheetml.sheet') + ) + assert (resource.thumbnail == + 'https://s3-us-west-2.amazonaws.com/cadasta-platformprod-' + 'bucket/icons/xlsx.png') + + def test_thumbnail_other(self): + resource = ResourceFactory.build( + file='http://example.com/dir/filename.pdf', + mime_type='application/other' + ) + assert resource.thumbnail == '' def test_num_entities(self): resource = ResourceFactory.create() @@ -63,7 +161,8 @@ def test_create_thumbnail(self): storage = FakeS3Storage() file = open(path + '/resources/tests/files/image.jpg', 'rb').read() file_name = storage.save('resources/thumb_test.jpg', file) - resource = ResourceFactory.build(file=file_name) + resource = ResourceFactory.build(file=file_name, + mime_type='image/jpeg') create_thumbnails(Resource, resource, True) assert os.path.isfile(os.path.join(settings.MEDIA_ROOT, diff --git a/cadasta/resources/tests/test_validators.py b/cadasta/resources/tests/test_validators.py index 2a9742410..bbc2060f5 100644 --- a/cadasta/resources/tests/test_validators.py +++ b/cadasta/resources/tests/test_validators.py @@ -1,39 +1,18 @@ import pytest -import os from django.test import TestCase from django.core.exceptions import ValidationError -from django.conf import settings -from buckets.fields import S3File, S3FileField -from buckets.test.utils import ensure_dirs -from buckets.test.storage import FakeS3Storage from ..validators import validate_file_type -path = os.path.dirname(settings.BASE_DIR) - class ValidateFileTypeTest(TestCase): def test_valid_file(self): - ensure_dirs() - storage = FakeS3Storage() - field = S3FileField(storage=storage) - file = open(path + '/resources/tests/files/image.jpg', 'rb').read() - file = storage.save('image.jpg', file) - s3_file = S3File(file, field=field) - try: - validate_file_type(s3_file) + validate_file_type('image/png') except ValidationError: pytest.fail('ValidationError raised unexpectedly') def test_invalid_file(self): - ensure_dirs() - storage = FakeS3Storage() - field = S3FileField(storage=storage) - file = open(path + '/resources/tests/files/text.txt', 'rb').read() - file = storage.save('text.txt', file) - s3_file = S3File(file, field=field) - with pytest.raises(ValidationError) as e: - validate_file_type(s3_file) + validate_file_type('text/plain') assert e.value.message == "Files of type text/plain are not accepted." diff --git a/cadasta/resources/tests/test_views_default.py b/cadasta/resources/tests/test_views_default.py index f1a1e2d01..4a067a83b 100644 --- a/cadasta/resources/tests/test_views_default.py +++ b/cadasta/resources/tests/test_views_default.py @@ -279,7 +279,8 @@ def _post(self, user=None, status=None, expected_redirect=None): 'name': 'Some name', 'description': '', 'file': file_name, - 'original_file': 'image.png' + 'original_file': 'image.png', + 'mime_type': 'image/jpeg' } if user is None: @@ -475,7 +476,8 @@ def _post(self, user=None, status=None, expected_redirect=None, get=None): 'name': 'Some name', 'description': '', 'file': file_name, - 'original_file': 'image.png' + 'original_file': 'image.png', + 'mime_type': 'image/jpeg' } setattr(self.request, 'method', 'POST') diff --git a/cadasta/resources/validators.py b/cadasta/resources/validators.py index 2036b58be..5ad786339 100644 --- a/cadasta/resources/validators.py +++ b/cadasta/resources/validators.py @@ -1,18 +1,17 @@ -import magic from django.core.exceptions import ValidationError from django.utils.translation import ugettext as _ ACCEPTED_TYPES = [ 'application/pdf', 'audio/mpeg3', 'audio/x-mpeg-3', 'video/mpeg', - 'video/x-mpeg', 'image/jpeg', 'image/png', 'image/gif', 'image/tiff', - 'application/msword', 'application/msexcel', 'application/vnd.ms-excel', + 'video/x-mpeg', 'video/mp4', 'image/jpeg', 'image/png', 'image/gif', + 'image/tiff', 'application/msword', 'application/msexcel', + 'application/vnd.ms-excel', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'] -def validate_file_type(file): - mime = magic.from_file(file.open().name, mime=True).decode() - if mime not in ACCEPTED_TYPES: +def validate_file_type(type): + if type not in ACCEPTED_TYPES: raise ValidationError( - _("Files of type {mime} are not accepted.").format(mime=mime) + _("Files of type {mime} are not accepted.").format(mime=type) ) diff --git a/cadasta/spatial/tests/test_views_default.py b/cadasta/spatial/tests/test_views_default.py index 1d2316f41..2284def26 100644 --- a/cadasta/spatial/tests/test_views_default.py +++ b/cadasta/spatial/tests/test_views_default.py @@ -597,7 +597,8 @@ def get_post_data(self): 'name': 'Some name', 'description': '', 'file': file_name, - 'original_file': 'image.png' + 'original_file': 'image.png', + 'mime_type': 'image/jpeg' } def test_get_with_authorized_user(self): diff --git a/cadasta/templates/resources/form.html b/cadasta/templates/resources/form.html index 899c935f0..03eb52af6 100644 --- a/cadasta/templates/resources/form.html +++ b/cadasta/templates/resources/form.html @@ -23,5 +23,6 @@