Skip to content

Commit

Permalink
Add S3OptimizedUploadStorage (#128)
Browse files Browse the repository at this point in the history
  • Loading branch information
drakon authored Sep 8, 2021
1 parent 393647f commit 83d1d17
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 0 deletions.
20 changes: 20 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -195,3 +195,23 @@ uploaded to AWS S3 directly and not to your Django application server.
:target: https://codecov.io/gh/codingjoe/django-s3file
.. |GitHub license| image:: https://img.shields.io/badge/license-MIT-blue.svg
:target: https://raw.githubusercontent.com/codingjoe/django-s3file/master/LICENSE

Using optimized S3Boto3Storage
------------------------------

``S3Boto3Storage`` accepts any file-like object, so it uses a generalized
``_save`` function. As a result, after the frontend has uploaded the file to S3,
the file is copied byte-by-byte just to move (rename) the uploaded object.
For large files this leads to additional loading times for the user.

That's why S3File provides an optimized version of this method at
``s3file.storages_optimized.S3OptimizedUploadStorage``. It uses the more efficient
``copy`` method from S3, since we know that we only copy from one S3 location to another.

.. code:: python

    from s3file.storages_optimized import S3OptimizedUploadStorage

    class MyStorage(S3OptimizedUploadStorage):  # Subclass and use like any other storage
        default_acl = 'private'
43 changes: 43 additions & 0 deletions s3file/storages_optimized.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from storages.backends.s3boto3 import S3Boto3Storage


class S3OptimizedUploadStorage(S3Boto3Storage):
    """
    Storage that saves files with a server-side S3 copy instead of an upload.

    The general ``_save`` of ``S3Boto3Storage`` pushes ``content`` through
    ``upload_fileobj``. When ``content`` already is an S3 object, that is
    unnecessary work: the faster S3 ``copy`` command can be used instead.

    The assumption is that ``content`` contains an S3 object (``content.obj``
    with a ``key``) from which we can copy. The copy source is looked up in
    this storage's own bucket.

    See also discussion here: https://github.com/codingjoe/django-s3file/discussions/126
    """

    def _save(self, name, content):
        """
        Copy the S3 object behind ``content`` to ``name``.

        This follows the implementation of ``S3Boto3Storage._save`` but
        replaces the ``upload_fileobj`` call with a ``copy`` call.

        :param name: Name under which the file is to be stored.
        :param content: File wrapper exposing the source S3 object as
            ``content.obj``; only ``content.obj.key`` is read here.
        :returns: The cleaned (not location-normalized) name.
        :raises TypeError: If ``content`` has no ``obj`` attribute or
            ``content.obj`` has no ``key`` attribute.
        """
        # Validate first: nothing below makes sense without a copy source, and
        # failing fast avoids creating the target object handle for nothing.
        if not hasattr(content, "obj") or not hasattr(content.obj, "key"):
            raise TypeError(
                "The content object must be a S3 object and contain a valid key."
            )

        cleaned_name = self._clean_name(name)
        name = self._normalize_name(cleaned_name)
        params = self._get_write_parameters(name, content)

        # NOTE: The gzip handling of the generic ``_save`` is deliberately not
        # applied here. A server-side copy transfers the bytes that are already
        # stored under ``content.obj.key``; a locally compressed stream would
        # never reach the target. Compressing would therefore only waste work,
        # and rebinding ``content`` to the compressed stream would break the
        # ``content.obj`` lookup below or label uncompressed data as
        # ``ContentEncoding: gzip``.

        # Copy the file instead of uploading it.
        obj = self.bucket.Object(name)
        obj.copy(
            {"Bucket": self.bucket.name, "Key": content.obj.key},
            ExtraArgs=params,
        )

        return cleaned_name
76 changes: 76 additions & 0 deletions tests/test_storages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import pytest
from django.core.files.base import ContentFile

from s3file.storages_optimized import S3OptimizedUploadStorage


class S3OptimizedMockStorage(S3OptimizedUploadStorage):
    """Test double whose ``bucket`` records copy requests instead of performing them."""

    # Registry of every mock ``Object`` created so far, keyed by object key.
    # Kept on the class so tests can look up the objects ``_save`` created.
    created_objects = {}

    class bucket:
        """Stand-in for the storage's ``bucket``, offering ``name`` and ``Object``."""

        name = "test-bucket"

        class Object:
            """Mock S3 object that remembers the source it was copied from."""

            def __init__(self, key):
                self.key = key
                # No copy has been requested yet.
                self.copy_from_bucket = None
                self.copy_from_key = None
                # Register the new object so tests can find it by its key.
                S3OptimizedMockStorage.created_objects[self.key] = self

            def copy(self, s3_object, ExtraArgs):
                """Record the source bucket and key; ``ExtraArgs`` is ignored."""
                self.copy_from_bucket = s3_object["Bucket"]
                self.copy_from_key = s3_object["Key"]

    def _compress_content(self, content):
        """Hand back ``content`` untouched; no compression happens in the mock."""
        return content


class TestStorages:
    """Exercise ``_save`` of the optimized storage through the mock storage."""

    url = "/__s3_mock__/"

    class _S3Content:
        """Minimal content stub exposing an S3 object as ``obj``."""

        def __init__(self, obj):
            self.obj = obj

    def test_post__save_optimized(self):
        """Saving S3-backed content copies from the source key in the same bucket."""
        source_key = "tmp/s3file/s3_file.txt"
        target_name = "tmp/s3file/s3_file_copied.txt"
        storage = S3OptimizedMockStorage()
        source = storage.bucket.Object(source_key)

        saved_name = storage._save(target_name, self._S3Content(source))

        # The mock registers objects under their location-prefixed key.
        copied = storage.created_objects[
            "custom/location/tmp/s3file/s3_file_copied.txt"
        ]
        assert saved_name == target_name
        assert copied.copy_from_bucket == storage.bucket.name
        assert copied.copy_from_key == source_key

    def test_post__save_optimized_gzip(self):
        """With gzip enabled, a CSS file is still stored via a copy."""
        source_key = "tmp/s3file/s3_file.css"
        target_name = "tmp/s3file/s3_file_copied.css"
        storage = S3OptimizedMockStorage()
        source = storage.bucket.Object(source_key)
        storage.gzip = True

        saved_name = storage._save(target_name, self._S3Content(source))

        copied = storage.created_objects[
            "custom/location/tmp/s3file/s3_file_copied.css"
        ]
        assert saved_name == target_name
        assert copied.copy_from_bucket == storage.bucket.name
        assert copied.copy_from_key == source_key

    def test_post__save_optimized_fail(self):
        """Content that is not backed by an S3 object is rejected with ``TypeError``."""
        storage = S3OptimizedMockStorage()
        plain_content = ContentFile(b"s3file")

        with pytest.raises(TypeError) as exc_info:
            storage._save("tmp/s3file/s3_file_copied.txt", plain_content)

        expected_message = (
            "The content object must be a S3 object and contain a valid key."
        )
        assert expected_message in str(exc_info.value)

0 comments on commit 83d1d17

Please sign in to comment.