From eadb83409accfd53ee3881f1d274164d693ec2e2 Mon Sep 17 00:00:00 2001
From: David Fischer <david@readthedocs.org>
Date: Fri, 29 Mar 2019 13:43:32 -0700
Subject: [PATCH 1/8] Store build artifacts from build servers proposal

---
 readthedocs/projects/tasks.py | 95 ++++++++++++++++++++---------------
 1 file changed, 54 insertions(+), 41 deletions(-)

diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py
index c2963139d31..3324b37785e 100644
--- a/readthedocs/projects/tasks.py
+++ b/readthedocs/projects/tasks.py
@@ -564,24 +564,33 @@ def run_build(self, docker, record):
                 # TODO the build object should have an idea of these states,
                 # extend the model to include an idea of these outcomes
                 outcomes = self.build_docs()
-                build_id = self.build.get('id')
             except vcs_support_utils.LockTimeout as e:
                 self.task.retry(exc=e, throw=False)
                 raise VersionLockedError
             except SoftTimeLimitExceeded:
                 raise BuildTimeoutError
-
-            # Finalize build and update web servers
-            if build_id:
-                self.update_app_instances(
-                    html=bool(outcomes['html']),
-                    search=bool(outcomes['search']),
-                    localmedia=bool(outcomes['localmedia']),
-                    pdf=bool(outcomes['pdf']),
-                    epub=bool(outcomes['epub']),
-                )
             else:
-                log.warning('No build ID, not syncing files')
+                build_id = self.build.get('id')
+                if build_id:
+                    # Store build artifacts to storage (local or cloud storage)
+                    self.store_build_artifacts(
+                        html=bool(outcomes['html']),
+                        search=bool(outcomes['search']),
+                        localmedia=bool(outcomes['localmedia']),
+                        pdf=bool(outcomes['pdf']),
+                        epub=bool(outcomes['epub']),
+                    )
+
+                    # Finalize build and update web servers
+                    self.update_app_instances(
+                        html=bool(outcomes['html']),
+                        search=bool(outcomes['search']),
+                        localmedia=bool(outcomes['localmedia']),
+                        pdf=bool(outcomes['pdf']),
+                        epub=bool(outcomes['epub']),
+                    )
+                else:
+                    log.warning('No build ID, not syncing files')
 
         if self.build_env.failed:
             self.send_notifications()
@@ -715,6 +724,39 @@ def save_build_config(self):
         })
         self.build['config'] = config
 
+    def store_build_artifacts(
+            self,
+            html=False,
+            localmedia=False,
+            search=False,
+            pdf=False,
+            epub=False,
+    ):
+        """
+        Save build artifacts to "storage" using Django's storage API
+
+        The storage could be local filesystem storage OR cloud blob storage
+        such as S3, Azure storage or Google Cloud Storage.
+
+        Remove build artifacts of types not included in this build.
+
+        :param html: whether to save HTML output
+        :param localmedia: whether to save localmedia (htmlzip) output
+        :param search: whether to save search artifacts
+        :param pdf: whether to save PDF output
+        :param epub: whether to save ePub output
+        :return:
+        """
+        if getattr(settings, 'WRITE_BUILD_MEDIA', False):
+            log.info(
+                LOG_TEMPLATE.format(
+                    project=self.version.project.slug,
+                    version=self.version.slug,
+                    msg='Write build artifacts to media storage',
+                ),
+            )
+            # TODO: This will be where we actually implement this
+
     def update_app_instances(
             self,
             html=False,
@@ -745,25 +787,6 @@ def update_app_instances(
 
         delete_unsynced_media = True
 
-        if getattr(storage, 'write_build_media', False):
-            # Handle the case where we want to upload some built assets to our storage
-            move_files.delay(
-                self.version.pk,
-                hostname,
-                self.config.doctype,
-                html=False,
-                search=False,
-                localmedia=localmedia,
-                pdf=pdf,
-                epub=epub,
-                delete_unsynced_media=delete_unsynced_media,
-            )
-            # Set variables so they don't get synced in the next broadcast step
-            localmedia = False
-            pdf = False
-            epub = False
-            delete_unsynced_media = False
-
         # Broadcast finalization steps to web application instances
         broadcast(
             type='app',
@@ -1017,16 +1040,6 @@ def move_files(
                 ),
             ])
 
-            if getattr(storage, 'write_build_media', False):
-                # Remove the media from remote storage if it exists
-                storage_path = version.project.get_storage_path(
-                    type_=media_type,
-                    version_slug=version.slug,
-                )
-                if storage.exists(storage_path):
-                    log.info('Removing %s from media storage', storage_path)
-                    storage.delete(storage_path)
-
     log.debug(
         LOG_TEMPLATE.format(
             project=version.project.slug,

From 9c1eae03e0c84888f9ef8301a90749e47b449f88 Mon Sep 17 00:00:00 2001
From: David Fischer <david@readthedocs.org>
Date: Sat, 6 Apr 2019 00:28:00 -0700
Subject: [PATCH 2/8] Optionally, write build artifacts to storage

---
 docs/settings.rst              | 10 +++++
 readthedocs/builds/storage.py  | 79 ++++++++++++++++++++++++++++++++++
 readthedocs/builds/syncers.py  | 56 ------------------------
 readthedocs/projects/models.py | 16 ++++---
 readthedocs/projects/tasks.py  | 73 ++++++++++++++++++++++++++++---
 readthedocs/settings/base.py   |  4 ++
 requirements/pip.txt           |  3 ++
 7 files changed, 174 insertions(+), 67 deletions(-)
 create mode 100644 readthedocs/builds/storage.py

diff --git a/docs/settings.rst b/docs/settings.rst
index 195fddecc24..20585519434 100644
--- a/docs/settings.rst
+++ b/docs/settings.rst
@@ -94,6 +94,16 @@ Default: :djangosetting:`ALLOW_ADMIN`
 Whether to include `django.contrib.admin` in the URL's.
 
 
+BUILD_MEDIA_STORAGE
+-------------------
+
+Default: ``None``
+
+Use this storage class to upload build artifacts to cloud storage (S3, Azure storage).
+This should be a dotted path to the relevant class (eg. ``'path.to.MyBuildMediaStorage'``).
+This class should mixin :class:`readthedocs.builds.storage.BuildMediaStorageMixin`.
+
+
 ELASTICSEARCH_DSL
 -----------------
 
diff --git a/readthedocs/builds/storage.py b/readthedocs/builds/storage.py
new file mode 100644
index 00000000000..f2bba277aa7
--- /dev/null
+++ b/readthedocs/builds/storage.py
@@ -0,0 +1,79 @@
+import logging
+from pathlib import Path
+
+from storages.utils import safe_join, get_available_overwrite_name
+
+
+log = logging.getLogger(__name__)
+
+
+class BuildMediaStorageMixin:
+
+    """
+    A mixin for Storage classes needed to write build artifacts
+
+    This adds and modifies some functionality to Django's File Storage API.
+    By default, classes mixing this in will now overwrite files by default instead
+    of finding an available name.
+    This adds functionality to copy and delete entire directories.
+
+    See: https://docs.djangoproject.com/en/1.11/ref/files/storage
+    """
+
+    @staticmethod
+    def _dirpath(path):
+        """It may just be Azure, but for listdir to work correctly, the path must end in '/'"""
+        path = str(path)
+        if not path.endswith('/'):
+            path += '/'
+
+        return path
+
+    def get_available_name(self, name, max_length=None):
+        """
+        Overrides Django's storage implementation to always return the passed name (overwrite)
+
+        By default, Django will not overwrite files even if the same name is specified.
+        This changes that functionality so that the default is to use the same name and overwrite
+        rather than modify the path to not clobber files.
+        """
+        return get_available_overwrite_name(name, max_length=max_length)
+
+    def delete_directory(self, path):
+        """
+        Delete all files under a certain path from storage
+
+        Many storage backends (S3, Azure storage) don't care about "directories".
+        The directory effectively doesn't exist if there are no files in it.
+        However, in these backends, there is no "rmdir" operation so you have to recursively
+        delete all files.
+
+        :param path: the path to the directory to remove
+        """
+        log.debug('Deleting directory %s from media storage', path)
+        folders, files = self.listdir(self._dirpath(path))
+        for folder_name in folders:
+            if folder_name:
+                # Recursively delete the subdirectory
+                self.delete_directory(safe_join(path, folder_name))
+        for filename in files:
+            if filename:
+                self.delete(safe_join(path, filename))
+
+    def copy_directory(self, source, destination):
+        """
+        Copy a directory recursively to storage
+
+        :param source: the source path on the local disk
+        :param destination: the destination path in storage
+        """
+        log.debug('Copying source directory %s to media storage at %s', source, destination)
+        source = Path(source)
+        for filepath in source.iterdir():
+            sub_destination = safe_join(destination, filepath.name)
+            if filepath.is_dir():
+                # Recursively copy the subdirectory
+                self.copy_directory(filepath, sub_destination)
+            elif filepath.is_file():
+                with filepath.open('rb') as fd:
+                    self.save(sub_destination, fd)
diff --git a/readthedocs/builds/syncers.py b/readthedocs/builds/syncers.py
index f0b20b16855..f0dbd8b01a8 100644
--- a/readthedocs/builds/syncers.py
+++ b/readthedocs/builds/syncers.py
@@ -169,62 +169,6 @@ def copy(cls, path, target, host, is_file=False, **kwargs):  # pylint: disable=a
             )
 
 
-class SelectiveStorageRemotePuller(RemotePuller):
-
-    """
-    Like RemotePuller but certain files are copied via Django's storage system.
-
-    If a file with extensions specified by ``extensions`` is copied, it will be copied to storage
-    and the original is removed.
-
-    See: https://docs.djangoproject.com/en/1.11/ref/settings/#std:setting-DEFAULT_FILE_STORAGE
-    """
-
-    extensions = ('.pdf', '.epub', '.zip')
-
-    @classmethod
-    def get_storage_path(cls, path):
-        """
-        Gets the path to the file within the storage engine.
-
-        For example, if the path was $MEDIA_ROOT/pdfs/latest.pdf
-         the storage_path is 'pdfs/latest.pdf'
-
-        :raises: SuspiciousFileOperation if the path isn't under settings.MEDIA_ROOT
-        """
-        path = os.path.normpath(path)
-        if not path.startswith(settings.MEDIA_ROOT):
-            raise SuspiciousFileOperation
-
-        path = path.replace(settings.MEDIA_ROOT, '').lstrip('/')
-        return path
-
-    @classmethod
-    def copy(cls, path, target, host, is_file=False, **kwargs):  # pylint: disable=arguments-differ
-        RemotePuller.copy(path, target, host, is_file, **kwargs)
-
-        if getattr(storage, 'write_build_media', False):
-            # This is a sanity check for the case where
-            # storage is backed by the local filesystem
-            # In that case, removing the original target file locally
-            # would remove the file from storage as well
-
-            if is_file and os.path.exists(target) and \
-                    any([target.lower().endswith(ext) for ext in cls.extensions]):
-                log.info('Selective Copy %s to media storage', target)
-
-                try:
-                    storage_path = cls.get_storage_path(target)
-
-                    if storage.exists(storage_path):
-                        storage.delete(storage_path)
-
-                    with open(target, 'rb') as fd:
-                        storage.save(storage_path, fd)
-                except Exception:
-                    log.exception('Storage access failed for file. Not failing build.')
-
-
 class Syncer(SettingsOverrideObject):
     _default_class = LocalSyncer
     _override_setting = 'FILE_SYNCER'
diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py
index fab466a58f5..c58f0a8e55f 100644
--- a/readthedocs/projects/models.py
+++ b/readthedocs/projects/models.py
@@ -510,23 +510,29 @@ def get_subproject_urls(self):
         return [(proj.child.slug, proj.child.get_docs_url())
                 for proj in self.subprojects.all()]
 
-    def get_storage_path(self, type_, version_slug=LATEST):
+    def get_storage_path(self, type_, version_slug=LATEST, include_file=True):
         """
         Get a path to a build artifact for use with Django's storage system.
 
         :param type_: Media content type, ie - 'pdf', 'htmlzip'
         :param version_slug: Project version slug for lookup
+        :param include_file: Include file name in return
         :return: the path to an item in storage
             (can be used with ``storage.url`` to get the URL)
         """
-        extension = type_.replace('htmlzip', 'zip')
-        return '{}/{}/{}/{}.{}'.format(
+        folder_path = '{}/{}/{}'.format(
             type_,
             self.slug,
             version_slug,
-            self.slug,
-            extension,
         )
+        if include_file:
+            extension = type_.replace('htmlzip', 'zip')
+            return '{}/{}.{}'.format(
+                folder_path,
+                self.slug,
+                extension,
+            )
+        return folder_path
 
     def get_production_media_path(self, type_, version_slug, include_file=True):
         """
diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py
index 3324b37785e..15bbdcb4963 100644
--- a/readthedocs/projects/tasks.py
+++ b/readthedocs/projects/tasks.py
@@ -82,7 +82,6 @@
 
 
 log = logging.getLogger(__name__)
-storage = get_storage_class()()
 
 
 class SyncRepositoryMixin:
@@ -738,24 +737,86 @@ def store_build_artifacts(
         The storage could be local filesystem storage OR cloud blob storage
         such as S3, Azure storage or Google Cloud Storage.
 
-        Remove build artifacts of types not included in this build.
+        Remove build artifacts of types not included in this build (PDF, ePub, zip only).
+
+        This looks very similar to `move_files` and is intended to replace it!
 
         :param html: whether to save HTML output
         :param localmedia: whether to save localmedia (htmlzip) output
         :param search: whether to save search artifacts
         :param pdf: whether to save PDF output
         :param epub: whether to save ePub output
-        :return:
         """
-        if getattr(settings, 'WRITE_BUILD_MEDIA', False):
+        if getattr(settings, 'BUILD_MEDIA_STORAGE', None):
             log.info(
                 LOG_TEMPLATE.format(
                     project=self.version.project.slug,
                     version=self.version.slug,
-                    msg='Write build artifacts to media storage',
+                    msg='Writing build artifacts to media storage',
                 ),
             )
-            # TODO: This will be where we actually implement this
+
+            storage = get_storage_class(settings.BUILD_MEDIA_STORAGE)()
+
+            types_to_copy = []
+            types_to_delete = []
+
+            # HTML media
+            if html:
+                types_to_copy.append(('html', self.config.doctype))
+
+            # Search media (JSON)
+            if search:
+                types_to_copy.append(('json', 'sphinx_search'))
+
+            if localmedia:
+                types_to_copy.append(('htmlzip', 'sphinx_localmedia'))
+            else:
+                types_to_delete.append('htmlzip')
+
+            if pdf:
+                types_to_copy.append(('pdf', 'sphinx_pdf'))
+            else:
+                types_to_delete.append('pdf')
+
+            if epub:
+                types_to_copy.append(('epub', 'sphinx_epub'))
+            else:
+                types_to_delete.append('epub')
+
+            for media_type, build_type in types_to_copy:
+                from_path = self.version.project.artifact_path(
+                    version=self.version.slug,
+                    type_=build_type,
+                )
+                to_path = self.version.project.get_storage_path(
+                    type_=media_type,
+                    version_slug=self.version.slug,
+                    include_file=False,
+                )
+                log.info(
+                    LOG_TEMPLATE.format(
+                        project=self.version.project.slug,
+                        version=self.version.slug,
+                        msg=f'Writing {media_type} to media storage - {to_path}',
+                    ),
+                )
+                storage.copy_directory(from_path, to_path)
+
+            for media_type in types_to_delete:
+                media_path = self.version.project.get_storage_path(
+                    type_=media_type,
+                    version_slug=self.version.slug,
+                    include_file=False,
+                )
+                log.info(
+                    LOG_TEMPLATE.format(
+                        project=self.version.project.slug,
+                        version=self.version.slug,
+                        msg=f'Deleting {media_type} from media storage - {media_path}',
+                    ),
+                )
+                storage.delete_directory(media_path)
 
     def update_app_instances(
             self,
diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py
index 3cf62523d59..5aab87ec8ee 100644
--- a/readthedocs/settings/base.py
+++ b/readthedocs/settings/base.py
@@ -180,6 +180,10 @@ def USE_PROMOS(self):  # noqa
         'django.contrib.staticfiles.finders.AppDirectoriesFinder',
     ]
 
+    # Optional storage class to use to upload build artifacts to cloud storage
+    # https://docs.readthedocs.io/en/stable/settings.html#build-media-storage
+    BUILD_MEDIA_STORAGE = None
+
     TEMPLATES = [
         {
             'BACKEND': 'django.template.backends.django.DjangoTemplates',
diff --git a/requirements/pip.txt b/requirements/pip.txt
index 01491c58b63..3b504a62e9b 100644
--- a/requirements/pip.txt
+++ b/requirements/pip.txt
@@ -97,5 +97,8 @@ django-cors-middleware==1.3.1
 # User agent parsing - used for analytics purposes
 user-agents<1.2.0
 
+# Utilities used to upload build media to cloud storage
+django-storages>=1.7,<1.8
+
 # Required only in development and linting
 django-debug-toolbar==1.11

From 535f31540fea36a321d148cfee302b34e90adfc9 Mon Sep 17 00:00:00 2001
From: David Fischer <david@readthedocs.org>
Date: Sat, 6 Apr 2019 11:11:19 -0700
Subject: [PATCH 3/8] Add a file system storage subclass

- Useful for writing build artifacts under MEDIA_ROOT in dev
---
 .gitignore                    |  1 +
 readthedocs/builds/storage.py | 28 +++++++++++++++++++++++++++-
 readthedocs/settings/base.py  |  2 +-
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 92094448373..dbb0fa606cb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,6 +28,7 @@ logs/*
 media/dash
 media/epub
 media/export
+media/html
 media/htmlzip
 media/json
 media/man
diff --git a/readthedocs/builds/storage.py b/readthedocs/builds/storage.py
index f2bba277aa7..f79406428a9 100644
--- a/readthedocs/builds/storage.py
+++ b/readthedocs/builds/storage.py
@@ -1,6 +1,7 @@
 import logging
 from pathlib import Path
 
+from django.core.files.storage import FileSystemStorage
 from storages.utils import safe_join, get_available_overwrite_name
 
 
@@ -15,7 +16,7 @@ class BuildMediaStorageMixin:
     This adds and modifies some functionality to Django's File Storage API.
     By default, classes mixing this in will now overwrite files by default instead
     of finding an available name.
-    This adds functionality to copy and delete entire directories.
+    This mixin also adds convenience methods to copy and delete entire directories.
 
     See: https://docs.djangoproject.com/en/1.11/ref/files/storage
     """
@@ -77,3 +78,28 @@ def copy_directory(self, source, destination):
             elif filepath.is_file():
                 with filepath.open('rb') as fd:
                     self.save(sub_destination, fd)
+
+
+class BuildMediaFileSystemStorage(BuildMediaStorageMixin, FileSystemStorage):
+
+    """Storage subclass that writes build artifacts under MEDIA_ROOT"""
+
+    def get_available_name(self, name, max_length=None):
+        """
+        A hack to overwrite by default with the FileSystemStorage
+
+        After upgrading to Django 2.2, this method can be removed
+        because subclasses can set OS_OPEN_FLAGS such that FileSystemStorage._save
+        will properly overwrite files.
+        See: https://github.com/django/django/pull/8476
+        """
+        name = super().get_available_name(name, max_length=max_length)
+        if self.exists(name):
+            self.delete(name)
+        return name
+
+    def listdir(self, path):
+        """Return empty lists for nonexistent directories (as cloud storages do)"""
+        if not self.exists(path):
+            return [], []
+        return super().listdir(path)
diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py
index 5aab87ec8ee..0c89c9c73e1 100644
--- a/readthedocs/settings/base.py
+++ b/readthedocs/settings/base.py
@@ -180,7 +180,7 @@ def USE_PROMOS(self):  # noqa
         'django.contrib.staticfiles.finders.AppDirectoriesFinder',
     ]
 
-    # Optional storage class to use to upload build artifacts to cloud storage
+    # Optional Django Storage subclass used to write build artifacts to cloud or local storage
     # https://docs.readthedocs.io/en/stable/settings.html#build-media-storage
     BUILD_MEDIA_STORAGE = None
 

From b61ae5dbd416e0e7cf1498b77014b0c2ea4bbcab Mon Sep 17 00:00:00 2001
From: David Fischer <david@readthedocs.org>
Date: Wed, 17 Apr 2019 13:03:32 -0400
Subject: [PATCH 4/8] Add a null syncer that doesn't sync files

---
 readthedocs/builds/syncers.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/readthedocs/builds/syncers.py b/readthedocs/builds/syncers.py
index f0dbd8b01a8..eeebd67b47a 100644
--- a/readthedocs/builds/syncers.py
+++ b/readthedocs/builds/syncers.py
@@ -11,7 +11,6 @@
 import shutil
 
 from django.conf import settings
-from django.core.exceptions import SuspiciousFileOperation
 from django.core.files.storage import get_storage_class
 
 from readthedocs.core.utils import safe_makedirs
@@ -31,6 +30,15 @@ def copy(cls, path, target, is_file=False, **kwargs):
         raise NotImplementedError
 
 
+class NullSyncer:
+
+    """A syncer that doesn't actually do anything"""
+
+    @classmethod
+    def copy(cls, path, target, is_file=False, **kwargs):
+        pass  # noqa
+
+
 class LocalSyncer(BaseSyncer):
 
     @classmethod

From 7f2d14f62afd6a39fecdec2f54fb391345cc230d Mon Sep 17 00:00:00 2001
From: David Fischer <david@readthedocs.org>
Date: Mon, 29 Apr 2019 11:42:20 -0700
Subject: [PATCH 5/8] Settings tweaks

- Use the RTD_ prefix
- Assume that settings.RTD_BUILD_MEDIA_STORAGE is set (defined in base)
---
 docs/settings.rst             | 4 ++--
 readthedocs/projects/tasks.py | 4 ++--
 readthedocs/settings/base.py  | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/settings.rst b/docs/settings.rst
index 20585519434..2b10ee4d6d1 100644
--- a/docs/settings.rst
+++ b/docs/settings.rst
@@ -94,8 +94,8 @@ Default: :djangosetting:`ALLOW_ADMIN`
 Whether to include `django.contrib.admin` in the URL's.
 
 
-BUILD_MEDIA_STORAGE
--------------------
+RTD_BUILD_MEDIA_STORAGE
+-----------------------
 
 Default: ``None``
 
diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py
index 15bbdcb4963..3271fefaf20 100644
--- a/readthedocs/projects/tasks.py
+++ b/readthedocs/projects/tasks.py
@@ -747,7 +747,7 @@ def store_build_artifacts(
         :param pdf: whether to save PDF output
         :param epub: whether to save ePub output
         """
-        if getattr(settings, 'BUILD_MEDIA_STORAGE', None):
+        if settings.RTD_BUILD_MEDIA_STORAGE:
             log.info(
                 LOG_TEMPLATE.format(
                     project=self.version.project.slug,
@@ -756,7 +756,7 @@ def store_build_artifacts(
                 ),
             )
 
-            storage = get_storage_class(settings.BUILD_MEDIA_STORAGE)()
+            storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()
 
             types_to_copy = []
             types_to_delete = []
diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py
index 0c89c9c73e1..db9f67f2bd0 100644
--- a/readthedocs/settings/base.py
+++ b/readthedocs/settings/base.py
@@ -182,7 +182,7 @@ def USE_PROMOS(self):  # noqa
 
     # Optional Django Storage subclass used to write build artifacts to cloud or local storage
     # https://docs.readthedocs.io/en/stable/settings.html#build-media-storage
-    BUILD_MEDIA_STORAGE = None
+    RTD_BUILD_MEDIA_STORAGE = None
 
     TEMPLATES = [
         {

From 89164997ede7d0d9ec9ca971b66a376751b7fa4b Mon Sep 17 00:00:00 2001
From: David Fischer <david@readthedocs.org>
Date: Mon, 29 Apr 2019 16:39:49 -0700
Subject: [PATCH 6/8] Be more defensive when writing to storage

---
 readthedocs/projects/tasks.py | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py
index 3271fefaf20..61b6fc9c541 100644
--- a/readthedocs/projects/tasks.py
+++ b/readthedocs/projects/tasks.py
@@ -801,7 +801,18 @@ def store_build_artifacts(
                         msg=f'Writing {media_type} to media storage - {to_path}',
                     ),
                 )
-                storage.copy_directory(from_path, to_path)
+                try:
+                    storage.copy_directory(from_path, to_path)
+                except Exception:
+                    # Ideally this should just be an IOError
+                    # but some storage backends unfortunately throw other errors
+                    log.warning(
+                        LOG_TEMPLATE.format(
+                            project=self.version.project.slug,
+                            version=self.version.slug,
+                            msg=f'Error copying {from_path} to storage (not failing build)',
+                        ),
+                    )
 
             for media_type in types_to_delete:
                 media_path = self.version.project.get_storage_path(
@@ -816,7 +827,18 @@ def store_build_artifacts(
                         msg=f'Deleting {media_type} from media storage - {media_path}',
                     ),
                 )
-                storage.delete_directory(media_path)
+                try:
+                    storage.delete_directory(media_path)
+                except Exception:
+                    # Ideally this should just be an IOError
+                    # but some storage backends unfortunately throw other errors
+                    log.warning(
+                        LOG_TEMPLATE.format(
+                            project=self.version.project.slug,
+                            version=self.version.slug,
+                            msg=f'Error deleting {media_path} from storage (not failing build)',
+                        ),
+                    )
 
     def update_app_instances(
             self,

From ceb7de221099db67738c107b07f1036df3037a37 Mon Sep 17 00:00:00 2001
From: David Fischer <david@readthedocs.org>
Date: Wed, 1 May 2019 09:23:01 -0700
Subject: [PATCH 7/8] Updates based on feedback

---
 readthedocs/projects/tasks.py | 4 ++--
 requirements/pip.txt          | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py
index 61b6fc9c541..b1796ba1933 100644
--- a/readthedocs/projects/tasks.py
+++ b/readthedocs/projects/tasks.py
@@ -806,7 +806,7 @@ def store_build_artifacts(
                 except Exception:
                     # Ideally this should just be an IOError
                     # but some storage backends unfortunately throw other errors
-                    log.warning(
+                    log.exception(
                         LOG_TEMPLATE.format(
                             project=self.version.project.slug,
                             version=self.version.slug,
@@ -832,7 +832,7 @@ def store_build_artifacts(
                 except Exception:
                     # Ideally this should just be an IOError
                     # but some storage backends unfortunately throw other errors
-                    log.warning(
+                    log.exception(
                         LOG_TEMPLATE.format(
                             project=self.version.project.slug,
                             version=self.version.slug,
diff --git a/requirements/pip.txt b/requirements/pip.txt
index 3b504a62e9b..ae13e5acacd 100644
--- a/requirements/pip.txt
+++ b/requirements/pip.txt
@@ -98,7 +98,7 @@ django-cors-middleware==1.3.1
 user-agents<1.2.0
 
 # Utilities used to upload build media to cloud storage
-django-storages>=1.7,<1.8
+django-storages==1.7.1  # pyup: <1.8
 
 # Required only in development and linting
 django-debug-toolbar==1.11

From 83165b7b721d4959bf988d953280cc16495b80ac Mon Sep 17 00:00:00 2001
From: David Fischer <david@readthedocs.org>
Date: Thu, 9 May 2019 15:24:25 -0700
Subject: [PATCH 8/8] Fix linting based on new configs

---
 readthedocs/projects/tasks.py | 55 +++++++++++++++++++----------------
 1 file changed, 30 insertions(+), 25 deletions(-)

diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py
index 44614a81fe1..4291f1dbc05 100644
--- a/readthedocs/projects/tasks.py
+++ b/readthedocs/projects/tasks.py
@@ -752,11 +752,12 @@ def store_build_artifacts(
         """
         if settings.RTD_BUILD_MEDIA_STORAGE:
             log.info(
-                LOG_TEMPLATE.format(
-                    project=self.version.project.slug,
-                    version=self.version.slug,
-                    msg='Writing build artifacts to media storage',
-                ),
+                LOG_TEMPLATE,
+                {
+                    'project': self.version.project.slug,
+                    'version': self.version.slug,
+                    'msg': 'Writing build artifacts to media storage',
+                },
             )
 
             storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()
@@ -798,11 +799,12 @@ def store_build_artifacts(
                     include_file=False,
                 )
                 log.info(
-                    LOG_TEMPLATE.format(
-                        project=self.version.project.slug,
-                        version=self.version.slug,
-                        msg=f'Writing {media_type} to media storage - {to_path}',
-                    ),
+                    LOG_TEMPLATE,
+                    {
+                        'project': self.version.project.slug,
+                        'version': self.version.slug,
+                        'msg': f'Writing {media_type} to media storage - {to_path}',
+                    },
                 )
                 try:
                     storage.copy_directory(from_path, to_path)
@@ -810,11 +812,12 @@ def store_build_artifacts(
                     # Ideally this should just be an IOError
                     # but some storage backends unfortunately throw other errors
                     log.exception(
-                        LOG_TEMPLATE.format(
-                            project=self.version.project.slug,
-                            version=self.version.slug,
-                            msg=f'Error copying {from_path} to storage (not failing build)',
-                        ),
+                        LOG_TEMPLATE,
+                        {
+                            'project': self.version.project.slug,
+                            'version': self.version.slug,
+                            'msg': f'Error copying {from_path} to storage (not failing build)',
+                        },
                     )
 
             for media_type in types_to_delete:
@@ -824,11 +827,12 @@ def store_build_artifacts(
                     include_file=False,
                 )
                 log.info(
-                    LOG_TEMPLATE.format(
-                        project=self.version.project.slug,
-                        version=self.version.slug,
-                        msg=f'Deleting {media_type} from media storage - {media_path}',
-                    ),
+                    LOG_TEMPLATE,
+                    {
+                        'project': self.version.project.slug,
+                        'version': self.version.slug,
+                        'msg': f'Deleting {media_type} from media storage - {media_path}',
+                    },
                 )
                 try:
                     storage.delete_directory(media_path)
@@ -836,11 +840,12 @@ def store_build_artifacts(
                     # Ideally this should just be an IOError
                     # but some storage backends unfortunately throw other errors
                     log.exception(
-                        LOG_TEMPLATE.format(
-                            project=self.version.project.slug,
-                            version=self.version.slug,
-                            msg=f'Error deleting {media_path} from storage (not failing build)',
-                        ),
+                        LOG_TEMPLATE,
+                        {
+                            'project': self.version.project.slug,
+                            'version': self.version.slug,
+                            'msg': f'Error deleting {media_path} from storage (not failing build)',
+                        },
                     )
 
     def update_app_instances(