diff --git a/CHANGELOG.md b/CHANGELOG.md index e9a31368..9ef53ca8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,32 +34,38 @@ Using the following categories, list your changes in this order: ## [Unreleased](https://github.com/Archmonger/ServeStatic/compare/1.2.0...HEAD) +### Added + +- You can now utilize the Django manifest rather than scanning the filesystem when using `settings.py:SERVESTATIC_USE_MANIFEST`. + - When also using ServeStatic's `CompressedManifestStaticFilesStorage` backend, ServeStatic will no longer need to call `os.stat`. + ### Changed - Minimum python version is now 3.9. +- Django `settings.py:SERVESTATIC_USE_FINDERS` will now discover files strictly using the [finders API](https://docs.djangoproject.com/en/stable/ref/contrib/staticfiles/#finders-module). Previously, ServeStatic would also scan `settings.py:STATIC_ROOT` for files not found by the finders API. ## [1.2.0](https://github.com/Archmonger/ServeStatic/compare/1.1.0...1.2.0) - 2024-08-30 ### Added -- Verbose Django `404` error page when `settings.py:DEBUG` is `True` ([Upstream PR](https://github.com/evansd/whitenoise/pull/366)) +- Verbose Django `404` error page when `settings.py:DEBUG` is `True` ### Fixed -- Fix Django compatibility with third-party sync middleware - - ServeStatic Django middleware now only runs in async mode to avoid clashing with Django's internal usage of `asgiref.AsyncToSync` -- Respect Django `settings.py:FORCE_SCRIPT_NAME` configuration value ([Upstream PR](https://github.com/evansd/whitenoise/pull/486)) +- Fix Django compatibility with third-party sync middleware. + - ServeStatic Django middleware now only runs in async mode to avoid clashing with Django's internal usage of `asgiref.AsyncToSync`. +- Respect Django `settings.py:FORCE_SCRIPT_NAME` configuration value. 
## [1.1.0](https://github.com/Archmonger/ServeStatic/compare/1.0.0...1.1.0) - 2024-08-27 ### Added -- Files are now compressed within a thread pool to increase performance ([Upstream PR](https://github.com/evansd/whitenoise/pull/484)) +- Files are now compressed within a thread pool to increase performance. ### Fixed -- Fix Django `StreamingHttpResponse must consume synchronous iterators` warning -- Fix Django bug where file paths could fail to be followed on Windows ([Upstream PR](https://github.com/evansd/whitenoise/pull/474)) +- Fix Django `StreamingHttpResponse must consume synchronous iterators` warning. +- Fix Django bug where file paths could fail to be followed on Windows. ## [1.0.0](https://github.com/Archmonger/ServeStatic/releases/tag/1.0.0) - 2024-05-08 diff --git a/docs/src/django-settings.md b/docs/src/django-settings.md index 3e707780..000d5424 100644 --- a/docs/src/django-settings.md +++ b/docs/src/django-settings.md @@ -22,11 +22,27 @@ Recheck the filesystem to see if any files have changed before responding. This --- +## `SERVESTATIC_USE_MANIFEST` + +**Default:** `not settings.py:DEBUG and isinstance(staticfiles_storage, ManifestStaticFilesStorage)` + +Find and serve files using Django's manifest file. + +This is the most efficient way to determine what files are available, but it requires that you are using a [manifest-compatible](https://docs.djangoproject.com/en/stable/ref/contrib/staticfiles/#manifeststaticfilesstorage) storage backend. + +When using ServeStatic's [`CompressedManifestStaticFilesStorage`](./django.md#step-2-add-compression-and-caching-support) storage backend, ServeStatic will no longer need to call `os.stat` on each file during startup which improves startup speeds. + +--- + ## `SERVESTATIC_USE_FINDERS` **Default:** `settings.py:DEBUG` -Instead of only picking up files collected into `STATIC_ROOT`, find and serve files in their original directories using Django's "finders" API. 
This is useful in development where it matches the behaviour of the old `runserver` command. It's also possible to use this setting in production, avoiding the need to run the `collectstatic` command during the build, so long as you do not wish to use any of the caching and compression features provided by the storage backends. +Find and serve files using Django's [`finders`](https://docs.djangoproject.com/en/stable/ref/contrib/staticfiles/#finders-module) API. + +It's possible to use this setting in production, but be mindful of the [`settings.py:STATICFILES_DIRS`](https://docs.djangoproject.com/en/stable/ref/settings/#staticfiles-dirs) and [`settings.py:STATICFILES_FINDERS`](https://docs.djangoproject.com/en/stable/ref/settings/#staticfiles-finders) settings. By default, the finders API only searches the `'static'` directory in each app, whose files are not the copies post-processed by ServeStatic. + +Note that `STATICFILES_DIRS` cannot equal `STATIC_ROOT` while running the `collectstatic` management command. 
--- diff --git a/src/servestatic/__init__.py b/src/servestatic/__init__.py index 67c41c73..713c1cd7 100644 --- a/src/servestatic/__init__.py +++ b/src/servestatic/__init__.py @@ -1,6 +1,6 @@ from __future__ import annotations -from .asgi import ServeStaticASGI -from .wsgi import ServeStatic +from servestatic.asgi import ServeStaticASGI +from servestatic.wsgi import ServeStatic __all__ = ["ServeStaticASGI", "ServeStatic"] diff --git a/src/servestatic/asgi.py b/src/servestatic/asgi.py index 38154162..daa0157f 100644 --- a/src/servestatic/asgi.py +++ b/src/servestatic/asgi.py @@ -5,8 +5,7 @@ from asgiref.compatibility import guarantee_single_callable from servestatic.base import BaseServeStatic - -from .utils import decode_path_info +from servestatic.utils import decode_path_info # This is the same size as wsgiref.FileWrapper BLOCK_SIZE = 8192 @@ -25,7 +24,6 @@ async def __call__(self, scope, receive, send): static_file = None if scope["type"] == "http": if self.autorefresh: - # Use a thread while searching disk for files on Python 3.9+ static_file = await asyncio.to_thread(self.find_file, path) else: static_file = self.files.get(path) diff --git a/src/servestatic/base.py b/src/servestatic/base.py index 03869e2e..3815817d 100644 --- a/src/servestatic/base.py +++ b/src/servestatic/base.py @@ -8,9 +8,14 @@ from typing import Callable from wsgiref.headers import Headers -from .media_types import MediaTypes -from .responders import IsDirectoryError, MissingFileError, Redirect, StaticFile -from .utils import ensure_leading_trailing_slash, scantree +from servestatic.media_types import MediaTypes +from servestatic.responders import ( + IsDirectoryError, + MissingFileError, + Redirect, + StaticFile, +) +from servestatic.utils import ensure_leading_trailing_slash, scantree class BaseServeStatic: @@ -71,15 +76,23 @@ def __init__( if root is not None: self.add_files(root, prefix) + def insert_directory(self, root, prefix): + # Exit early if the directory is already in the list + 
for existing_root, existing_prefix in self.directories: + if existing_root == root and existing_prefix == prefix: + return + + # Later calls to `add_files` overwrite earlier ones, hence we need + # to store the list of directories in reverse order so later ones + # match first when they're checked in "autorefresh" mode + self.directories.insert(0, (root, prefix)) + def add_files(self, root, prefix=None): root = os.path.abspath(root) root = root.rstrip(os.path.sep) + os.path.sep prefix = ensure_leading_trailing_slash(prefix) if self.autorefresh: - # Later calls to `add_files` overwrite earlier ones, hence we need - # to store the list of directories in reverse order so later ones - # match first when they're checked in "autorefresh" mode - self.directories.insert(0, (root, prefix)) + self.insert_directory(root, prefix) elif os.path.isdir(root): self.update_files_dictionary(root, prefix) else: diff --git a/src/servestatic/middleware.py b/src/servestatic/middleware.py index d47d2f69..4a5ba092 100644 --- a/src/servestatic/middleware.py +++ b/src/servestatic/middleware.py @@ -11,7 +11,10 @@ from asgiref.sync import iscoroutinefunction, markcoroutinefunction from django.conf import settings as django_settings from django.contrib.staticfiles import finders -from django.contrib.staticfiles.storage import staticfiles_storage +from django.contrib.staticfiles.storage import ( + ManifestStaticFilesStorage, + staticfiles_storage, +) from django.http import FileResponse from servestatic.responders import MissingFileError @@ -20,6 +23,7 @@ AsyncToSyncIterator, EmptyAsyncIterator, ensure_leading_trailing_slash, + stat_files, ) from servestatic.wsgi import ServeStatic @@ -35,7 +39,7 @@ class ServeStaticMiddleware(ServeStatic): async_capable = True sync_capable = False - def __init__(self, get_response, settings=django_settings): + def __init__(self, get_response=None, settings=django_settings): if not iscoroutinefunction(get_response): raise ValueError( "ServeStaticMiddleware 
requires an async compatible version of Django." @@ -43,8 +47,9 @@ def __init__(self, get_response, settings=django_settings): markcoroutinefunction(self) self.get_response = get_response - autorefresh = getattr(settings, "SERVESTATIC_AUTOREFRESH", settings.DEBUG) - max_age = getattr(settings, "SERVESTATIC_MAX_AGE", 0 if settings.DEBUG else 60) + debug = getattr(settings, "DEBUG") + autorefresh = getattr(settings, "SERVESTATIC_AUTOREFRESH", debug) + max_age = getattr(settings, "SERVESTATIC_MAX_AGE", 0 if debug else 60) allow_all_origins = getattr(settings, "SERVESTATIC_ALLOW_ALL_ORIGINS", True) charset = getattr(settings, "SERVESTATIC_CHARSET", "utf-8") mimetypes = getattr(settings, "SERVESTATIC_MIMETYPES", None) @@ -53,9 +58,17 @@ def __init__(self, get_response, settings=django_settings): ) self.index_file = getattr(settings, "SERVESTATIC_INDEX_FILE", None) immutable_file_test = getattr(settings, "SERVESTATIC_IMMUTABLE_FILE_TEST", None) - self.use_finders = getattr(settings, "SERVESTATIC_USE_FINDERS", settings.DEBUG) + self.use_finders = getattr(settings, "SERVESTATIC_USE_FINDERS", debug) + self.use_manifest = getattr( + settings, + "SERVESTATIC_USE_MANIFEST", + not debug and isinstance(staticfiles_storage, ManifestStaticFilesStorage), + ) self.static_prefix = getattr(settings, "SERVESTATIC_STATIC_PREFIX", None) self.static_root = getattr(settings, "STATIC_ROOT", None) + self.keep_only_hashed_files = getattr( + django_settings, "SERVESTATIC_KEEP_ONLY_HASHED_FILES", False + ) root = getattr(settings, "SERVESTATIC_ROOT", None) super().__init__( @@ -79,22 +92,25 @@ def __init__(self, get_response, settings=django_settings): self.static_prefix = ensure_leading_trailing_slash(self.static_prefix) if self.static_root: + self.insert_directory(self.static_root, self.static_prefix) + + if self.static_root and not self.use_manifest and not self.use_finders: self.add_files(self.static_root, prefix=self.static_prefix) - if root: - self.add_files(root) + if self.use_manifest: 
+ self.add_files_from_manifest() - if self.use_finders and not self.autorefresh: + if self.use_finders: self.add_files_from_finders() + if root: + self.add_files(root) + async def __call__(self, request): """If the URL contains a static file, serve it. Otherwise, continue to the next middleware.""" - if self.autorefresh and hasattr(asyncio, "to_thread"): - # Use a thread while searching disk for files on Python 3.9+ + if self.autorefresh: static_file = await asyncio.to_thread(self.find_file, request.path_info) - elif self.autorefresh: - static_file = self.find_file(request.path_info) else: static_file = self.files.get(request.path_info) if static_file is not None: @@ -145,10 +161,49 @@ def add_files_from_finders(self): ) # Use setdefault as only first matching file should be used files.setdefault(url, storage.path(path)) - stat_cache = {path: os.stat(path) for path in files.values()} + self.insert_directory(storage.location, self.static_prefix) + + stat_cache = stat_files(files.values()) for url, path in files.items(): self.add_file_to_dictionary(url, path, stat_cache=stat_cache) + def add_files_from_manifest(self): + if not isinstance(staticfiles_storage, ManifestStaticFilesStorage): + raise ValueError( + "SERVESTATIC_USE_MANIFEST is set to True but " + "staticfiles storage is not using a manifest." 
+ ) + staticfiles: dict = staticfiles_storage.hashed_files + stat_cache = None + + # Fetch stats from manifest if using ServeStatic's manifest storage + if hasattr(staticfiles_storage, "load_manifest_stats"): + manifest_stats: dict = staticfiles_storage.load_manifest_stats() + if manifest_stats: + stat_cache = { + staticfiles_storage.path(k): os.stat_result(v) + for k, v in manifest_stats.items() + } + + # Add files to ServeStatic + for unhashed_name, hashed_name in staticfiles.items(): + file_path = staticfiles_storage.path(unhashed_name) + if not self.keep_only_hashed_files: + self.add_file_to_dictionary( + f"{self.static_prefix}{unhashed_name}", + file_path, + stat_cache=stat_cache, + ) + self.add_file_to_dictionary( + f"{self.static_prefix}{hashed_name}", + file_path, + stat_cache=stat_cache, + ) + + # Add the static directory to ServeStatic + if staticfiles_storage.location: + self.insert_directory(staticfiles_storage.location, self.static_prefix) + def candidate_paths_for_url(self, url): if self.use_finders and url.startswith(self.static_prefix): relative_url = url[len(self.static_prefix) :] diff --git a/src/servestatic/storage.py b/src/servestatic/storage.py index c5efe6b4..8ba7caf4 100644 --- a/src/servestatic/storage.py +++ b/src/servestatic/storage.py @@ -1,7 +1,9 @@ from __future__ import annotations import concurrent.futures +import contextlib import errno +import json import os import re import textwrap @@ -12,8 +14,10 @@ ManifestStaticFilesStorage, StaticFilesStorage, ) +from django.core.files.base import ContentFile -from .compress import Compressor +from servestatic.compress import Compressor +from servestatic.utils import stat_files _PostProcessT = Iterator[Union[Tuple[str, str, bool], Tuple[str, None, RuntimeError]]] @@ -88,6 +92,47 @@ def post_process(self, *args, **kwargs): processed = self.make_helpful_exception(processed, name) yield name, hashed_name, processed + self.add_stats_to_manifest() + + def add_stats_to_manifest(self): + """Adds 
additional `stats` field to Django's manifest file.""" + current = self.read_manifest() + current = json.loads(current) if current else {} + payload = current | { + "stats": self.stat_static_root(), + } + new = json.dumps(payload).encode() + # Django < 3.2 doesn't have a manifest_storage attribute + manifest_storage = getattr(self, "manifest_storage", self) + manifest_storage.delete(self.manifest_name) + manifest_storage._save(self.manifest_name, ContentFile(new)) + + def stat_static_root(self): + """Stats all the files within the static root folder.""" + static_root = getattr(settings, "STATIC_ROOT", None) + if static_root is None: + return {} + + file_paths = [] + for root, _, files in os.walk(static_root): + file_paths.extend( + os.path.join(root, f) for f in files if f != self.manifest_name + ) + stats = stat_files(file_paths) + + # Remove the static root folder from the path + return {path[len(static_root) + 1 :]: stat for path, stat in stats.items()} + + def load_manifest_stats(self): + """Derivative of Django's `load_manifest` but for the `stats` field.""" + content = self.read_manifest() + if content is None: + return {} + with contextlib.suppress(json.JSONDecodeError): + stored = json.loads(content) + return stored.get("stats", {}) + raise ValueError(f"Couldn't load stats from manifest '{self.manifest_name}'") + def post_process_with_compression(self, files): # Files may get hashed multiple times, we want to keep track of all the # intermediate files generated during the process and which of these diff --git a/src/servestatic/utils.py b/src/servestatic/utils.py index 7d1d08e2..103591ab 100644 --- a/src/servestatic/utils.py +++ b/src/servestatic/utils.py @@ -32,6 +32,14 @@ def scantree(root): yield entry.path, entry.stat() +def stat_files(paths) -> dict: + """Stat all files in `relative_paths` via threads.""" + + with concurrent.futures.ThreadPoolExecutor() as executor: + futures = {rel_path: executor.submit(os.stat, rel_path) for rel_path in paths} + 
return {rel_path: future.result() for rel_path, future in futures.items()} + + class AsyncToSyncIterator: """Converts any async iterator to sync as efficiently as possible while retaining full compatibility with any environment. diff --git a/src/servestatic/wsgi.py b/src/servestatic/wsgi.py index e07d1ad9..4d0a0008 100644 --- a/src/servestatic/wsgi.py +++ b/src/servestatic/wsgi.py @@ -2,8 +2,8 @@ from wsgiref.util import FileWrapper -from .base import BaseServeStatic -from .utils import decode_path_info +from servestatic.base import BaseServeStatic +from servestatic.utils import decode_path_info class ServeStatic(BaseServeStatic): diff --git a/tests/test_django_whitenoise.py b/tests/test_django_whitenoise.py index 55a42ea5..428e9b74 100644 --- a/tests/test_django_whitenoise.py +++ b/tests/test_django_whitenoise.py @@ -91,6 +91,12 @@ def test_get_root_file(server, root_files, _collect_static): assert response.content == root_files.robots_content +@override_settings(SERVESTATIC_USE_MANIFEST=False) +def test_get_root_file_no_manifest(server, root_files, _collect_static): + response = server.get(root_files.robots_url) + assert response.content == root_files.robots_content + + def test_versioned_file_cached_forever(server, static_files, _collect_static): url = storage.staticfiles_storage.url(static_files.js_path) response = server.get(url) @@ -202,8 +208,8 @@ def test_no_content_disposition_header(server, static_files, _collect_static): @pytest.fixture() -def finder_application(finder_static_files): - return get_wsgi_application() +def finder_application(finder_static_files, application): + return application @pytest.fixture() @@ -219,6 +225,13 @@ def test_file_served_from_static_dir(finder_static_files, finder_server): assert response.content == finder_static_files.js_content +@override_settings(SERVESTATIC_USE_MANIFEST=False) +def test_file_served_from_static_dir_no_manifest(finder_static_files, finder_server): + url = settings.STATIC_URL + finder_static_files.js_path 
+ response = finder_server.get(url) + assert response.content == finder_static_files.js_content + + def test_non_ascii_requests_safely_ignored(finder_server): response = finder_server.get(settings.STATIC_URL + "test\u263a") assert 404 == response.status_code @@ -236,6 +249,15 @@ def test_index_file_served_at_directory_path(finder_static_files, finder_server) assert response.content == finder_static_files.index_content +@override_settings(SERVESTATIC_USE_MANIFEST=False) +def test_index_file_served_at_directory_path_no_manifest( + finder_static_files, finder_server +): + path = finder_static_files.index_path.rpartition("/")[0] + "/" + response = finder_server.get(settings.STATIC_URL + path) + assert response.content == finder_static_files.index_content + + def test_index_file_path_redirected(finder_static_files, finder_server): directory_path = finder_static_files.index_path.rpartition("/")[0] + "/" index_url = settings.STATIC_URL + finder_static_files.index_path @@ -265,11 +287,11 @@ def test_servestatic_file_response_has_only_one_header(): assert headers == {"content-type"} +@override_settings(STATIC_URL="static/") def test_relative_static_url(server, static_files, _collect_static): - with override_settings(STATIC_URL="static/"): - url = storage.staticfiles_storage.url(static_files.js_path) - response = server.get(url) - assert response.content == static_files.js_content + url = storage.staticfiles_storage.url(static_files.js_path) + response = server.get(url) + assert response.content == static_files.js_content def test_404_in_prod(server):