diff --git a/api/catalog/api/utils/validate_images.py b/api/catalog/api/utils/validate_images.py index 763be48fc..24756273e 100644 --- a/api/catalog/api/utils/validate_images.py +++ b/api/catalog/api/utils/validate_images.py @@ -1,6 +1,8 @@ import logging import time +from django.conf import settings + import django_redis import grequests from decouple import config @@ -12,6 +14,9 @@ CACHE_PREFIX = "valid:" +HEADERS = { + "User-Agent": settings.OUTBOUND_USER_AGENT_TEMPLATE.format(purpose="LinkValidation") +} def _get_cached_statuses(redis, image_urls): @@ -50,8 +55,10 @@ def validate_images(query_hash, start_slice, results, image_urls): to_verify[url] = idx logger.debug(f"len(to_verify)={len(to_verify)}") reqs = ( - grequests.head(u, allow_redirects=False, timeout=2, verify=False) - for u in to_verify.keys() + grequests.head( + url, headers=HEADERS, allow_redirects=False, timeout=2, verify=False + ) + for url in to_verify.keys() ) verified = grequests.map(reqs, exception_handler=_validation_failure) # Cache newly verified image statuses. diff --git a/api/catalog/api/utils/watermark.py b/api/catalog/api/utils/watermark.py index 79b13d571..9e2c56db2 100644 --- a/api/catalog/api/utils/watermark.py +++ b/api/catalog/api/utils/watermark.py @@ -1,11 +1,18 @@ +import logging import os from enum import Flag, auto from io import BytesIO from textwrap import wrap +from django.conf import settings + import piexif import requests from PIL import Image, ImageDraw, ImageFont +from sentry_sdk import capture_exception + + +parent_logger = logging.getLogger(__name__) BREAKPOINT_DIMENSION = 400 # 400px @@ -14,6 +21,9 @@ FRAME_COLOR = "#fff" # White frame TEXT_COLOR = "#000" # Black text +HEADERS = { + "User-Agent": settings.OUTBOUND_USER_AGENT_TEMPLATE.format(purpose="Watermark") +} class Dimension(Flag): @@ -143,9 +153,9 @@ def _open_image(url): :param url: the URL from where to read the image :return: the PIL image object with the EXIF data """ - + logger = parent_logger.getChild("_open_image") try: - response = requests.get(url) + response = requests.get(url, headers=HEADERS) img_bytes = BytesIO(response.content) img = Image.open(img_bytes) # Preserve EXIF metadata @@ -154,8 +164,10 @@ def _open_image(url): else: exif = None return img, exif - except requests.exceptions.RequestException: - print("Error loading image data") + except requests.exceptions.RequestException as e: + capture_exception(e) + logger.error(f"Error loading image data: {e}") + return None, None def _print_attribution_on_image(img, image_info): diff --git a/api/catalog/api/utils/waveform.py b/api/catalog/api/utils/waveform.py index 605f7da48..2bb00214a 100644 --- a/api/catalog/api/utils/waveform.py +++ b/api/catalog/api/utils/waveform.py @@ -8,13 +8,15 @@ import subprocess from typing import List +from django.conf import settings + import requests parent_logger = logging.getLogger(__name__) TMP_DIR = pathlib.Path("/tmp").resolve() -UA_STRING = "OpenverseWaveform/0.0 (https://wordpress.org/openverse)" +UA_STRING = settings.OUTBOUND_USER_AGENT_TEMPLATE.format(purpose="Waveform") def ext_from_url(url): diff --git a/api/catalog/api/views/image_views.py b/api/catalog/api/views/image_views.py index 47f6f0f52..95fec62b5 100644 --- a/api/catalog/api/views/image_views.py +++ b/api/catalog/api/views/image_views.py @@ -1,9 +1,10 @@ import io from django.conf import settings -from django.http.response import FileResponse, Http404, HttpResponse +from django.http.response import FileResponse, HttpResponse from django.utils.decorators import method_decorator from rest_framework.decorators import action +from rest_framework.exceptions import NotFound from rest_framework.response import Response import piexif @@ -60,6 +61,10 @@ class ImageViewSet(MediaViewSet): serializer_class = ImageSerializer + OEMBED_HEADERS = { + "User-Agent": settings.OUTBOUND_USER_AGENT_TEMPLATE.format(purpose="OEmbed"), + } + # Extra actions @action( @@ -81,7 +86,7 @@ def oembed(self, request, *_, **__): except Image.DoesNotExist: return get_api_exception("Could not find image.", 404) if not (image.height and image.width): - image_file = requests.get(image.url) + image_file = requests.get(image.url, headers=self.OEMBED_HEADERS) width, height = PILImage.open(io.BytesIO(image_file.content)).size context |= { "width": width, @@ -110,7 +115,7 @@ def thumbnail(self, request, *_, **__): @action(detail=True, url_path="watermark", url_name="watermark") def watermark(self, request, *_, **__): if not settings.WATERMARK_ENABLED: - raise Http404 # watermark feature is disabled + raise NotFound("The watermark feature is currently disabled.") params = WatermarkRequestSerializer(data=request.query_params) params.is_valid(raise_exception=True) diff --git a/api/catalog/api/views/media_views.py b/api/catalog/api/views/media_views.py index a262394ca..8ec6a82a1 100644 --- a/api/catalog/api/views/media_views.py +++ b/api/catalog/api/views/media_views.py @@ -1,9 +1,5 @@ -import json -import logging as log -from http.client import RemoteDisconnected -from urllib.error import HTTPError +import logging from urllib.parse import urlencode -from urllib.request import Request, urlopen from django.conf import settings from django.http.response import HttpResponse @@ -13,6 +9,7 @@ from rest_framework.response import Response from rest_framework.viewsets import ReadOnlyModelViewSet +import requests from sentry_sdk import capture_exception from catalog.api.controllers import search_controller @@ -28,6 +25,9 @@ class UpstreamThumbnailException(APIException): default_detail = "Could not render thumbnail due to upstream provider error." +parent_logger = logging.getLogger(__name__) + + class MediaViewSet(ReadOnlyModelViewSet): swagger_schema = CustomAutoSchema @@ -171,32 +171,41 @@ def _get_user_ip(request): ip = request.META.get("REMOTE_ADDR") return ip + THUMBNAIL_PROXY_COMM_HEADERS = { + "User-Agent": settings.OUTBOUND_USER_AGENT_TEMPLATE.format( + purpose="ThumbnailGeneration" + ) + } + @staticmethod def _thumbnail_proxy_comm( path: str, params: dict, headers: tuple[tuple[str, str]] = (), - ): + ) -> tuple[requests.Response, int, str]: + logger = parent_logger.getChild("_thumbnail_proxy_comm") proxy_url = settings.THUMBNAIL_PROXY_URL query_string = urlencode(params) upstream_url = f"{proxy_url}/{path}?{query_string}" - log.debug(f"Image proxy upstream URL: {upstream_url}") + logger.debug(f"Image proxy upstream URL: {upstream_url}") try: - req = Request(upstream_url) - for key, val in headers: - req.add_header(key, val) - upstream_response = urlopen(req, timeout=10) + compiled_headers = MediaViewSet.THUMBNAIL_PROXY_COMM_HEADERS | { + k: v for k, v in headers + } + upstream_response = requests.get( + upstream_url, timeout=10, headers=compiled_headers + ) - res_status = upstream_response.status + res_status = upstream_response.status_code content_type = upstream_response.headers.get("Content-Type") - log.debug( + logger.debug( "Image proxy response " f"status: {res_status}, content-type: {content_type}" ) return upstream_response, res_status, content_type - except (HTTPError, RemoteDisconnected, TimeoutError) as exc: + except requests.RequestException as exc: capture_exception(exc) raise UpstreamThumbnailException(f"Failed to render thumbnail: {exc}") except Exception as exc: @@ -217,7 +226,7 @@ def _get_proxied_image( info_res, *_ = MediaViewSet._thumbnail_proxy_comm( "info", {"url": image_url} ) - info = json.loads(info_res.read()) + info = info_res.json() width = info["width"] params = { @@ -243,6 +252,6 @@ def _get_proxied_image( "resize", params, (("Accept", accept_header),) ) response = HttpResponse( - img_res.read(), status=res_status, content_type=content_type + img_res.content, status=res_status, content_type=content_type ) return response diff --git a/api/catalog/settings.py b/api/catalog/settings.py index fbffef2d3..4ca1418d9 100644 --- a/api/catalog/settings.py +++ b/api/catalog/settings.py @@ -304,6 +304,11 @@ # The version of the API. We follow the semantic version specification. API_VERSION = config("SEMANTIC_VERSION", default="Version not specified") +OUTBOUND_USER_AGENT_TEMPLATE = config( + "OUTBOUND_USER_AGENT_TEMPLATE", + default=f"Openverse{{purpose}}/{API_VERSION} (https://wordpress.org/openverse)", +) + # The contact email of the Openverse team CONTACT_EMAIL = config("CONTACT_EMAIL", default="openverse@wordpress.org") diff --git a/api/test/audio_integration_test.py b/api/test/audio_integration_test.py index d4cf7d52e..4e0bf6e28 100644 --- a/api/test/audio_integration_test.py +++ b/api/test/audio_integration_test.py @@ -18,10 +18,6 @@ search_source_and_excluded, search_special_chars, stats, - thumb, - thumb_compression, - thumb_full_size, - thumb_webp, uuid_validation, ) @@ -122,10 +118,6 @@ def test_audio_stats(): stats("audio") -def test_audio_thumb(jamendo_audio_fixture): - thumb(jamendo_audio_fixture) - - def test_audio_detail_without_thumb(): resp = requests.get(f"{API_URL}/v1/audio/44540200-91eb-483d-9e99-38ce86a52fb6") assert resp.status_code == 200 @@ -141,18 +133,6 @@ def test_audio_search_without_thumb(): assert parsed["results"][0]["thumbnail"] is None -def test_audio_thumb_compression(jamendo_audio_fixture): - thumb_compression(jamendo_audio_fixture) - - -def test_audio_thumb_webp(jamendo_audio_fixture): - thumb_webp(jamendo_audio_fixture) - - -def test_audio_thumb_full_size(jamendo_audio_fixture): - thumb_full_size(jamendo_audio_fixture) - - def test_audio_report(audio_fixture): report("audio", audio_fixture) diff --git a/api/test/factory/models/media.py b/api/test/factory/models/media.py index 47a0e5f95..8e9a8462b 100644 --- a/api/test/factory/models/media.py +++ b/api/test/factory/models/media.py @@ -22,6 +22,7 @@ class Meta: foreign_landing_url = Faker("globally_unique_url") url = Faker("globally_unique_url") + thumbnail = Faker("image_url") class IdentifierFactory(factory.SubFactory): diff --git a/api/test/factory/sample-audio-info.json b/api/test/factory/sample-audio-info.json new file mode 100644 index 000000000..742ba4c2a --- /dev/null +++ b/api/test/factory/sample-audio-info.json @@ -0,0 +1,20 @@ +{ + "creator": "TheGloomWorker", + "headers": { + "Access-Control-Allow-Headers": "DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range", + "Access-Control-Allow-Methods": "GET, POST, OPTIONS", + "Access-Control-Allow-Origin": "*", + "Access-Control-Expose-Headers": "Content-Length,Content-Range", + "Connection": "keep-alive", + "Content-Length": "583491", + "Content-Range": "bytes 0-583490/583491", + "Content-Type": "audio/mpeg", + "Date": "Fri, 12 Aug 2022 15:18:23 GMT", + "ETag": "5e4e7f4d-8e743", + "Last-Modified": "Thu, 20 Feb 2020 12:45:01 GMT", + "Server": "nginx/1.18.0 (Ubuntu)" + }, + "license": "CC0", + "license_version": "1.0", + "title": "Birds and City Ambience.mp3" +} diff --git a/api/test/factory/sample-audio.mp3 b/api/test/factory/sample-audio.mp3 new file mode 100644 index 000000000..d8f0bed6d Binary files /dev/null and b/api/test/factory/sample-audio.mp3 differ diff --git a/api/test/factory/sample-image-info.json b/api/test/factory/sample-image-info.json new file mode 100644 index 000000000..9482892d9 --- /dev/null +++ b/api/test/factory/sample-image-info.json @@ -0,0 +1,6 @@ +{ + "creator": "andymiccone", + "license": "CC0", + "license_version": "1.0", + "title": "'I Just Love Old Music..!'" +} diff --git a/api/test/factory/sample-image.jpg b/api/test/factory/sample-image.jpg new file mode 100644 index 000000000..fdd27220a Binary files /dev/null and b/api/test/factory/sample-image.jpg differ diff --git a/api/test/image_integration_test.py b/api/test/image_integration_test.py index 1419df6bb..1f3945782 100644 --- a/api/test/image_integration_test.py +++ b/api/test/image_integration_test.py @@ -18,10 +18,6 @@ search_source_and_excluded, search_special_chars, stats, - thumb, - thumb_compression, - thumb_full_size, - thumb_webp, uuid_validation, ) from urllib.parse import urlencode @@ -74,22 +70,6 @@ def test_image_stats(): stats("images") -def test_image_thumb(image_fixture): - thumb(image_fixture) - - -def test_image_thumb_compression(image_fixture): - thumb_compression(image_fixture) - - -def test_image_thumb_webp(image_fixture): - thumb_webp(image_fixture) - - -def test_image_thumb_full_size(image_fixture): - thumb_full_size(image_fixture) - - def test_audio_report(image_fixture): report("images", image_fixture) diff --git a/api/test/media_integration.py b/api/test/media_integration.py index b3f8ec7c3..aabb581e2 100644 --- a/api/test/media_integration.py +++ b/api/test/media_integration.py @@ -4,11 +4,9 @@ """ import json -from io import BytesIO from test.constants import API_URL import requests -from PIL import Image def search(fixture): @@ -100,51 +98,6 @@ def stats(media_type, count_key="media_count"): assert provider_count > 0 -def thumb(fixture): - thumbnail_url = fixture["results"][0]["thumbnail"] - thumbnail_response = requests.get(thumbnail_url) - assert thumbnail_response.status_code == 200 - assert thumbnail_response.headers["Content-Type"].startswith("image/") - - -def thumb_compression(fixture): - thumbnail_url = fixture["results"][0]["thumbnail"] - - thumbnail_response = requests.get(thumbnail_url) - compressed_size = len(thumbnail_response.content) - thumbnail_response = requests.get(f"{thumbnail_url}?compressed=no") - actual_size = len(thumbnail_response.content) - - assert compressed_size < actual_size - - -def thumb_webp(fixture): - thumbnail_url = fixture["results"][0]["thumbnail"] - - thumbnail_response = requests.get(thumbnail_url, headers={"Accept": "image/*,*/*"}) - assert thumbnail_response.headers["Content-Type"] != "image/webp" - thumbnail_response = requests.get( - thumbnail_url, headers={"Accept": "image/webp,image/*,*/*"} - ) - assert thumbnail_response.headers["Content-Type"] == "image/webp" - - -def thumb_full_size(fixture): - def _get_image_dimen(url: str) -> tuple[int, int]: - response = requests.get(url) - image = Image.open(BytesIO(response.content)) - return image.size - - thumbnail_url = fixture["results"][0]["thumbnail"] - full_w, full_h = _get_image_dimen(f"{thumbnail_url}?full_size=yes") - scaled_w, scaled_h = _get_image_dimen(thumbnail_url) - if full_w > 600: - assert scaled_w == 600 - assert full_w > scaled_w - else: - assert scaled_w == full_w # h2non/imaginary will not scale up - - def report(media_type, fixture): test_id = fixture["results"][0]["id"] response = requests.post( diff --git a/api/test/unit/utils/validate_images_test.py b/api/test/unit/utils/validate_images_test.py new file mode 100644 index 000000000..afbaeb954 --- /dev/null +++ b/api/test/unit/utils/validate_images_test.py @@ -0,0 +1,69 @@ +from dataclasses import dataclass +from typing import Callable + +import pytest +from fakeredis import FakeRedis +from grequests import AsyncRequest +from requests import Response + +from catalog.api.utils.validate_images import HEADERS, validate_images + + +@pytest.fixture(autouse=True) +def redis(monkeypatch) -> FakeRedis: + fake_redis = FakeRedis() + + def get_redis_connection(*args, **kwargs): + return fake_redis + + monkeypatch.setattr("django_redis.get_redis_connection", get_redis_connection) + + yield fake_redis + fake_redis.client().close() + + +@dataclass +class GRequestsFixture: + requests: list[AsyncRequest] + response_factory: Callable[ + (AsyncRequest,), Response + ] = lambda x: GRequestsFixture._default_response_factory(x) + + @staticmethod + def _default_response_factory(req: AsyncRequest) -> Response: + res = Response() + res.url = req.url + res.status_code = 200 + return res + + +@pytest.fixture(autouse=True) +def grequests(monkeypatch) -> GRequestsFixture: + fixture = GRequestsFixture([]) + + def map_reqs(reqs, **kwargs): + nonlocal fixture + fixture.requests += list(reqs) + responses = [fixture.response_factory(r) for r in fixture.requests] + return responses + + monkeypatch.setattr("grequests.map", map_reqs) + + return fixture + + +def test_sends_user_agent(grequests): + query_hash = "test_sends_user_agent" + results = [object() for _ in range(40)] + image_urls = [f"http://example.org/{i}" for i in range(len(results))] + start_slice = 0 + + validate_images(query_hash, start_slice, results, image_urls) + + requested_urls = [r.url for r in grequests.requests] + for url in image_urls: + assert url in requested_urls + + assert len(grequests.requests) > 0 + for r in grequests.requests: + assert r.kwargs["headers"] == HEADERS diff --git a/api/test/unit/utils/watermark_test.py b/api/test/unit/utils/watermark_test.py new file mode 100644 index 000000000..3c0e8c50b --- /dev/null +++ b/api/test/unit/utils/watermark_test.py @@ -0,0 +1,53 @@ +import json +from dataclasses import dataclass +from pathlib import Path +from typing import Callable + +import pytest +from requests import Request, Response + +from catalog.api.utils.watermark import HEADERS, watermark + + +_MOCK_IMAGE_PATH = Path(__file__).parent / ".." / ".." / "factory" +_MOCK_IMAGE_BYTES = (_MOCK_IMAGE_PATH / "sample-image.jpg").read_bytes() +_MOCK_IMAGE_INFO = json.loads((_MOCK_IMAGE_PATH / "sample-image-info.json").read_text()) + + +@dataclass +class RequestsFixture: + requests: list[Request] + response_factory: Callable[ + (Request,), Response + ] = lambda x: RequestsFixture._default_response_factory(x) + + @staticmethod + def _default_response_factory(req: Request) -> Response: + res = Response() + res.url = req.url + res.status_code = 200 + res._content = _MOCK_IMAGE_BYTES + return res + + +@pytest.fixture(autouse=True) +def requests(monkeypatch) -> RequestsFixture: + fixture = RequestsFixture([]) + + def requests_get(url, **kwargs): + req = Request(method="GET", url=url, **kwargs) + fixture.requests.append(req) + response = fixture.response_factory(req) + return response + + monkeypatch.setattr("requests.get", requests_get) + + return fixture + + +def test_sends_UA_header(requests): + watermark("http://example.com/", _MOCK_IMAGE_INFO) + + assert len(requests.requests) > 0 + for r in requests.requests: + assert r.headers == HEADERS diff --git a/api/test/unit/utils/waveform_test.py b/api/test/unit/utils/waveform_test.py new file mode 100644 index 000000000..1f6326688 --- /dev/null +++ b/api/test/unit/utils/waveform_test.py @@ -0,0 +1,57 @@ +import json +from dataclasses import dataclass +from io import BytesIO +from pathlib import Path +from typing import Callable + +import pytest +from requests import Request, Response +from requests.structures import CaseInsensitiveDict + +from catalog.api.utils.waveform import UA_STRING, download_audio + + +_MOCK_AUDIO_PATH = Path(__file__).parent / ".." / ".." / "factory" +_MOCK_AUDIO_BYTES = (_MOCK_AUDIO_PATH / "sample-audio.mp3").read_bytes() +_MOCK_AUDIO_INFO = json.loads((_MOCK_AUDIO_PATH / "sample-audio-info.json").read_text()) + + +@dataclass +class RequestsFixture: + requests: list[Request] + response_factory: Callable[ + (Request,), Response + ] = lambda x: RequestsFixture._default_response_factory(x) + + @staticmethod + def _default_response_factory(req: Request) -> Response: + res = Response() + res.url = req.url + res.status_code = 200 + res.raw = BytesIO(_MOCK_AUDIO_BYTES) + res.headers = CaseInsensitiveDict(_MOCK_AUDIO_INFO["headers"]) + return res + + +@pytest.fixture(autouse=True) +def requests(monkeypatch) -> RequestsFixture: + fixture = RequestsFixture([]) + + def requests_get(url, **kwargs): + kwargs.pop("stream") + req = Request(method="GET", url=url, **kwargs) + fixture.requests.append(req) + response = fixture.response_factory(req) + return response + + monkeypatch.setattr("requests.get", requests_get) + + return fixture + + +def test_download_audio_sends_ua_header(requests): + download_audio("http://example.org", "abcd-1234") + + assert len(requests.requests) > 0 + for r in requests.requests: + assert r.headers["User-Agent"] == UA_STRING diff --git a/api/test/unit/views/image_views_test.py b/api/test/unit/views/image_views_test.py new file mode 100644 index 000000000..f7336f413 --- /dev/null +++ b/api/test/unit/views/image_views_test.py @@ -0,0 +1,65 @@ +import json +from dataclasses import dataclass +from pathlib import Path +from test.factory.models.image import ImageFactory +from typing import Callable + +from rest_framework.test import APIClient + +import pytest +from requests import Request, Response + +from catalog.api.views.image_views import ImageViewSet + + +_MOCK_IMAGE_PATH = Path(__file__).parent / ".." / ".." / "factory" +_MOCK_IMAGE_BYTES = (_MOCK_IMAGE_PATH / "sample-image.jpg").read_bytes() +_MOCK_IMAGE_INFO = json.loads((_MOCK_IMAGE_PATH / "sample-image-info.json").read_text()) + + +@pytest.fixture +def api_client(): + return APIClient() + + +@dataclass +class RequestsFixture: + requests: list[Request] + response_factory: Callable[ + (Request,), Response + ] = lambda x: RequestsFixture._default_response_factory(x) + + @staticmethod + def _default_response_factory(req: Request) -> Response: + res = Response() + res.url = req.url + res.status_code = 200 + res._content = _MOCK_IMAGE_BYTES + return res + + +@pytest.fixture(autouse=True) +def requests(monkeypatch) -> RequestsFixture: + fixture = RequestsFixture([]) + + def requests_get(url, **kwargs): + req = Request(method="GET", url=url, **kwargs) + fixture.requests.append(req) + response = fixture.response_factory(req) + return response + + monkeypatch.setattr("requests.get", requests_get) + + return fixture + + +@pytest.mark.django_db +def test_oembed_sends_ua_header(api_client, requests): + image = ImageFactory.create() + res = api_client.get("/v1/images/oembed/", data={"url": f"/{image.identifier}"}) + + assert res.status_code == 200 + + assert len(requests.requests) > 0 + for r in requests.requests: + assert r.headers == ImageViewSet.OEMBED_HEADERS diff --git a/api/test/unit/views/media_views_test.py b/api/test/unit/views/media_views_test.py index d97a5b040..a53deb79e 100644 --- a/api/test/unit/views/media_views_test.py +++ b/api/test/unit/views/media_views_test.py @@ -1,40 +1,243 @@ +import json +from dataclasses import dataclass, field +from pathlib import Path +from test.factory.models.audio import AudioFactory from test.factory.models.image import ImageFactory +from typing import Callable from unittest import mock -from urllib.error import HTTPError from rest_framework.test import APIClient import pytest +import requests as requests_lib +from requests import PreparedRequest, Request, Response -from catalog.api.models.image import Image +from catalog.api.views.media_views import MediaViewSet + + +_MOCK_IMAGE_PATH = Path(__file__).parent / ".." / ".." / "factory" +_MOCK_IMAGE_BYTES = (_MOCK_IMAGE_PATH / "sample-image.jpg").read_bytes() +_MOCK_IMAGE_INFO = json.loads((_MOCK_IMAGE_PATH / "sample-image-info.json").read_text()) @pytest.fixture -def api_client() -> APIClient: +def api_client(): return APIClient() -@pytest.fixture -def image() -> Image: - return ImageFactory.create() +@dataclass +class SentRequest: + request: PreparedRequest + kwargs: dict + + +@dataclass +class RequestsFixture: + sent_requests: list[SentRequest] + send_handler: Callable[ + (Request,), Response + ] = lambda *args, **kwargs: RequestsFixture._default_send_handler(*args, **kwargs) + response_queue: list[Response] = field(default_factory=list) + + @staticmethod + def _default_send_handler(fixture, session, req, **kwargs) -> Response: + if fixture.response_queue: + return fixture.response_queue.pop() + + res = Response() + res.url = req.url + res.status_code = 200 + res._content = _MOCK_IMAGE_BYTES + return res + + +@pytest.fixture(autouse=True) +def requests(monkeypatch) -> RequestsFixture: + fixture = RequestsFixture([]) + + def send(session, req, **kwargs): + fixture.sent_requests.append(SentRequest(req, kwargs)) + response = fixture.send_handler(fixture, session, req, **kwargs) + return response + + monkeypatch.setattr("requests.sessions.Session.send", send) + + return fixture @pytest.mark.django_db -def test_thumb_error(api_client, image): +@pytest.mark.parametrize( + ("media_type", "media_factory"), + ( + ("images", ImageFactory), + ("audio", AudioFactory), + ), +) +def test_thumb_error(api_client, media_type, media_factory, requests): error = None - def urlopen_503_response(url, **kwargs): + def send_handler(fixture, session, req, **kwargs): + requests.sent_requests.append(SentRequest(req, kwargs)) nonlocal error - error = HTTPError(url, 503, "Bad error upstream whoops", {}, None) + error = requests_lib.HTTPError( + req.url, 503, "Bad error upstream whoops", {}, None + ) raise error + requests.send_handler = send_handler + with mock.patch( - "catalog.api.views.media_views.urlopen" - ) as urlopen_mock, mock.patch( "catalog.api.views.media_views.capture_exception", autospec=True ) as mock_capture_exception: - urlopen_mock.side_effect = urlopen_503_response - response = api_client.get(f"/v1/images/{image.identifier}/thumb/") + media = media_factory.create() + response = api_client.get(f"/v1/{media_type}/{media.identifier}/thumb/") assert response.status_code == 424 mock_capture_exception.assert_called_once_with(error) + + +@pytest.mark.django_db +@pytest.mark.parametrize( + ("media_type", "media_factory"), + ( + ("images", ImageFactory), + ("audio", AudioFactory), + ), +) +def test_thumb_sends_ua_header(api_client, media_type, media_factory, requests): + media = media_factory.create() + res = api_client.get(f"/v1/{media_type}/{media.identifier}/thumb/") + + assert res.status_code == 200 + + assert len(requests.sent_requests) == 1 + assert ( + requests.sent_requests[0].request.headers["User-agent"] + == MediaViewSet.THUMBNAIL_PROXY_COMM_HEADERS["User-Agent"] + ) + + +@pytest.mark.django_db +@pytest.mark.parametrize( + ("media_type", "media_factory"), + ( + ("images", ImageFactory), + ("audio", AudioFactory), + ), +) +def test_thumb(api_client, media_type, media_factory, requests): + media = media_factory.create() + res = api_client.get(f"/v1/{media_type}/{media.identifier}/thumb/") + + assert res.status_code == 200 + assert res.content == _MOCK_IMAGE_BYTES + expected_upstream_params = { + "quality=", + "compression=", + "width=600", + } + for entry in expected_upstream_params: + assert ( + entry in requests.sent_requests[0].request.url + ), f"{entry} not found in prepared request url: {requests.sent_requests[0].request.url}" + + +@pytest.mark.django_db +@pytest.mark.parametrize( + ("media_type", "media_factory"), + ( + ("images", ImageFactory), + ("audio", AudioFactory), + ), +) +def test_thumb_compression(api_client, media_type, media_factory, requests): + media = media_factory.create() + res = api_client.get( + f"/v1/{media_type}/{media.identifier}/thumb/", data={"compressed": "yes"} + ) + + assert res.status_code == 200 + expected_upstream_params = { + # Don't encode the defaults here, just assert that these params + # exist, but not with the uncompressed values in the rejected set below + "quality=", + "compression=", + } + rejected_params = { + "quality=100", + "compression=0", + } + for entry in expected_upstream_params: + assert ( + entry in requests.sent_requests[0].request.url + ), f"{entry} not found in prepared request url: {requests.sent_requests[0].request.url}" + + for entry in rejected_params: + assert ( + entry not in requests.sent_requests[0].request.url + ), f"Rejected {entry} found in prepared request url: {requests.sent_requests[0].request.url}" + + +@pytest.mark.django_db +@pytest.mark.parametrize( + ("media_type", "media_factory"), + ( + ("images", ImageFactory), + ("audio", AudioFactory), + ), +) +def test_thumb_webp(api_client, media_type, media_factory, requests): + media = media_factory.create() + accept_header = "image/webp,image/*,*/*" + res = api_client.get( + f"/v1/{media_type}/{media.identifier}/thumb/", HTTP_ACCEPT=accept_header + ) + + assert res.status_code == 200 + expected_upstream_params = {"type=auto"} + for entry in expected_upstream_params: + assert ( + entry in requests.sent_requests[0].request.url + ), f"{entry} not found in prepared request url: {requests.sent_requests[0].request.url}" + + assert ("accept", accept_header) in requests.sent_requests[ + 0 + ].request.headers.lower_items() + + +@pytest.mark.django_db +@pytest.mark.parametrize( + ("media_type", "media_factory"), + ( + ("images", ImageFactory), + ("audio", AudioFactory), + ), +) +def test_thumb_full_size(api_client, media_type, media_factory, requests): + media = media_factory.create() + + upstream_width = 1200 + info_response = Response() + info_response.status = 200 + info_response._content = json.dumps({"width": upstream_width}).encode() + requests.response_queue = [info_response] + + res = api_client.get( + f"/v1/{media_type}/{media.identifier}/thumb/", data={"full_size": "yes"} + ) + + assert res.status_code == 200 + rejected_params = { + "width=600", + } + + for entry in rejected_params: + assert ( + entry not in requests.sent_requests[1].request.url + ), f"Rejected {entry} found in prepared request url: {requests.sent_requests[0].request.url}" + + expected_params = {f"width={upstream_width}"} + for entry in expected_params: + assert ( + entry in requests.sent_requests[1].request.url + ), f"{entry} not found in prepared request url: {requests.sent_requests[0].request.url}" diff --git a/docker-compose.yml b/docker-compose.yml index beed92cc8..49ffeb7e5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,7 +18,7 @@ services: environment: PORT: 8222 MALLOC_ARENA_MAX: 2 - command: ["-enable-url-source"] + command: ["-enable-url-source", "-forward-headers", "User-Agent"] upstream_db: image: postgres:13.2-alpine