From 35197e98e735a77ae730b5311f3b549def3e10fb Mon Sep 17 00:00:00 2001 From: thegreendrinker <32495526+thegreendrinker@users.noreply.github.com> Date: Wed, 10 Apr 2024 10:21:37 -0700 Subject: [PATCH] Bugfix to ensure image type is correctly extracted from content type (#4062) * fix: modified extension.py to split file extension by ';' * test: added test that ensures that stripped file url is expected file type * test: added pytest cases for test_get_extension_from_content_type() * fix: added a . to one of the file extension parameters in pytest.mark.parametrize for test_get_extension_from_content_type() * Apply linting * Fix tests (initially) * fix: added check for the case of a semicolon in the content_type and modified get_file_extension_from_content_type() to utilize mimetypes.guess_extension() to automatically find file extensions in strings * fix: added check for test case for 'image/png;charset=UTF-8' * fix: changed jpeg to jpg * fix: changed test cases 'audio/midi' to compare to 'midi', and cases that return None when passed to mimetypes.guess_extension() to compare to None. Also, call mimetypes.guess_extension() in conditional header, assign it to variable, and then only strip it of '.' if it isn't None * fix: removed test input '5' because not iterable and added test input 'foobar' * Lint files --------- Co-authored-by: Darien Co-authored-by: Madison Swain-Bowden --- api/api/utils/image_proxy/extension.py | 9 +++++++-- api/test/unit/utils/test_image_proxy.py | 22 ++++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/api/api/utils/image_proxy/extension.py b/api/api/utils/image_proxy/extension.py index a7da85e9cd8..2773b9d6a4e 100644 --- a/api/api/utils/image_proxy/extension.py +++ b/api/api/utils/image_proxy/extension.py @@ -1,4 +1,5 @@ import logging +import mimetypes from os.path import splitext from urllib.parse import urlparse @@ -78,6 +79,10 @@ def _get_file_extension_from_content_type(content_type: str) -> str | None: Return the image extension if present in the Response's content type header. """ - if content_type and "/" in content_type: - return content_type.split("/")[1] + if ( + content_type + and "/" in content_type + and (ext := mimetypes.guess_extension(content_type.split(";")[0], strict=False)) + ): + return ext.strip(".") return None diff --git a/api/test/unit/utils/test_image_proxy.py b/api/test/unit/utils/test_image_proxy.py index 8b5401e5371..5dce10b014d 100644 --- a/api/test/unit/utils/test_image_proxy.py +++ b/api/test/unit/utils/test_image_proxy.py @@ -512,6 +512,28 @@ def test__get_extension_from_url(image_url, expected_ext): assert extension._get_file_extension_from_url(image_url) == expected_ext +@pytest.mark.parametrize( + "content_type, expected_ext", + [ + ("image/png;charset=UTF-8", "png"), + ("image/jpeg", "jpg"), + ("image/png", "png"), + ("image/gif", "gif"), + ("image/svg+xml", "svg"), + ("audio/midi", "midi"), + ("audio/mpeg", "mp3"), + ("audio/ogg", None), + ("audio/opus", "opus"), + ("audio/wav", None), + ("video/webm", "webm"), + (None, None), + ("foobar", None), + ], +) +def test_get_extension_from_content_type(content_type, expected_ext): + assert extension._get_file_extension_from_content_type(content_type) == expected_ext + + @pytest.mark.django_db @pytest.mark.parametrize("image_type", ["apng", "tiff", "bmp"]) def test_photon_get_raises_by_not_allowed_types(photon_get, image_type):