From 56c7998da7d312430d69bdffe629acc64910661a Mon Sep 17 00:00:00 2001 From: Josh Qou Date: Mon, 29 May 2023 04:19:37 +0100 Subject: [PATCH 1/9] Fix unsafe hotserving behaviour for non-multimedia uploads. --- changelog.d/15680.bugfix | 1 + synapse/media/_base.py | 23 ++++++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) create mode 100644 changelog.d/15680.bugfix diff --git a/changelog.d/15680.bugfix b/changelog.d/15680.bugfix new file mode 100644 index 000000000000..5eeb159ab6ad --- /dev/null +++ b/changelog.d/15680.bugfix @@ -0,0 +1 @@ +Fix unsafe hotserving behaviour for non-multimedia uploads. diff --git a/synapse/media/_base.py b/synapse/media/_base.py index ef8334ae2586..03f1a7c3a69f 100644 --- a/synapse/media/_base.py +++ b/synapse/media/_base.py @@ -50,6 +50,8 @@ "text/xml", ] +HOTSERVE_CONTENT_TYPES = ["audio/", "video/", "image/"] + def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]: """Parses the server name, media ID and optional file name from the request URI @@ -151,7 +153,16 @@ def _quote(x: str) -> str: else: content_type = media_type + # Only hotserve "safe" mimetypes, force download everything else + disposition_type = "attachment" + for mime in HOTSERVE_CONTENT_TYPES: + if media_type.lower().startswith(mime): + disposition_type = "inline" + break + + disposition = disposition_type request.setHeader(b"Content-Type", content_type.encode("UTF-8")) + if upload_name: # RFC6266 section 4.1 [1] defines both `filename` and `filename*`. # @@ -173,11 +184,17 @@ def _quote(x: str) -> str: # correctly interpret those as of 0.99.2 and (b) they are a bit of a pain and we # may as well just do the filename* version. if _can_encode_filename_as_token(upload_name): - disposition = "inline; filename=%s" % (upload_name,) + disposition = "%s; filename=%s" % ( + disposition_type, + upload_name, + ) else: - disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),) + disposition = "%s; filename*=utf-8''%s" % ( + disposition_type, + _quote(upload_name), + ) - request.setHeader(b"Content-Disposition", disposition.encode("ascii")) + request.setHeader(b"Content-Disposition", disposition.encode("ascii")) # cache for at least a day. # XXX: we might want to turn this off for data we don't want to From df96318e50d6f89601a6551170a916fd0d1bd966 Mon Sep 17 00:00:00 2001 From: Josh Qou Date: Wed, 31 May 2023 00:40:35 +0100 Subject: [PATCH 2/9] invert disposition assert --- tests/media/test_media_storage.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/media/test_media_storage.py b/tests/media/test_media_storage.py index f0f2da65db0b..51e7ac2e8f9f 100644 --- a/tests/media/test_media_storage.py +++ b/tests/media/test_media_storage.py @@ -364,8 +364,8 @@ def test_disposition_filenamestar_utf8escaped(self) -> None: def test_disposition_none(self) -> None: """ - If there is no filename, one isn't passed on in the Content-Disposition - of the request. + If there is no filename, Content-Disposition should only + be a disposition type. """ channel = self._req(None) @@ -373,7 +373,9 @@ def test_disposition_none(self) -> None: self.assertEqual( headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type] ) - self.assertEqual(headers.getRawHeaders(b"Content-Disposition"), None) + self.assertEqual( + headers.getRawHeaders(b"Content-Disposition"), [b"inline"] + ) def test_thumbnail_crop(self) -> None: """Test that a cropped remote thumbnail is available.""" From def5f2a12d3c8cf9998276ae1263e5d10449ccb0 Mon Sep 17 00:00:00 2001 From: Josh Qou Date: Wed, 31 May 2023 13:04:22 +0100 Subject: [PATCH 3/9] test_media_storage.py: run lint --- tests/media/test_media_storage.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/media/test_media_storage.py b/tests/media/test_media_storage.py index 51e7ac2e8f9f..b3198a0596dc 100644 --- a/tests/media/test_media_storage.py +++ b/tests/media/test_media_storage.py @@ -373,9 +373,7 @@ def test_disposition_none(self) -> None: self.assertEqual( headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type] ) - self.assertEqual( - headers.getRawHeaders(b"Content-Disposition"), [b"inline"] - ) + self.assertEqual(headers.getRawHeaders(b"Content-Disposition"), [b"inline"]) def test_thumbnail_crop(self) -> None: """Test that a cropped remote thumbnail is available.""" From 5ce2a121c2971f95ed911247db45205292e5ef4f Mon Sep 17 00:00:00 2001 From: Josh Qou Date: Wed, 31 May 2023 13:51:26 +0100 Subject: [PATCH 4/9] test_base.py: /inline/attachment/s --- tests/media/test_base.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/media/test_base.py b/tests/media/test_base.py index 66498c744d6d..4728c80969a8 100644 --- a/tests/media/test_base.py +++ b/tests/media/test_base.py @@ -20,12 +20,12 @@ class GetFileNameFromHeadersTests(unittest.TestCase): # input -> expected result TEST_CASES = { - b"inline; filename=abc.txt": "abc.txt", - b'inline; filename="azerty"': "azerty", - b'inline; filename="aze%20rty"': "aze%20rty", - b'inline; filename="aze"rty"': 'aze"rty', - b'inline; filename="azer;ty"': "azer;ty", - b"inline; filename*=utf-8''foo%C2%A3bar": "foo£bar", + b"attachment; filename=abc.txt": "abc.txt", + b'attachment; filename="azerty"': "azerty", + b'attachment; filename="aze%20rty"': "aze%20rty", + b'attachment; filename="aze"rty"': 'aze"rty', + b'attachment; filename="azer;ty"': "azer;ty", + b"attachment; filename*=utf-8''foo%C2%A3bar": "foo£bar", } def tests(self) -> None: From cf3cbc8950ffdca5f1b5297107b6fe8d0d60b784 Mon Sep 17 00:00:00 2001 From: Josh Qou Date: Thu, 1 Jun 2023 15:16:54 +0100 Subject: [PATCH 5/9] Only return attachment for disposition type, update tests --- synapse/media/_base.py | 15 +++------------ tests/media/test_media_storage.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/synapse/media/_base.py b/synapse/media/_base.py index 03f1a7c3a69f..bb2a9c5ad316 100644 --- a/synapse/media/_base.py +++ b/synapse/media/_base.py @@ -50,8 +50,6 @@ "text/xml", ] -HOTSERVE_CONTENT_TYPES = ["audio/", "video/", "image/"] - def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]: """Parses the server name, media ID and optional file name from the request URI @@ -153,14 +151,7 @@ def _quote(x: str) -> str: else: content_type = media_type - # Only hotserve "safe" mimetypes, force download everything else - disposition_type = "attachment" - for mime in HOTSERVE_CONTENT_TYPES: - if media_type.lower().startswith(mime): - disposition_type = "inline" - break - - disposition = disposition_type + disposition = "attachment" request.setHeader(b"Content-Type", content_type.encode("UTF-8")) if upload_name: @@ -185,12 +176,12 @@ def _quote(x: str) -> str: # may as well just do the filename* version. if _can_encode_filename_as_token(upload_name): disposition = "%s; filename=%s" % ( - disposition_type, + disposition, upload_name, ) else: disposition = "%s; filename*=utf-8''%s" % ( - disposition_type, + disposition, _quote(upload_name), ) diff --git a/tests/media/test_media_storage.py b/tests/media/test_media_storage.py index b3198a0596dc..ea0051dde42f 100644 --- a/tests/media/test_media_storage.py +++ b/tests/media/test_media_storage.py @@ -317,7 +317,7 @@ def _req( def test_handle_missing_content_type(self) -> None: channel = self._req( - b"inline; filename=out" + self.test_image.extension, + b"attachment; filename=out" + self.test_image.extension, include_content_type=False, ) headers = channel.headers @@ -331,7 +331,7 @@ def test_disposition_filename_ascii(self) -> None: If the filename is filename= then Synapse will decode it as an ASCII string, and use filename= in the response. """ - channel = self._req(b"inline; filename=out" + self.test_image.extension) + channel = self._req(b"attachment; filename=out" + self.test_image.extension) headers = channel.headers self.assertEqual( @@ -339,7 +339,7 @@ def test_disposition_filename_ascii(self) -> None: ) self.assertEqual( headers.getRawHeaders(b"Content-Disposition"), - [b"inline; filename=out" + self.test_image.extension], + [b"attachment; filename=out" + self.test_image.extension], ) def test_disposition_filenamestar_utf8escaped(self) -> None: @@ -350,7 +350,7 @@ def test_disposition_filenamestar_utf8escaped(self) -> None: """ filename = parse.quote("\u2603".encode()).encode("ascii") channel = self._req( - b"inline; filename*=utf-8''" + filename + self.test_image.extension + b"attachment; filename*=utf-8''" + filename + self.test_image.extension ) headers = channel.headers @@ -359,7 +359,7 @@ def test_disposition_filenamestar_utf8escaped(self) -> None: ) self.assertEqual( headers.getRawHeaders(b"Content-Disposition"), - [b"inline; filename*=utf-8''" + filename + self.test_image.extension], + [b"attachment; filename*=utf-8''" + filename + self.test_image.extension], ) def test_disposition_none(self) -> None: @@ -373,7 +373,7 @@ def test_disposition_none(self) -> None: self.assertEqual( headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type] ) - self.assertEqual(headers.getRawHeaders(b"Content-Disposition"), [b"inline"]) + self.assertEqual(headers.getRawHeaders(b"Content-Disposition"), [b"attachment"]) def test_thumbnail_crop(self) -> None: """Test that a cropped remote thumbnail is available.""" @@ -612,7 +612,7 @@ def test_x_robots_tag_header(self) -> None: Tests that the `X-Robots-Tag` header is present, which informs web crawlers to not index, archive, or follow links in media. """ - channel = self._req(b"inline; filename=out" + self.test_image.extension) + channel = self._req(b"attachment; filename=out" + self.test_image.extension) headers = channel.headers self.assertEqual( @@ -625,7 +625,7 @@ def test_cross_origin_resource_policy_header(self) -> None: Test that the Cross-Origin-Resource-Policy header is set to "cross-origin" allowing web clients to embed media from the downloads API. """ - channel = self._req(b"inline; filename=out" + self.test_image.extension) + channel = self._req(b"attachment; filename=out" + self.test_image.extension) headers = channel.headers From af6e331e9d1903e5eda9602f1ff408263f68dc7d Mon Sep 17 00:00:00 2001 From: Josh Qou <97894002+joshqou@users.noreply.github.com> Date: Thu, 1 Jun 2023 15:20:48 +0100 Subject: [PATCH 6/9] Update synapse/media/_base.py Co-authored-by: Patrick Cloke --- synapse/media/_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/media/_base.py b/synapse/media/_base.py index bb2a9c5ad316..20cb8b9010bb 100644 --- a/synapse/media/_base.py +++ b/synapse/media/_base.py @@ -151,9 +151,10 @@ def _quote(x: str) -> str: else: content_type = media_type - disposition = "attachment" request.setHeader(b"Content-Type", content_type.encode("UTF-8")) + # Use a Content-Disposition of attachment to force download of media. + disposition = "attachment" if upload_name: # RFC6266 section 4.1 [1] defines both `filename` and `filename*`. # From 68b1321a11b4c912c138994493d8bf79b8fc555b Mon Sep 17 00:00:00 2001 From: Josh Qou <97894002+joshqou@users.noreply.github.com> Date: Thu, 1 Jun 2023 15:22:13 +0100 Subject: [PATCH 7/9] Update changelog.d/15680.bugfix Co-authored-by: Patrick Cloke --- changelog.d/15680.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/15680.bugfix b/changelog.d/15680.bugfix index 5eeb159ab6ad..aac9fd164a6e 100644 --- a/changelog.d/15680.bugfix +++ b/changelog.d/15680.bugfix @@ -1 +1 @@ -Fix unsafe hotserving behaviour for non-multimedia uploads. +Fix a long-standing bug where downloading media files was served in an unsafe manner. From c9d7e5453cac42c73a861f8e138e36b88edb050b Mon Sep 17 00:00:00 2001 From: Josh Qou Date: Thu, 1 Jun 2023 15:24:29 +0100 Subject: [PATCH 8/9] add attribution --- changelog.d/15680.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/15680.bugfix b/changelog.d/15680.bugfix index aac9fd164a6e..816db6806726 100644 --- a/changelog.d/15680.bugfix +++ b/changelog.d/15680.bugfix @@ -1 +1 @@ -Fix a long-standing bug where downloading media files was served in an unsafe manner. +Fix a long-standing bug where downloading media files was served in an unsafe manner. Contributed by @joshqou From 9e741955f3c4f40725d6c3fd915015f0d9e32e6d Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 1 Jun 2023 10:30:27 -0400 Subject: [PATCH 9/9] Update changelog. --- changelog.d/15680.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/15680.bugfix b/changelog.d/15680.bugfix index 816db6806726..04ac19b4ec3a 100644 --- a/changelog.d/15680.bugfix +++ b/changelog.d/15680.bugfix @@ -1 +1 @@ -Fix a long-standing bug where downloading media files was served in an unsafe manner. Contributed by @joshqou +Fix a long-standing bug where media files were served in an unsafe manner. Contributed by @joshqou.