From 2fb3daaf5fc5c3d898f2c21d3e58c44fbb16a352 Mon Sep 17 00:00:00 2001 From: Marko Kohtala Date: Thu, 26 Nov 2020 16:52:00 +0200 Subject: [PATCH] Fix #4012 encoding of content-disposition parameters (#4031) Posting form data with field names containing "[]" were not recognized by other multipart/form-data parsers. The percent-encoding is only to be used on the filename parameter that follows how of a "file:" URI may be encoded according to RFC7578. --- CHANGES/4012.bugfix | 3 ++ CONTRIBUTORS.txt | 2 ++ aiohttp/helpers.py | 52 +++++++++++++++++++++++++++++---- aiohttp/payload.py | 8 +++-- tests/test_client_functional.py | 2 +- tests/test_formdata.py | 8 ++--- tests/test_helpers.py | 24 +++++++++++---- tests/test_multipart.py | 4 +-- tests/test_web_functional.py | 23 ++++----------- 9 files changed, 88 insertions(+), 38 deletions(-) create mode 100644 CHANGES/4012.bugfix diff --git a/CHANGES/4012.bugfix b/CHANGES/4012.bugfix new file mode 100644 index 00000000000..279de94d821 --- /dev/null +++ b/CHANGES/4012.bugfix @@ -0,0 +1,3 @@ +Only encode content-disposition filename parameter using percent-encoding. +Other parameters are encoded to quoted-string or RFC2231 extended parameter +value. diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index bef1c39b6be..2627868cedc 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -197,6 +197,8 @@ Manuel Miranda Marat Sharafutdinov Marco Paolini Mariano Anaya +Mariusz Masztalerczuk +Marko Kohtala Martijn Pieters Martin Melka Martin Richard diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py index f977fef01de..ac50c4481e3 100644 --- a/aiohttp/helpers.py +++ b/aiohttp/helpers.py @@ -376,14 +376,41 @@ def guess_filename(obj: Any, default: Optional[str] = None) -> Optional[str]: return default +not_qtext_re = re.compile(r"[^\041\043-\133\135-\176]") +QCONTENT = {chr(i) for i in range(0x20, 0x7F)} | {"\t"} + + +def quoted_string(content: str) -> str: + """Return 7-bit content as quoted-string. + + Format content into a quoted-string as defined in RFC5322 for + Internet Message Format. Notice that this is not the 8-bit HTTP + format, but the 7-bit email format. Content must be in usascii or + a ValueError is raised. + """ + if not (QCONTENT > set(content)): + raise ValueError(f"bad content for quoted-string {content!r}") + return not_qtext_re.sub(lambda x: "\\" + x.group(0), content) + + def content_disposition_header( - disptype: str, quote_fields: bool = True, **params: str + disptype: str, quote_fields: bool = True, _charset: str = "utf-8", **params: str ) -> str: - """Sets ``Content-Disposition`` header. + """Sets ``Content-Disposition`` header for MIME. + + This is the MIME payload Content-Disposition header from RFC 2183 + and RFC 7579 section 4.2, not the HTTP Content-Disposition from + RFC 6266. disptype is a disposition type: inline, attachment, form-data. Should be valid extension token (see RFC 2183) + quote_fields performs value quoting to 7-bit MIME headers + according to RFC 7578. Set to quote_fields to False if recipient + can take 8-bit file names and field values. + + _charset specifies the charset to use when quote_fields is True. + params is a dict with disposition params. """ if not disptype or not (TOKEN > set(disptype)): @@ -397,10 +424,23 @@ def content_disposition_header( raise ValueError( "bad content disposition parameter" " {!r}={!r}".format(key, val) ) - qval = quote(val, "") if quote_fields else val - lparams.append((key, '"%s"' % qval)) - if key == "filename": - lparams.append(("filename*", "utf-8''" + qval)) + if quote_fields: + if key.lower() == "filename": + qval = quote(val, "", encoding=_charset) + lparams.append((key, '"%s"' % qval)) + else: + try: + qval = quoted_string(val) + except ValueError: + qval = "".join( + (_charset, "''", quote(val, "", encoding=_charset)) + ) + lparams.append((key + "*", qval)) + else: + lparams.append((key, '"%s"' % qval)) + else: + qval = val.replace("\\", "\\\\").replace('"', '\\"') + lparams.append((key, '"%s"' % qval)) sparams = "; ".join("=".join(pair) for pair in lparams) value = "; ".join((value, sparams)) return value diff --git a/aiohttp/payload.py b/aiohttp/payload.py index 097bf5da1c3..ac5d6bece39 100644 --- a/aiohttp/payload.py +++ b/aiohttp/payload.py @@ -197,11 +197,15 @@ def content_type(self) -> str: return self._headers[hdrs.CONTENT_TYPE] def set_content_disposition( - self, disptype: str, quote_fields: bool = True, **params: Any + self, + disptype: str, + quote_fields: bool = True, + _charset: str = "utf-8", + **params: Any, ) -> None: """Sets ``Content-Disposition`` header.""" self._headers[hdrs.CONTENT_DISPOSITION] = content_disposition_header( - disptype, quote_fields=quote_fields, **params + disptype, quote_fields=quote_fields, _charset=_charset, **params ) @abstractmethod diff --git a/tests/test_client_functional.py b/tests/test_client_functional.py index c320382ebc5..3e5c259aca5 100644 --- a/tests/test_client_functional.py +++ b/tests/test_client_functional.py @@ -1427,7 +1427,7 @@ async def handler(request): "text/x-python", ] assert request.headers["content-disposition"] == ( - "inline; filename=\"conftest.py\"; filename*=utf-8''conftest.py" + 'inline; filename="conftest.py"' ) return web.Response() diff --git a/tests/test_formdata.py b/tests/test_formdata.py index a556058155f..4bb8aa07587 100644 --- a/tests/test_formdata.py +++ b/tests/test_formdata.py @@ -72,18 +72,18 @@ def test_invalid_formdata_content_transfer_encoding() -> None: async def test_formdata_field_name_is_quoted(buf, writer) -> None: form = FormData(charset="ascii") - form.add_field("emails[]", "xxx@x.co", content_type="multipart/form-data") + form.add_field("email 1", "xxx@x.co", content_type="multipart/form-data") payload = form() await payload.write(writer) - assert b'name="emails%5B%5D"' in buf + assert b'name="email\\ 1"' in buf async def test_formdata_field_name_is_not_quoted(buf, writer) -> None: form = FormData(quote_fields=False, charset="ascii") - form.add_field("emails[]", "xxx@x.co", content_type="multipart/form-data") + form.add_field("email 1", "xxx@x.co", content_type="multipart/form-data") payload = form() await payload.write(writer) - assert b'name="emails[]"' in buf + assert b'name="email 1"' in buf async def test_mark_formdata_as_processed(aiohttp_client) -> None: diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 5dd8e769651..582d03166b6 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -414,11 +414,25 @@ async def test_ceil_timeout_small() -> None: # -------------------------------- ContentDisposition ------------------- -def test_content_disposition() -> None: - assert ( - helpers.content_disposition_header("attachment", foo="bar") - == 'attachment; foo="bar"' - ) +@pytest.mark.parametrize( + "kwargs, result", + [ + (dict(foo="bar"), 'attachment; foo="bar"'), + (dict(foo="bar[]"), 'attachment; foo="bar[]"'), + (dict(foo=' a""b\\'), 'attachment; foo="\\ a\\"\\"b\\\\"'), + (dict(foo="bär"), "attachment; foo*=utf-8''b%C3%A4r"), + (dict(foo='bär "\\', quote_fields=False), 'attachment; foo="bär \\"\\\\"'), + (dict(foo="bär", _charset="latin-1"), "attachment; foo*=latin-1''b%E4r"), + (dict(filename="bär"), 'attachment; filename="b%C3%A4r"'), + (dict(filename="bär", _charset="latin-1"), 'attachment; filename="b%E4r"'), + ( + dict(filename='bär "\\', quote_fields=False), + 'attachment; filename="bär \\"\\\\"', + ), + ], +) +def test_content_disposition(kwargs, result) -> None: + assert helpers.content_disposition_header("attachment", **kwargs) == result def test_content_disposition_bad_type() -> None: diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 6c3f1214d9e..54dbb7ea827 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -1105,7 +1105,7 @@ async def test_write_preserves_content_disposition(self, buf, stream) -> None: b"Content-Type: test/passed\r\n" b"Content-Length: 3\r\n" b"Content-Disposition:" - b" form-data; filename=\"bug\"; filename*=utf-8''bug" + b' form-data; filename="bug"' ) assert message == b"foo\r\n--:--\r\n" @@ -1186,7 +1186,7 @@ async def test_reset_content_disposition_header(self, buf, stream): b"--:\r\n" b"Content-Type: text/plain\r\n" b"Content-Disposition:" - b" attachments; filename=\"bug.py\"; filename*=utf-8''bug.py\r\n" + b' attachments; filename="bug.py"\r\n' b"Content-Length: %s" b"" % (str(content_length).encode(),) ) diff --git a/tests/test_web_functional.py b/tests/test_web_functional.py index 88d0dfd1c7f..185287d64e1 100644 --- a/tests/test_web_functional.py +++ b/tests/test_web_functional.py @@ -894,9 +894,7 @@ async def handler(request): resp = await client.get("/") assert 200 == resp.status resp_data = await resp.read() - expected_content_disposition = ( - "attachment; filename=\"conftest.py\"; filename*=utf-8''conftest.py" - ) + expected_content_disposition = 'attachment; filename="conftest.py"' assert resp_data == data assert resp.headers.get("Content-Type") in ( "application/octet-stream", @@ -930,9 +928,7 @@ async def handler(request): resp = await client.get("/") assert 200 == resp.status resp_data = await resp.read() - expected_content_disposition = ( - "attachment; filename=\"conftest.py\"; filename*=utf-8''conftest.py" - ) + expected_content_disposition = 'attachment; filename="conftest.py"' assert resp_data == data assert resp.headers.get("Content-Type") == "text/binary" assert resp.headers.get("Content-Length") == str(len(resp_data)) @@ -964,10 +960,7 @@ async def handler(request): assert resp_data == data assert resp.headers.get("Content-Type") == "text/binary" assert resp.headers.get("Content-Length") == str(len(resp_data)) - assert ( - resp.headers.get("Content-Disposition") - == "inline; filename=\"test.txt\"; filename*=utf-8''test.txt" - ) + assert resp.headers.get("Content-Disposition") == 'inline; filename="test.txt"' outer_file_descriptor.close() @@ -1619,10 +1612,7 @@ async def handler(request): assert body == b"test" disp = multipart.parse_content_disposition(resp.headers["content-disposition"]) - assert disp == ( - "attachment", - {"name": "file", "filename": "file", "filename*": "file"}, - ) + assert disp == ("attachment", {"name": "file", "filename": "file"}) async def test_response_with_bodypart_named(aiohttp_client, tmpdir) -> None: @@ -1646,10 +1636,7 @@ async def handler(request): assert body == b"test" disp = multipart.parse_content_disposition(resp.headers["content-disposition"]) - assert disp == ( - "attachment", - {"name": "file", "filename": "foobar.txt", "filename*": "foobar.txt"}, - ) + assert disp == ("attachment", {"name": "file", "filename": "foobar.txt"}) async def test_response_with_bodypart_invalid_name(aiohttp_client) -> None: