From f8868606baf36fe10361a88e823d7a581814b350 Mon Sep 17 00:00:00 2001 From: Marko Kohtala Date: Sat, 31 Aug 2019 23:01:48 +0300 Subject: [PATCH] Fix #4012 encoding of content-disposition parameters Posting form data with field names containing "[]" were not recognized by other multipart/form-data parsers. The percent-encoding is only to be used on the filename parameter that follows how of a "file:" URI may be encoded according to RFC7578. --- CHANGES/4012.bugfix | 3 ++ CONTRIBUTORS.txt | 1 + aiohttp/helpers.py | 50 +++++++++++++++++++++++++++++---- aiohttp/payload.py | 3 +- tests/test_client_functional.py | 2 +- tests/test_formdata.py | 8 +++--- tests/test_helpers.py | 21 ++++++++++++++ tests/test_multipart.py | 4 +-- tests/test_web_functional.py | 10 +++---- 9 files changed, 84 insertions(+), 18 deletions(-) create mode 100644 CHANGES/4012.bugfix diff --git a/CHANGES/4012.bugfix b/CHANGES/4012.bugfix new file mode 100644 index 00000000000..279de94d821 --- /dev/null +++ b/CHANGES/4012.bugfix @@ -0,0 +1,3 @@ +Only encode content-disposition filename parameter using percent-encoding. +Other parameters are encoded to quoted-string or RFC2231 extended parameter +value. diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 437676f4c1a..f54dbe43e19 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -183,6 +183,7 @@ Marat Sharafutdinov Marco Paolini Mariano Anaya Mariusz Masztalerczuk +Marko Kohtala Martijn Pieters Martin Melka Martin Richard diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py index b0829b6f59c..c8a5c655f75 100644 --- a/aiohttp/helpers.py +++ b/aiohttp/helpers.py @@ -352,14 +352,42 @@ def guess_filename(obj: Any, default: Optional[str]=None) -> Optional[str]: return default +not_qtext_re = re.compile(r'[^\041\043-\133\135-\176]') +QCONTENT = set(chr(i) for i in range(0x20, 0x7f)) | {'\t'} + + +def quoted_string(content: str) -> str: + """Return 7-bit content as quoted-string. + + Format content into a quoted-string as defined in RFC5322 for + Internet Message Format. Notice that this is not the 8-bit HTTP + format, but the 7-bit email format. Content must be in usascii or + a ValueError is raised. + """ + if not (QCONTENT > set(content)): + raise ValueError('bad content for quoted-string {!r}'.format(content)) + return not_qtext_re.sub(lambda x: '\\'+x.group(0), content) + + def content_disposition_header(disptype: str, quote_fields: bool=True, + _charset: str='utf-8', **params: str) -> str: - """Sets ``Content-Disposition`` header. + """Sets ``Content-Disposition`` header for MIME. + + This is the MIME payload Content-Disposition header from RFC 2183 + and RFC 7579 section 4.2, not the HTTP Content-Disposition from + RFC 6266. disptype is a disposition type: inline, attachment, form-data. Should be valid extension token (see RFC 2183) + quote_fields performs value quoting to 7-bit MIME headers + according to RFC 7578. Set to quote_fields to False if recipient + can take 8-bit file names and field values. + + _charset specifies the charset to use when quote_fields is True. + params is a dict with disposition params. """ if not disptype or not (TOKEN > set(disptype)): @@ -373,10 +401,22 @@ def content_disposition_header(disptype: str, if not key or not (TOKEN > set(key)): raise ValueError('bad content disposition parameter' ' {!r}={!r}'.format(key, val)) - qval = quote(val, '') if quote_fields else val - lparams.append((key, '"%s"' % qval)) - if key == 'filename': - lparams.append(('filename*', "utf-8''" + qval)) + if quote_fields: + if key.lower() == 'filename': + qval = quote(val, '', encoding=_charset) + lparams.append((key, '"%s"' % qval)) + else: + try: + qval = quoted_string(val) + except ValueError: + qval = ''.join((_charset, "''", + quote(val, '', encoding=_charset))) + lparams.append((key + '*', qval)) + else: + lparams.append((key, '"%s"' % qval)) + else: + qval = val.replace('\\', '\\\\').replace('"', '\\"') + lparams.append((key, '"%s"' % qval)) sparams = '; '.join('='.join(pair) for pair in lparams) value = '; '.join((value, sparams)) return value diff --git a/aiohttp/payload.py b/aiohttp/payload.py index ccc64f38526..5db860ab22f 100644 --- a/aiohttp/payload.py +++ b/aiohttp/payload.py @@ -187,10 +187,11 @@ def content_type(self) -> str: def set_content_disposition(self, disptype: str, quote_fields: bool=True, + _charset: str='utf-8', **params: Any) -> None: """Sets ``Content-Disposition`` header.""" self._headers[hdrs.CONTENT_DISPOSITION] = content_disposition_header( - disptype, quote_fields=quote_fields, **params) + disptype, quote_fields=quote_fields, _charset=_charset, **params) @abstractmethod async def write(self, writer: AbstractStreamWriter) -> None: diff --git a/tests/test_client_functional.py b/tests/test_client_functional.py index ecebea642d2..6f21e0b4495 100644 --- a/tests/test_client_functional.py +++ b/tests/test_client_functional.py @@ -1439,7 +1439,7 @@ async def handler(request): 'application/octet-stream', 'text/x-python'] assert request.headers['content-disposition'] == ( - "inline; filename=\"conftest.py\"; filename*=utf-8''conftest.py") + "inline; filename=\"conftest.py\"") return web.Response() diff --git a/tests/test_formdata.py b/tests/test_formdata.py index 88cfc0456be..af191c7b986 100644 --- a/tests/test_formdata.py +++ b/tests/test_formdata.py @@ -74,18 +74,18 @@ def test_invalid_formdata_content_transfer_encoding() -> None: async def test_formdata_field_name_is_quoted(buf, writer) -> None: form = FormData(charset="ascii") - form.add_field("emails[]", "xxx@x.co", content_type="multipart/form-data") + form.add_field("email 1", "xxx@x.co", content_type="multipart/form-data") payload = form() await payload.write(writer) - assert b'name="emails%5B%5D"' in buf + assert b'name="email\\ 1"' in buf async def test_formdata_field_name_is_not_quoted(buf, writer) -> None: form = FormData(quote_fields=False, charset="ascii") - form.add_field("emails[]", "xxx@x.co", content_type="multipart/form-data") + form.add_field("email 1", "xxx@x.co", content_type="multipart/form-data") payload = form() await payload.write(writer) - assert b'name="emails[]"' in buf + assert b'name="email 1"' in buf async def test_mark_formdata_as_processed() -> None: diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 19b51aed5d0..0531a276b97 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -387,6 +387,27 @@ async def test_ceil_timeout_round(loop) -> None: def test_content_disposition() -> None: assert (helpers.content_disposition_header('attachment', foo='bar') == 'attachment; foo="bar"') + assert (helpers.content_disposition_header('attachment', foo='bar[]') == + 'attachment; foo="bar[]"') + assert (helpers.content_disposition_header('attachment', foo=' a""b\\') == + 'attachment; foo="\\ a\\"\\"b\\\\"') + assert (helpers.content_disposition_header('attachment', foo='bär') == + "attachment; foo*=utf-8''b%C3%A4r") + assert (helpers.content_disposition_header('attachment', foo='bär "\\', + quote_fields=False) == + 'attachment; foo="bär \\"\\\\"') + assert (helpers.content_disposition_header('attachment', foo='bär', + _charset='latin-1') == + "attachment; foo*=latin-1''b%E4r") + assert (helpers.content_disposition_header('attachment', filename='bär') == + 'attachment; filename="b%C3%A4r"') + assert (helpers.content_disposition_header('attachment', filename='bär', + _charset='latin-1') == + 'attachment; filename="b%E4r"') + assert (helpers.content_disposition_header('attachment', + filename='bär "\\', + quote_fields=False) == + 'attachment; filename="bär \\"\\\\"') def test_content_disposition_bad_type() -> None: diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 8f828b2338c..67d3483cbab 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -1264,7 +1264,7 @@ async def test_write_preserves_content_disposition( b'Content-Type: test/passed\r\n' b'Content-Length: 3\r\n' b'Content-Disposition:' - b' form-data; filename="bug"; filename*=utf-8\'\'bug' + b' form-data; filename="bug"' ) assert message == b'foo\r\n--:--\r\n' @@ -1349,7 +1349,7 @@ async def test_reset_content_disposition_header(self, buf, stream): b'--:\r\n' b'Content-Type: text/plain\r\n' b'Content-Disposition:' - b' attachments; filename="bug.py"; filename*=utf-8\'\'bug.py\r\n' + b' attachments; filename="bug.py"\r\n' b'Content-Length: %s' b'' % (str(content_length).encode(),) ) diff --git a/tests/test_web_functional.py b/tests/test_web_functional.py index 5c011c2a006..322e20f09c7 100644 --- a/tests/test_web_functional.py +++ b/tests/test_web_functional.py @@ -854,7 +854,7 @@ async def handler(request): assert 200 == resp.status resp_data = await resp.read() expected_content_disposition = ( - 'attachment; filename="conftest.py"; filename*=utf-8\'\'conftest.py' + 'attachment; filename="conftest.py"' ) assert resp_data == data assert resp.headers.get('Content-Type') in ( @@ -883,7 +883,7 @@ async def handler(request): assert 200 == resp.status resp_data = await resp.read() expected_content_disposition = ( - 'attachment; filename="conftest.py"; filename*=utf-8\'\'conftest.py' + 'attachment; filename="conftest.py"' ) assert resp_data == data assert resp.headers.get('Content-Type') == 'text/binary' @@ -915,7 +915,7 @@ async def handler(request): assert resp.headers.get('Content-Type') == 'text/binary' assert resp.headers.get('Content-Length') == str(len(resp_data)) assert (resp.headers.get('Content-Disposition') == - 'inline; filename="test.txt"; filename*=utf-8\'\'test.txt') + 'inline; filename="test.txt"') async def test_response_with_payload_stringio(aiohttp_client, fname) -> None: @@ -1585,7 +1585,7 @@ async def handler(request): disp = multipart.parse_content_disposition( resp.headers['content-disposition']) assert disp == ('attachment', - {'name': 'file', 'filename': 'file', 'filename*': 'file'}) + {'name': 'file', 'filename': 'file'}) async def test_response_with_bodypart_named(aiohttp_client, tmp_path) -> None: @@ -1612,7 +1612,7 @@ async def handler(request): resp.headers['content-disposition']) assert disp == ( 'attachment', - {'name': 'file', 'filename': 'foobar.txt', 'filename*': 'foobar.txt'} + {'name': 'file', 'filename': 'foobar.txt'} )