diff --git a/CHANGES/4012.bugfix b/CHANGES/4012.bugfix new file mode 100644 index 00000000000..279de94d821 --- /dev/null +++ b/CHANGES/4012.bugfix @@ -0,0 +1,3 @@ +Only encode content-disposition filename parameter using percent-encoding. +Other parameters are encoded to quoted-string or RFC2231 extended parameter +value. diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 6ed8a281ece..a06caef9d6e 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -171,6 +171,7 @@ Manuel Miranda Marat Sharafutdinov Marco Paolini Mariano Anaya +Marko Kohtala Martijn Pieters Martin Melka Martin Richard diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py index 00ec9a5015d..173d79088b8 100644 --- a/aiohttp/helpers.py +++ b/aiohttp/helpers.py @@ -342,14 +342,42 @@ def guess_filename(obj: Any, default: Optional[str]=None) -> Optional[str]: return default +not_qtext_re = re.compile(r'[^\041\043-\133\135-\176]') +QCONTENT = set(chr(i) for i in range(0x20, 0x7f)) | {'\t'} + + +def quoted_string(content: str) -> str: + """Return 7-bit content as quoted-string. + + Format content into a quoted-string as defined in RFC5322 for + Internet Message Format. Notice that this is not the 8-bit HTTP + format, but the 7-bit email format. Content must be in usascii or + a ValueError is raised. + """ + if not (QCONTENT > set(content)): + raise ValueError('bad content for quoted-string {!r}'.format(content)) + return not_qtext_re.sub(lambda x: '\\'+x.group(0), content) + + def content_disposition_header(disptype: str, quote_fields: bool=True, + _charset: str='utf-8', **params: str) -> str: - """Sets ``Content-Disposition`` header. + """Sets ``Content-Disposition`` header for MIME. + + This is the MIME payload Content-Disposition header from RFC 2183 + and RFC 7579 section 4.2, not the HTTP Content-Disposition from + RFC 6266. disptype is a disposition type: inline, attachment, form-data. Should be valid extension token (see RFC 2183) + quote_fields performs value quoting to 7-bit MIME headers + according to RFC 7578. Set to quote_fields to False if recipient + can take 8-bit file names and field values. + + _charset specifies the charset to use when quote_fields is True. + params is a dict with disposition params. """ if not disptype or not (TOKEN > set(disptype)): @@ -363,10 +391,22 @@ def content_disposition_header(disptype: str, if not key or not (TOKEN > set(key)): raise ValueError('bad content disposition parameter' ' {!r}={!r}'.format(key, val)) - qval = quote(val, '') if quote_fields else val - lparams.append((key, '"%s"' % qval)) - if key == 'filename': - lparams.append(('filename*', "utf-8''" + qval)) + if quote_fields: + if key.lower() == 'filename': + qval = quote(val, '', encoding=_charset) + lparams.append((key, '"%s"' % qval)) + else: + try: + qval = quoted_string(val) + except ValueError: + qval = ''.join((_charset, "''", + quote(val, '', encoding=_charset))) + lparams.append((key + '*', qval)) + else: + lparams.append((key, '"%s"' % qval)) + else: + qval = val.replace('\\', '\\\\').replace('"', '\\"') + lparams.append((key, '"%s"' % qval)) sparams = '; '.join('='.join(pair) for pair in lparams) value = '; '.join((value, sparams)) return value diff --git a/aiohttp/payload.py b/aiohttp/payload.py index 7e633028d42..2c0fb5aff9e 100644 --- a/aiohttp/payload.py +++ b/aiohttp/payload.py @@ -187,10 +187,11 @@ def content_type(self) -> str: def set_content_disposition(self, disptype: str, quote_fields: bool=True, + _charset: str='utf-8', **params: Any) -> None: """Sets ``Content-Disposition`` header.""" self._headers[hdrs.CONTENT_DISPOSITION] = content_disposition_header( - disptype, quote_fields=quote_fields, **params) + disptype, quote_fields=quote_fields, _charset=_charset, **params) @abstractmethod async def write(self, writer: AbstractStreamWriter) -> None: diff --git a/tests/test_client_functional.py b/tests/test_client_functional.py index 656a14e2b9a..ce12e8cda9b 100644 --- a/tests/test_client_functional.py +++ b/tests/test_client_functional.py @@ -1439,7 +1439,7 @@ async def handler(request): 'application/octet-stream', 'text/x-python'] assert request.headers['content-disposition'] == ( - "inline; filename=\"conftest.py\"; filename*=utf-8''conftest.py") + "inline; filename=\"conftest.py\"") return web.Response() diff --git a/tests/test_formdata.py b/tests/test_formdata.py index 88cfc0456be..af191c7b986 100644 --- a/tests/test_formdata.py +++ b/tests/test_formdata.py @@ -74,18 +74,18 @@ def test_invalid_formdata_content_transfer_encoding() -> None: async def test_formdata_field_name_is_quoted(buf, writer) -> None: form = FormData(charset="ascii") - form.add_field("emails[]", "xxx@x.co", content_type="multipart/form-data") + form.add_field("email 1", "xxx@x.co", content_type="multipart/form-data") payload = form() await payload.write(writer) - assert b'name="emails%5B%5D"' in buf + assert b'name="email\\ 1"' in buf async def test_formdata_field_name_is_not_quoted(buf, writer) -> None: form = FormData(quote_fields=False, charset="ascii") - form.add_field("emails[]", "xxx@x.co", content_type="multipart/form-data") + form.add_field("email 1", "xxx@x.co", content_type="multipart/form-data") payload = form() await payload.write(writer) - assert b'name="emails[]"' in buf + assert b'name="email 1"' in buf async def test_mark_formdata_as_processed() -> None: diff --git a/tests/test_helpers.py b/tests/test_helpers.py index ef084449956..6f6b58536ea 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -387,6 +387,27 @@ async def test_ceil_timeout_round(loop) -> None: def test_content_disposition() -> None: assert (helpers.content_disposition_header('attachment', foo='bar') == 'attachment; foo="bar"') + assert (helpers.content_disposition_header('attachment', foo='bar[]') == + 'attachment; foo="bar[]"') + assert (helpers.content_disposition_header('attachment', foo=' a""b\\') == + 'attachment; foo="\\ a\\"\\"b\\\\"') + assert (helpers.content_disposition_header('attachment', foo='bär') == + "attachment; foo*=utf-8''b%C3%A4r") + assert (helpers.content_disposition_header('attachment', foo='bär "\\', + quote_fields=False) == + 'attachment; foo="bär \\"\\\\"') + assert (helpers.content_disposition_header('attachment', foo='bär', + _charset='latin-1') == + "attachment; foo*=latin-1''b%E4r") + assert (helpers.content_disposition_header('attachment', filename='bär') == + 'attachment; filename="b%C3%A4r"') + assert (helpers.content_disposition_header('attachment', filename='bär', + _charset='latin-1') == + 'attachment; filename="b%E4r"') + assert (helpers.content_disposition_header('attachment', + filename='bär "\\', + quote_fields=False) == + 'attachment; filename="bär \\"\\\\"') def test_content_disposition_bad_type() -> None: diff --git a/tests/test_multipart.py b/tests/test_multipart.py index e491e82d16e..5e998dca42d 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -1259,7 +1259,7 @@ async def test_write_preserves_content_disposition( b'Content-Type: test/passed\r\n' b'Content-Length: 3\r\n' b'Content-Disposition:' - b' form-data; filename="bug"; filename*=utf-8\'\'bug' + b' form-data; filename="bug"' ) assert message == b'foo\r\n--:--\r\n' @@ -1344,7 +1344,7 @@ async def test_reset_content_disposition_header(self, buf, stream): b'--:\r\n' b'Content-Type: text/plain\r\n' b'Content-Disposition:' - b' attachments; filename="bug.py"; filename*=utf-8\'\'bug.py\r\n' + b' attachments; filename="bug.py"\r\n' b'Content-Length: %s' b'' % (str(content_length).encode(),) ) diff --git a/tests/test_web_functional.py b/tests/test_web_functional.py index 4be7a962303..ec670cc6ce9 100644 --- a/tests/test_web_functional.py +++ b/tests/test_web_functional.py @@ -850,7 +850,7 @@ async def handler(request): assert 200 == resp.status resp_data = await resp.read() expected_content_disposition = ( - 'attachment; filename="conftest.py"; filename*=utf-8\'\'conftest.py' + 'attachment; filename="conftest.py"' ) assert resp_data == data assert resp.headers.get('Content-Type') in ( @@ -879,7 +879,7 @@ async def handler(request): assert 200 == resp.status resp_data = await resp.read() expected_content_disposition = ( - 'attachment; filename="conftest.py"; filename*=utf-8\'\'conftest.py' + 'attachment; filename="conftest.py"' ) assert resp_data == data assert resp.headers.get('Content-Type') == 'text/binary' @@ -911,7 +911,7 @@ async def handler(request): assert resp.headers.get('Content-Type') == 'text/binary' assert resp.headers.get('Content-Length') == str(len(resp_data)) assert (resp.headers.get('Content-Disposition') == - 'inline; filename="test.txt"; filename*=utf-8\'\'test.txt') + 'inline; filename="test.txt"') async def test_response_with_payload_stringio(aiohttp_client, fname) -> None: @@ -1553,7 +1553,7 @@ async def handler(request): disp = multipart.parse_content_disposition( resp.headers['content-disposition']) assert disp == ('attachment', - {'name': 'file', 'filename': 'file', 'filename*': 'file'}) + {'name': 'file', 'filename': 'file'}) async def test_response_with_bodypart_named(aiohttp_client, tmp_path) -> None: @@ -1580,7 +1580,7 @@ async def handler(request): resp.headers['content-disposition']) assert disp == ( 'attachment', - {'name': 'file', 'filename': 'foobar.txt', 'filename*': 'foobar.txt'} + {'name': 'file', 'filename': 'foobar.txt'} )