diff --git a/CHANGES/4012.bugfix b/CHANGES/4012.bugfix new file mode 100644 index 00000000000..279de94d821 --- /dev/null +++ b/CHANGES/4012.bugfix @@ -0,0 +1,3 @@ +Only encode content-disposition filename parameter using percent-encoding. +Other parameters are encoded to quoted-string or RFC2231 extended parameter +value. diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 0f474de6b04..1883e6e46d0 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -164,6 +164,7 @@ Manuel Miranda Marat Sharafutdinov Marco Paolini Mariano Anaya +Marko Kohtala Martin Melka Martin Richard Mathias Fröjdman diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py index b64b7fafb6a..427d7586342 100644 --- a/aiohttp/helpers.py +++ b/aiohttp/helpers.py @@ -339,14 +339,40 @@ def guess_filename(obj: Any, default: Optional[str]=None) -> Optional[str]: return default +not_qtext_re = re.compile(r'[^\041\043-\133\135-\176]') +QCONTENT = set(chr(i) for i in range(0x20, 0x7f)) | {'\t'} + + +def quoted_string(content: str) -> str: + """Return content as quoted-string. + + Format content into a quoted-string as defined in RFC5322 for + Internet Message Format. Notice that this is not the 8-bit HTTP + format, but the 7-bit email format. Content must be in usascii or + a ValueError is raised. + """ + if not (QCONTENT > set(content)): + raise ValueError('bad content for quoted-string {!r}'.format(content)) + return not_qtext_re.sub(lambda x: '\\'+x.group(0), content) + + def content_disposition_header(disptype: str, quote_fields: bool=True, + _charset: str='utf-8', **params: str) -> str: - """Sets ``Content-Disposition`` header. + """Sets ``Content-Disposition`` header for MIME. + + This is the MIME payload Content-Disposition header from RFC 2183 + and RFC 7579 section 4.2, not the HTTP Content-Disposition from + RFC 6266. disptype is a disposition type: inline, attachment, form-data. Should be valid extension token (see RFC 2183) + quote_fields performs value quoting according to RFC 7578. Set to + False if values are already quoted by caller. If the submission + form uses some other charset but utf-8, specify the _charset. + params is a dict with disposition params. """ if not disptype or not (TOKEN > set(disptype)): @@ -360,10 +386,20 @@ def content_disposition_header(disptype: str, if not key or not (TOKEN > set(key)): raise ValueError('bad content disposition parameter' ' {!r}={!r}'.format(key, val)) - qval = quote(val, '') if quote_fields else val - lparams.append((key, '"%s"' % qval)) - if key == 'filename': - lparams.append(('filename*', "utf-8''" + qval)) + if key.lower() == 'filename': + if quote_fields: + qval = quote(val, '', encoding=_charset) + else: + qval = val + lparams.append((key, '"%s"' % qval)) + else: + try: + qval = quoted_string(val) if quote_fields else val + except ValueError: + qval = _charset + "''" + quote(val, '', encoding=_charset) + lparams.append((key + '*', qval)) + else: + lparams.append((key, '"%s"' % qval)) sparams = '; '.join('='.join(pair) for pair in lparams) value = '; '.join((value, sparams)) return value diff --git a/aiohttp/payload.py b/aiohttp/payload.py index 5b110ba1dd1..eff2a99586d 100644 --- a/aiohttp/payload.py +++ b/aiohttp/payload.py @@ -187,10 +187,11 @@ def content_type(self) -> str: def set_content_disposition(self, disptype: str, quote_fields: bool=True, + _charset: str='utf-8', **params: Any) -> None: """Sets ``Content-Disposition`` header.""" self._headers[hdrs.CONTENT_DISPOSITION] = content_disposition_header( - disptype, quote_fields=quote_fields, **params) + disptype, quote_fields=quote_fields, _charset=_charset, **params) @abstractmethod async def write(self, writer: AbstractStreamWriter) -> None: diff --git a/tests/test_client_functional.py b/tests/test_client_functional.py index cd372132e09..017b8f03a4b 100644 --- a/tests/test_client_functional.py +++ b/tests/test_client_functional.py @@ -1389,7 +1389,7 @@ async def handler(request): 'text/plain', 'application/octet-stream'] assert request.headers['content-disposition'] == ( - "inline; filename=\"conftest.py\"; filename*=utf-8''conftest.py") + "inline; filename=\"conftest.py\"") return web.Response() diff --git a/tests/test_formdata.py b/tests/test_formdata.py index 55f8653d6d6..6b783082609 100644 --- a/tests/test_formdata.py +++ b/tests/test_formdata.py @@ -74,15 +74,15 @@ def test_invalid_formdata_content_transfer_encoding() -> None: async def test_formdata_field_name_is_quoted(buf, writer) -> None: form = FormData(charset="ascii") - form.add_field("emails[]", "xxx@x.co", content_type="multipart/form-data") + form.add_field("email 1", "xxx@x.co", content_type="multipart/form-data") payload = form() await payload.write(writer) - assert b'name="emails%5B%5D"' in buf + assert b'name="email\\ 1"' in buf async def test_formdata_field_name_is_not_quoted(buf, writer) -> None: form = FormData(quote_fields=False, charset="ascii") - form.add_field("emails[]", "xxx@x.co", content_type="multipart/form-data") + form.add_field("email 1", "xxx@x.co", content_type="multipart/form-data") payload = form() await payload.write(writer) - assert b'name="emails[]"' in buf + assert b'name="email 1"' in buf diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 864498ec3cd..f927b34ba3c 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -388,6 +388,26 @@ def test_ceil_timeout_no_task(loop) -> None: def test_content_disposition() -> None: assert (helpers.content_disposition_header('attachment', foo='bar') == 'attachment; foo="bar"') + assert (helpers.content_disposition_header('attachment', foo='bar[]') == + 'attachment; foo="bar[]"') + assert (helpers.content_disposition_header('attachment', foo=' a""b\\') == + 'attachment; foo="\\ a\\"\\"b\\\\"') + assert (helpers.content_disposition_header('attachment', foo='bär') == + "attachment; foo*=utf-8''b%C3%A4r") + assert (helpers.content_disposition_header('attachment', foo='bär', + quote_fields=False) == + 'attachment; foo="bär"') + assert (helpers.content_disposition_header('attachment', foo='bär', + _charset='latin-1') == + "attachment; foo*=latin-1''b%E4r") + assert (helpers.content_disposition_header('attachment', filename='bär') == + 'attachment; filename="b%C3%A4r"') + assert (helpers.content_disposition_header('attachment', filename='bär', + _charset='latin-1') == + 'attachment; filename="b%E4r"') + assert (helpers.content_disposition_header('attachment', filename='bär', + quote_fields=False) == + 'attachment; filename="bär"') def test_content_disposition_bad_type() -> None: diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 753205972d4..6c7dbce13c0 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -1259,7 +1259,7 @@ async def test_write_preserves_content_disposition( b'Content-Type: test/passed\r\n' b'Content-Length: 3\r\n' b'Content-Disposition:' - b' form-data; filename="bug"; filename*=utf-8\'\'bug' + b' form-data; filename="bug"' ) assert message == b'foo\r\n--:--\r\n' @@ -1350,7 +1350,7 @@ async def test_reset_content_disposition_header(self, buf, stream): b'--:\r\n' b'Content-Type: text/plain\r\n' b'Content-Disposition:' - b' attachments; filename="bug.py"; filename*=utf-8\'\'bug.py\r\n' + b' attachments; filename="bug.py"\r\n' b'Content-Length: %s' b'' % (str(content_length).encode(),) ) diff --git a/tests/test_web_functional.py b/tests/test_web_functional.py index 81b60642ce0..280c8a53082 100644 --- a/tests/test_web_functional.py +++ b/tests/test_web_functional.py @@ -854,7 +854,7 @@ async def handler(request): assert 200 == resp.status resp_data = await resp.read() expected_content_disposition = ( - 'attachment; filename="conftest.py"; filename*=utf-8\'\'conftest.py' + 'attachment; filename="conftest.py"' ) assert resp_data == data assert resp.headers.get('Content-Type') in ( @@ -883,7 +883,7 @@ async def handler(request): assert 200 == resp.status resp_data = await resp.read() expected_content_disposition = ( - 'attachment; filename="conftest.py"; filename*=utf-8\'\'conftest.py' + 'attachment; filename="conftest.py"' ) assert resp_data == data assert resp.headers.get('Content-Type') == 'text/binary' @@ -915,7 +915,7 @@ async def handler(request): assert resp.headers.get('Content-Type') == 'text/binary' assert resp.headers.get('Content-Length') == str(len(resp_data)) assert (resp.headers.get('Content-Disposition') == - 'inline; filename="test.txt"; filename*=utf-8\'\'test.txt') + 'inline; filename="test.txt"') async def test_response_with_payload_stringio(aiohttp_client, fname) -> None: @@ -1557,7 +1557,7 @@ async def handler(request): disp = multipart.parse_content_disposition( resp.headers['content-disposition']) assert disp == ('attachment', - {'name': 'file', 'filename': 'file', 'filename*': 'file'}) + {'name': 'file', 'filename': 'file'}) async def test_response_with_bodypart_named(aiohttp_client, tmp_path) -> None: @@ -1584,7 +1584,7 @@ async def handler(request): resp.headers['content-disposition']) assert disp == ( 'attachment', - {'name': 'file', 'filename': 'foobar.txt', 'filename*': 'foobar.txt'} + {'name': 'file', 'filename': 'foobar.txt'} )