diff --git a/tornado/http1connection.py b/tornado/http1connection.py index 3bdffac115..1a23f5c7fd 100644 --- a/tornado/http1connection.py +++ b/tornado/http1connection.py @@ -391,14 +391,11 @@ def write_headers( self._request_start_line = start_line lines.append(utf8("%s %s HTTP/1.1" % (start_line[0], start_line[1]))) # Client requests with a non-empty body must have either a - # Content-Length or a Transfer-Encoding. + # Content-Length or a Transfer-Encoding. If Content-Length is not + # present we'll add our Transfer-Encoding below. self._chunking_output = ( start_line.method in ("POST", "PUT", "PATCH") and "Content-Length" not in headers - and ( - "Transfer-Encoding" not in headers - or headers["Transfer-Encoding"] == "chunked" - ) ) else: assert isinstance(start_line, httputil.ResponseStartLine) @@ -420,9 +417,6 @@ def write_headers( and (start_line.code < 100 or start_line.code >= 200) # No need to chunk the output if a Content-Length is specified. and "Content-Length" not in headers - # Applications are discouraged from touching Transfer-Encoding, - # but if they do, leave it alone. - and "Transfer-Encoding" not in headers ) # If connection to a 1.1 client will be closed, inform client if ( @@ -562,7 +556,7 @@ def _can_keep_alive( return connection_header != "close" elif ( "Content-Length" in headers - or headers.get("Transfer-Encoding", "").lower() == "chunked" + or is_transfer_encoding_chunked(headers) or getattr(start_line, "method", None) in ("HEAD", "GET") ): # start_line may be a request or response start line; only @@ -600,13 +594,6 @@ def _read_body( delegate: httputil.HTTPMessageDelegate, ) -> Optional[Awaitable[None]]: if "Content-Length" in headers: - if "Transfer-Encoding" in headers: - # Response cannot contain both Content-Length and - # Transfer-Encoding headers. - # http://tools.ietf.org/html/rfc7230#section-3.3.3 - raise httputil.HTTPInputError( - "Response with both Transfer-Encoding and Content-Length" - ) if "," in headers["Content-Length"]: # Proxies sometimes cause Content-Length headers to get # duplicated. If all the values are identical then we can @@ -633,20 +620,22 @@ def _read_body( else: content_length = None + is_chunked = is_transfer_encoding_chunked(headers) + if code == 204: # This response code is not allowed to have a non-empty body, # and has an implicit length of zero instead of read-until-close. # http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.3 - if "Transfer-Encoding" in headers or content_length not in (None, 0): + if is_chunked or content_length not in (None, 0): raise httputil.HTTPInputError( "Response with code %d should not have body" % code ) content_length = 0 + if is_chunked: + return self._read_chunked_body(delegate) if content_length is not None: return self._read_fixed_body(content_length, delegate) - if headers.get("Transfer-Encoding", "").lower() == "chunked": - return self._read_chunked_body(delegate) if self.is_client: return self._read_body_until_close(delegate) return None @@ -865,3 +854,33 @@ def parse_hex_int(s: str) -> int: if HEXDIGITS.fullmatch(s) is None: raise ValueError("not a hexadecimal integer: %r" % s) return int(s, 16) + + +def is_transfer_encoding_chunked(headers: httputil.HTTPHeaders) -> bool: + """Returns true if the headers specify Transfer-Encoding: chunked. + + Raise httputil.HTTPInputError if any other transfer encoding is used. + """ + # Note that transfer-encoding is an area in which postel's law can lead + # us astray. If a proxy and a backend server are liberal in what they accept, + # but accept slightly different things, this can lead to mismatched framing + # and request smuggling issues. Therefore we are as strict as possible here + # (even technically going beyond the requirements of the RFCs: a value of + # ",chunked" is legal but doesn't appear in practice for legitimate traffic) + if "Transfer-Encoding" not in headers: + return False + if "Content-Length" in headers: + # Message cannot contain both Content-Length and + # Transfer-Encoding headers. + # http://tools.ietf.org/html/rfc7230#section-3.3.3 + raise httputil.HTTPInputError( + "Message with both Transfer-Encoding and Content-Length" + ) + if headers["Transfer-Encoding"].lower() == "chunked": + return True + # We do not support any transfer-encodings other than chunked, and we do not + # expect to add any support because the concept of transfer-encoding has + # been removed in HTTP/2. + raise httputil.HTTPInputError( + "Unsupported Transfer-Encoding %s" % headers["Transfer-Encoding"] + ) diff --git a/tornado/httputil.py b/tornado/httputil.py index b21d8046c4..9ce992d82b 100644 --- a/tornado/httputil.py +++ b/tornado/httputil.py @@ -62,6 +62,9 @@ from asyncio import Future # noqa: F401 import unittest # noqa: F401 +# To be used with str.strip() and related methods. +HTTP_WHITESPACE = " \t" + @lru_cache(1000) def _normalize_header(name: str) -> str: @@ -171,7 +174,7 @@ def parse_line(self, line: str) -> None: # continuation of a multi-line header if self._last_key is None: raise HTTPInputError("first header line cannot start with whitespace") - new_part = " " + line.lstrip() + new_part = " " + line.lstrip(HTTP_WHITESPACE) self._as_list[self._last_key][-1] += new_part self._dict[self._last_key] += new_part else: @@ -179,7 +182,7 @@ def parse_line(self, line: str) -> None: name, value = line.split(":", 1) except ValueError: raise HTTPInputError("no colon in header line") - self.add(name, value.strip()) + self.add(name, value.strip(HTTP_WHITESPACE)) @classmethod def parse(cls, headers: str) -> "HTTPHeaders": diff --git a/tornado/test/httpserver_test.py b/tornado/test/httpserver_test.py index 1faf63fabf..0b29a39ccf 100644 --- a/tornado/test/httpserver_test.py +++ b/tornado/test/httpserver_test.py @@ -581,6 +581,76 @@ def test_chunked_request_body_invalid_size(self): ) self.assertEqual(400, start_line.code) + def test_chunked_request_body_duplicate_header(self): + # Repeated Transfer-Encoding headers should be an error (and not confuse + # the chunked-encoding detection to mess up framing). + self.stream.write( + b"""\ +POST /echo HTTP/1.1 +Transfer-Encoding: chunked +Transfer-encoding: chunked + +2 +ok +0 + +""" + ) + with ExpectLog( + gen_log, + ".*Unsupported Transfer-Encoding chunked,chunked", + level=logging.INFO, + ): + start_line, headers, response = self.io_loop.run_sync( + lambda: read_stream_body(self.stream) + ) + self.assertEqual(400, start_line.code) + + def test_chunked_request_body_unsupported_transfer_encoding(self): + # We don't support transfer-encodings other than chunked. + self.stream.write( + b"""\ +POST /echo HTTP/1.1 +Transfer-Encoding: gzip, chunked + +2 +ok +0 + +""" + ) + with ExpectLog( + gen_log, ".*Unsupported Transfer-Encoding gzip, chunked", level=logging.INFO + ): + start_line, headers, response = self.io_loop.run_sync( + lambda: read_stream_body(self.stream) + ) + self.assertEqual(400, start_line.code) + + def test_chunked_request_body_transfer_encoding_and_content_length(self): + # Transfer-encoding and content-length are mutually exclusive + self.stream.write( + b"""\ +POST /echo HTTP/1.1 +Transfer-Encoding: chunked +Content-Length: 2 + +2 +ok +0 + +""" + ) + with ExpectLog( + gen_log, + ".*Message with both Transfer-Encoding and Content-Length", + level=logging.INFO, + ): + start_line, headers, response = self.io_loop.run_sync( + lambda: read_stream_body(self.stream) + ) + self.assertEqual(400, start_line.code) + @gen_test def test_invalid_content_length(self): # HTTP only allows decimal digits in content-length. Make sure we don't diff --git a/tornado/test/httputil_test.py b/tornado/test/httputil_test.py index aa9b6ee253..6d618839e0 100644 --- a/tornado/test/httputil_test.py +++ b/tornado/test/httputil_test.py @@ -334,6 +334,25 @@ def test_unicode_newlines(self): gen_log.warning("failed while trying %r in %s", newline, encoding) raise + def test_unicode_whitespace(self): + # Only tabs and spaces are to be stripped according to the HTTP standard. + # Other unicode whitespace is to be left as-is. In the context of headers, + # this specifically means the whitespace characters falling within the + # latin1 charset. + whitespace = [ + (" ", True), # SPACE + ("\t", True), # TAB + ("\u00a0", False), # NON-BREAKING SPACE + ("\u0085", False), # NEXT LINE + ] + for c, stripped in whitespace: + headers = HTTPHeaders.parse("Transfer-Encoding: %schunked" % c) + if stripped: + expected = [("Transfer-Encoding", "chunked")] + else: + expected = [("Transfer-Encoding", "%schunked" % c)] + self.assertEqual(expected, list(headers.get_all())) + def test_optional_cr(self): # Both CRLF and LF should be accepted as separators. CR should not be # part of the data when followed by LF, but it is a normal char diff --git a/tornado/test/simple_httpclient_test.py b/tornado/test/simple_httpclient_test.py index 62bd4830c8..593f81fd34 100644 --- a/tornado/test/simple_httpclient_test.py +++ b/tornado/test/simple_httpclient_test.py @@ -828,7 +828,7 @@ def test_chunked_with_content_length(self): with ExpectLog( gen_log, ( - "Malformed HTTP message from None: Response " + "Malformed HTTP message from None: Message " "with both Transfer-Encoding and Content-Length" ), level=logging.INFO,