From 4244b9bd37cdffcc603b08cbfec79700d49f3ff1 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 26 Oct 2022 14:06:53 +0100 Subject: [PATCH] Improve type hinting of `RawHeaders` to detect the problem that #14301 fixed. --- synapse/app/generic_worker.py | 8 ++++---- synapse/http/client.py | 23 +++++++++++++++++++---- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 2a9f039367b9..c5aaa202a0a9 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -178,14 +178,14 @@ async def on_POST( # Proxy headers from the original request, such as the auth headers # (in case the access token is there) and the original IP / # User-Agent of the request. - headers = { - header: request.requestHeaders.getRawHeaders(header, []) + headers: Dict[bytes, List[bytes]] = { + header: list(request.requestHeaders.getRawHeaders(header, [])) for header in (b"Authorization", b"User-Agent") } # Add the previous hop to the X-Forwarded-For header. - x_forwarded_for = request.requestHeaders.getRawHeaders( + x_forwarded_for = list(request.requestHeaders.getRawHeaders( b"X-Forwarded-For", [] - ) + )) # we use request.client here, since we want the previous hop, not the # original client (as returned by request.getClientAddress()). if isinstance(request.client, (address.IPv4Address, address.IPv6Address)): diff --git a/synapse/http/client.py b/synapse/http/client.py index 084d0a5b84e9..7519a76121e9 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -90,14 +90,29 @@ "synapse_http_client_responses", "", ["method", "code"] ) -# the type of the headers list, to be passed to the t.w.h.Headers. -# Actually we can mix str and bytes keys, but Mapping treats 'key' as invariant so -# we simplify. +# the type of the headers map, to be passed to the t.w.h.Headers. +# +# The actual type accepted by Twisted is +# Mapping[Union[str, bytes], Sequence[Union[str, bytes]] , +# allowing us to mix and match str and bytes freely. However: any str is also a +# Sequence[str]; passing a header string value which is a +# standalone str is interpreted as a sequence of 1-codepoint strings. This is a disastrous footgun. +# We use a narrower value type (RawHeaderValue) to avoid this footgun. +# +# We also simplify the keys to be either all str or all bytes. This helps because +# Dict[K, V] is invariant in K (and indeed V). RawHeaders = Union[Mapping[str, "RawHeaderValue"], Mapping[bytes, "RawHeaderValue"]] # the value actually has to be a List, but List is invariant so we can't specify that # the entries can either be Lists or bytes. -RawHeaderValue = Sequence[Union[str, bytes]] +RawHeaderValue = Union[ + List[str], + List[bytes], + List[Union[str, bytes]], + Tuple[str, ...], + Tuple[bytes, ...], + Tuple[Union[str, bytes], ...], +] def check_against_blacklist(