Skip to content

Commit

Permalink
Fix parsing the Forwarded header (#2170) (#2173)
Browse files Browse the repository at this point in the history
This fixes #2170 by parsing Forwarded more carefully, while staying about
as fast, simple, and robust in the face of potential injection attacks.
(Speed was measured with IPython's %timeit on my laptop on a few typical
and pathological header values.)

In particular:

- commas and semicolons are allowed inside quoted-strings;
- empty forwarded-pairs (as in "for=_1;;by=_2") are allowed;
- non-standard parameters are allowed (although this alone could be easily
  done in the previous parser).

This still doesn't parse valid headers containing obs-text, which was
an intentional decision in the previous parser (see comments) that I did
not change.

Also, the previous parser used to bail out of forwarded-elements containing
duplicate parameter names. No rationale was given in the code, and I don't
think this is important, so the new parser doesn't enforce this.
  • Loading branch information
vfaronov authored and asvetlov committed Aug 9, 2017
1 parent dda632b commit a0666e8
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 39 deletions.
1 change: 1 addition & 0 deletions CONTRIBUTORS.txt
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ Thomas Grainger
Tolga Tezel
Vaibhav Sagar
Vamsi Krishna Avula
Vasiliy Faronov
Vasyl Baran
Vikas Kawadia
Vitalik Verhovodov
Expand Down
76 changes: 40 additions & 36 deletions aiohttp/web_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
_TCHAR = string.digits + string.ascii_letters + r"!#$%&'*+.^_`|~-"
# '-' at the end to prevent interpretation as range in a char class

_TOKEN = r'[{tchar}]*'.format(tchar=_TCHAR)
_TOKEN = r'[{tchar}]+'.format(tchar=_TCHAR)

_QDTEXT = r'[{}]'.format(
r''.join(chr(c) for c in (0x09, 0x20, 0x21) + tuple(range(0x23, 0x7F))))
Expand All @@ -40,12 +40,8 @@
_QUOTED_STRING = r'"(?:{quoted_pair}|{qdtext})*"'.format(
qdtext=_QDTEXT, quoted_pair=_QUOTED_PAIR)

_FORWARDED_PARAMS = (
r'[bB][yY]|[fF][oO][rR]|[hH][oO][sS][tT]|[pP][rR][oO][tT][oO]')

_FORWARDED_PAIR = (
r'^({forwarded_params})=({token}|{quoted_string})$'.format(
forwarded_params=_FORWARDED_PARAMS,
r'({token})=({token}|{quoted_string})'.format(
token=_TOKEN,
quoted_string=_QUOTED_STRING))

Expand Down Expand Up @@ -209,37 +205,45 @@ def forwarded(self):
Returns a tuple containing one or more immutable dicts
"""
def _parse_forwarded(forwarded_headers):
for field_value in forwarded_headers:
# by=...;for=..., For=..., BY=...
for forwarded_elm in field_value.split(','):
# by=...;for=...
fvparams = dict()
forwarded_pairs = (
_FORWARDED_PAIR_RE.findall(pair)
for pair in forwarded_elm.strip().split(';'))
for forwarded_pair in forwarded_pairs:
# by=...
if len(forwarded_pair) != 1:
# non-compliant syntax
break
param, value = forwarded_pair[0]
if param.lower() in fvparams:
# duplicate param in field-value
break
if value and value[0] == '"':
# quoted string: replace quotes and escape
# sequences
value = _QUOTED_PAIR_REPLACE_RE.sub(
r'\1', value[1:-1])
fvparams[param.lower()] = value
elems = []
for field_value in self._message.headers.getall(hdrs.FORWARDED, ()):
length = len(field_value)
pos = 0
need_separator = False
elem = {}
elems.append(types.MappingProxyType(elem))
while 0 <= pos < length:
match = _FORWARDED_PAIR_RE.match(field_value, pos)
if match is not None: # got a valid forwarded-pair
if need_separator:
# bad syntax here, skip to next comma
pos = field_value.find(',', pos)
else:
yield types.MappingProxyType(fvparams)
continue
yield dict()

return tuple(
_parse_forwarded(self._message.headers.getall(hdrs.FORWARDED, ())))
(name, value) = match.groups()
if value[0] == '"':
# quoted string: remove quotes and unescape
value = _QUOTED_PAIR_REPLACE_RE.sub(r'\1',
value[1:-1])
elem[name.lower()] = value
pos += len(match.group(0))
need_separator = True
elif field_value[pos] == ',': # next forwarded-element
need_separator = False
elem = {}
elems.append(types.MappingProxyType(elem))
pos += 1
elif field_value[pos] == ';': # next forwarded-pair
need_separator = False
pos += 1
elif field_value[pos] in ' \t':
# Allow whitespace even between forwarded-pairs, though
# RFC 7239 doesn't. This simplifies code and is in line
# with Postel's law.
pos += 1
else:
# bad syntax here, skip to next comma
pos = field_value.find(',', pos)
return tuple(elems)

@reify
def _scheme(self):
Expand Down
1 change: 1 addition & 0 deletions changes/2170.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix parsing the Forwarded header according to RFC 7239.
88 changes: 85 additions & 3 deletions tests/test_web_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,61 @@ def test_single_forwarded_header_quoted_escaped():
assert req.forwarded[0]['proto'] == 'lala land~ 123!&'


def test_single_forwarded_header_custom_param():
header = r'BY=identifier;PROTO=https;SOME="other, \"value\""'
req = make_mocked_request('GET', '/',
headers=CIMultiDict({'Forwarded': header}))
assert len(req.forwarded) == 1
assert req.forwarded[0]['by'] == 'identifier'
assert req.forwarded[0]['proto'] == 'https'
assert req.forwarded[0]['some'] == 'other, "value"'


def test_single_forwarded_header_empty_params():
# This is allowed by the grammar given in RFC 7239
header = ';For=identifier;;PROTO=https;;;'
req = make_mocked_request('GET', '/',
headers=CIMultiDict({'Forwarded': header}))
assert req.forwarded[0]['for'] == 'identifier'
assert req.forwarded[0]['proto'] == 'https'


def test_single_forwarded_header_bad_separator():
header = 'BY=identifier PROTO=https'
req = make_mocked_request('GET', '/',
headers=CIMultiDict({'Forwarded': header}))
assert 'proto' not in req.forwarded[0]


def test_single_forwarded_header_injection1():
# We might receive a header like this if we're sitting behind a reverse
# proxy that blindly appends a forwarded-element without checking
# the syntax of existing field-values. We should be able to recover
# the appended element anyway.
header = 'for=_injected;by=", for=_real'
req = make_mocked_request('GET', '/',
headers=CIMultiDict({'Forwarded': header}))
assert len(req.forwarded) == 2
assert 'by' not in req.forwarded[0]
assert req.forwarded[1]['for'] == '_real'


def test_single_forwarded_header_injection2():
header = 'very bad syntax, for=_real'
req = make_mocked_request('GET', '/',
headers=CIMultiDict({'Forwarded': header}))
assert len(req.forwarded) == 2
assert 'for' not in req.forwarded[0]
assert req.forwarded[1]['for'] == '_real'


def test_single_forwarded_header_long_quoted_string():
header = 'for="' + '\\\\' * 5000 + '"'
req = make_mocked_request('GET', '/',
headers=CIMultiDict({'Forwarded': header}))
assert req.forwarded[0]['for'] == '\\' * 5000


def test_multiple_forwarded_headers():
headers = CIMultiDict()
headers.add('Forwarded', 'By=identifier1;for=identifier2, BY=identifier3')
Expand All @@ -303,10 +358,37 @@ def test_multiple_forwarded_headers():
assert req.forwarded[2]['for'] == 'identifier5'


def test_multiple_forwarded_headers_bad_syntax():
headers = CIMultiDict()
headers.add('Forwarded', 'for=_1;by=_2')
headers.add('Forwarded', 'invalid value')
headers.add('Forwarded', '')
headers.add('Forwarded', 'for=_3;by=_4')
req = make_mocked_request('GET', '/', headers=headers)
assert len(req.forwarded) == 4
assert req.forwarded[0]['for'] == '_1'
assert 'for' not in req.forwarded[1]
assert 'for' not in req.forwarded[2]
assert req.forwarded[3]['by'] == '_4'


def test_multiple_forwarded_headers_injection():
headers = CIMultiDict()
# This could be sent by an attacker, hoping to "shadow" the second header.
headers.add('Forwarded', 'for=_injected;by="')
# This is added by our trusted reverse proxy.
headers.add('Forwarded', 'for=_real;by=_actual_proxy')
req = make_mocked_request('GET', '/', headers=headers)
assert len(req.forwarded) == 2
assert 'by' not in req.forwarded[0]
assert req.forwarded[1]['for'] == '_real'
assert req.forwarded[1]['by'] == '_actual_proxy'


def test_https_scheme_by_forwarded_header():
header = 'by=_1;for=_2;host=_3;proto=https'
req = make_mocked_request('GET', '/',
headers=CIMultiDict(
{'Forwarded': 'by=;for=;host=;proto=https'}))
headers=CIMultiDict({'Forwarded': header}))
assert "https" == req.scheme
assert req.secure is True

Expand Down Expand Up @@ -338,7 +420,7 @@ def test_https_scheme_by_x_forwarded_proto_header_no_tls():
def test_host_by_forwarded_header():
headers = CIMultiDict()
headers.add('Forwarded', 'By=identifier1;for=identifier2, BY=identifier3')
headers.add('Forwarded', 'by=;for=;host=example.com')
headers.add('Forwarded', 'by=unknown;for=unknown;host=example.com')
req = make_mocked_request('GET', '/', headers=headers)
assert req.host == 'example.com'

Expand Down

0 comments on commit a0666e8

Please sign in to comment.