Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix inconsistency between Python and C http request parsers. #4973

Merged
merged 13 commits into from
Oct 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/4972.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix inconsistency between Python and C http request parsers in parsing pct-encoded URL.
1 change: 1 addition & 0 deletions CONTRIBUTORS.txt
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ Sergey Ninua
Sergey Skripnick
Serhii Charykov
Serhii Kostel
Serhiy Storchaka
Simon Kennedy
Sin-Woo Bang
Stanislas Plum
Expand Down
2 changes: 1 addition & 1 deletion aiohttp/_http_parser.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -868,7 +868,7 @@ cdef _parse_url(char* buf_data, size_t length):

return URL_build(scheme=schema,
user=user, password=password, host=host, port=port,
path=path, query=query, fragment=fragment)
path=path, query_string=query, fragment=fragment, encoded=True)
else:
raise InvalidURLError("invalid url {!r}".format(buf_data))
finally:
Expand Down
54 changes: 37 additions & 17 deletions aiohttp/web_urldispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

from typing_extensions import TypedDict
from yarl import URL
from yarl import __version__ as yarl_version # type: ignore

from . import hdrs
from .abc import AbstractMatchInfo, AbstractRouter, AbstractView
Expand Down Expand Up @@ -61,6 +62,8 @@
else:
BaseDict = dict

YARL_VERSION = tuple(map(int, yarl_version.split('.')[:2]))

HTTP_METHOD_RE = re.compile(r"^[0-9A-Za-z!#\$%&'\*\+\-\.\^_`\|~]+$")
ROUTE_RE = re.compile(r'(\{[_a-zA-Z][^{}]*(?:\{[^{}]*\}[^{}]*)*\})')
PATH_SEP = re.escape('/')
Expand Down Expand Up @@ -421,9 +424,9 @@ def __init__(self, path: str, *, name: Optional[str]=None) -> None:
if '{' in part or '}' in part:
raise ValueError("Invalid path '{}'['{}']".format(path, part))

path = URL.build(path=part).raw_path
formatter += path
pattern += re.escape(path)
part = _requote_path(part)
formatter += part
pattern += re.escape(part)

try:
compiled = re.compile(pattern)
Expand Down Expand Up @@ -451,7 +454,7 @@ def _match(self, path: str) -> Optional[Dict[str, str]]:
if match is None:
return None
else:
return {key: URL.build(path=value, encoded=True).path
return {key: _unquote_path(value)
for key, value in match.groupdict().items()}

def raw_match(self, path: str) -> bool:
Expand All @@ -462,9 +465,9 @@ def get_info(self) -> _InfoDict:
'pattern': self._pattern}

def url_for(self, **parts: str) -> URL:
url = self._formatter.format_map({k: URL.build(path=v).raw_path
url = self._formatter.format_map({k: _quote_path(v)
for k, v in parts.items()})
return URL.build(path=url)
return URL.build(path=url, encoded=True)

def __repr__(self) -> str:
name = "'" + self.name + "' " if self.name is not None else ""
Expand All @@ -478,7 +481,7 @@ def __init__(self, prefix: str, *, name: Optional[str]=None) -> None:
assert not prefix or prefix.startswith('/'), prefix
assert prefix in ('', '/') or not prefix.endswith('/'), prefix
super().__init__(name=name)
self._prefix = URL.build(path=prefix).raw_path
self._prefix = _requote_path(prefix)

@property
def canonical(self) -> str:
Expand Down Expand Up @@ -535,17 +538,17 @@ def url_for(self, *, filename: Union[str, Path], # type: ignore
append_version = self._append_version
if isinstance(filename, Path):
filename = str(filename)
while filename.startswith('/'):
filename = filename[1:]
filename = '/' + filename
filename = filename.lstrip('/')

url = URL.build(path=self._prefix, encoded=True)
# filename is not encoded
url = URL.build(path=self._prefix + filename)
if YARL_VERSION < (1, 6):
url = url / filename.replace('%', '%25')
else:
url = url / filename

if append_version:
try:
if filename.startswith('/'):
filename = filename[1:]
filepath = self._directory.joinpath(filename).resolve()
if not self._follow_symlinks:
filepath.relative_to(self._directory)
Expand Down Expand Up @@ -592,8 +595,7 @@ async def resolve(self, request: Request) -> _Resolve:
if method not in allowed_methods:
return None, allowed_methods

match_dict = {'filename': URL.build(path=path[len(self._prefix)+1:],
encoded=True).path}
match_dict = {'filename': _unquote_path(path[len(self._prefix)+1:])}
return (UrlMappingMatchInfo(match_dict, self._routes[method]),
allowed_methods)

Expand Down Expand Up @@ -1032,8 +1034,7 @@ def add_resource(self, path: str, *,
if resource.name == name and resource.raw_match(path):
return cast(Resource, resource)
if not ('{' in path or '}' in path or ROUTE_RE.search(path)):
url = URL.build(path=path)
resource = PlainResource(url.raw_path, name=name)
resource = PlainResource(_requote_path(path), name=name)
self.register_resource(resource)
return resource
resource = DynamicResource(path, name=name)
Expand Down Expand Up @@ -1152,3 +1153,22 @@ def add_routes(self,
for route_def in routes:
registered_routes.extend(route_def.register(self))
return registered_routes


def _quote_path(value: str) -> str:
if YARL_VERSION < (1, 6):
value = value.replace('%', '%25')
return URL.build(path=value, encoded=False).raw_path


def _unquote_path(value: str) -> str:
return URL.build(path=value, encoded=True).path


def _requote_path(value: str) -> str:
serhiy-storchaka marked this conversation as resolved.
Show resolved Hide resolved
# Quote non-ascii characters and other characters which must be quoted,
# but preserve existing %-sequences.
result = _quote_path(value)
if '%' in value:
result = result.replace('%25', '%')
return result
2 changes: 1 addition & 1 deletion requirements/ci-wheel.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ pytest==6.1.1
pytest-cov==2.10.1
pytest-mock==3.3.1
typing_extensions==3.7.4.3
yarl==1.4.2
yarl==1.6.1

# Using PEP 508 env markers to control dependency on runtimes:

Expand Down
50 changes: 50 additions & 0 deletions tests/test_http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import asyncio
from unittest import mock
from urllib.parse import quote

import pytest
from multidict import CIMultiDict
Expand Down Expand Up @@ -787,6 +788,55 @@ def test_url_parse_non_strict_mode(parser) -> None:
assert payload.is_eof()


@pytest.mark.parametrize(
('uri', 'path', 'query', 'fragment'),
[
('/path%23frag', '/path#frag', {}, ''),
('/path%2523frag', '/path%23frag', {}, ''),
('/path?key=value%23frag', '/path', {'key': 'value#frag'}, ''),
('/path?key=value%2523frag', '/path', {'key': 'value%23frag'}, ''),
('/path#frag%20', '/path', {}, 'frag '),
('/path#frag%2520', '/path', {}, 'frag%20'),
]
)
def test_parse_uri_percent_encoded(parser, uri, path, query, fragment) -> None:
text = ('GET %s HTTP/1.1\r\n\r\n' % (uri,)).encode()
messages, upgrade, tail = parser.feed_data(text)
msg = messages[0][0]

assert msg.path == uri
assert msg.url == URL(uri)
assert msg.url.path == path
assert msg.url.query == query
assert msg.url.fragment == fragment


def test_parse_uri_utf8(parser) -> None:
text = ('GET /путь?ключ=знач#фраг HTTP/1.1\r\n\r\n').encode()
webknjaz marked this conversation as resolved.
Show resolved Hide resolved
messages, upgrade, tail = parser.feed_data(text)
msg = messages[0][0]

assert msg.path == '/путь?ключ=знач#фраг'
assert msg.url.path == '/путь'
assert msg.url.query == {'ключ': 'знач'}
assert msg.url.fragment == 'фраг'


def test_parse_uri_utf8_percent_encoded(parser) -> None:
text = (
'GET %s HTTP/1.1\r\n\r\n' %
quote('/путь?ключ=знач#фраг', safe='/?=#')
).encode()
messages, upgrade, tail = parser.feed_data(text)
msg = messages[0][0]

assert msg.path == quote('/путь?ключ=знач#фраг', safe='/?=#')
assert msg.url == URL('/путь?ключ=знач#фраг')
assert msg.url.path == '/путь'
assert msg.url.query == {'ключ': 'знач'}
assert msg.url.fragment == 'фраг'


@pytest.mark.skipif('HttpRequestParserC' not in dir(aiohttp.http_parser),
reason="C based HTTP parser not available")
def test_parse_bad_method_for_c_parser_raises(loop, protocol):
Expand Down
26 changes: 22 additions & 4 deletions tests/test_urldispatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,20 @@ def test_add_static_append_version_not_follow_symlink(router,
assert '/st/append_version_symlink/data.unknown_mime_type' == str(url)


def test_add_static_quoting(router) -> None:
resource = router.add_static('/пре %2Fфикс',
pathlib.Path(aiohttp.__file__).parent,
name='static')
assert router['static'] is resource
url = resource.url_for(filename='/1 2/файл%2F.txt')
assert url.path == '/пре /фикс/1 2/файл%2F.txt'
assert str(url) == (
'/%D0%BF%D1%80%D0%B5%20%2F%D1%84%D0%B8%D0%BA%D1%81'
'/1%202/%D1%84%D0%B0%D0%B9%D0%BB%252F.txt'
)
assert len(resource) == 2


def test_plain_not_match(router) -> None:
handler = make_handler()
router.add_route('GET', '/get/path', handler, name='name')
Expand Down Expand Up @@ -629,10 +643,14 @@ def test_route_dynamic_with_regex(router) -> None:

def test_route_dynamic_quoting(router) -> None:
handler = make_handler()
route = router.add_route('GET', r'/{arg}', handler)

url = route.url_for(arg='1 2/текст')
assert '/1%202/%D1%82%D0%B5%D0%BA%D1%81%D1%82' == str(url)
route = router.add_route('GET', r'/пре %2Fфикс/{arg}', handler)

url = route.url_for(arg='1 2/текст%2F')
assert url.path == '/пре /фикс/1 2/текст%2F'
assert str(url) == (
'/%D0%BF%D1%80%D0%B5%20%2F%D1%84%D0%B8%D0%BA%D1%81'
'/1%202/%D1%82%D0%B5%D0%BA%D1%81%D1%82%252F'
)


async def test_regular_match_info(router) -> None:
Expand Down
2 changes: 1 addition & 1 deletion vendor/http-parser
Submodule http-parser updated 4 files
+6 −6 Makefile
+44 −150 http_parser.c
+5 −12 http_parser.h
+4 −132 test.c