From 62b47316983c5309c8f7b703139d45af1cd402d8 Mon Sep 17 00:00:00 2001 From: cdeler Date: Fri, 30 Oct 2020 11:38:24 +0300 Subject: [PATCH] Fixed the problem with socket, which might leak when tunnel proxy connection couldn't be establish (#223) * Added test case for the leaked proxy socket * Fixed the problem with socket, which might leak when tunnel proxy connection couldn't be established * Fixed some PR issues - renamed the blocked host to blockedhost.example.com - added some docs to http_proxy_server fixture * Update tests/async_tests/test_interfaces.py Co-authored-by: Florimond Manca * Update httpcore/_async/http_proxy.py Co-authored-by: Florimond Manca * Narrowed a try-except block in _tunnel_request Co-authored-by: Florimond Manca Co-authored-by: Joe --- httpcore/_async/http_proxy.py | 54 ++++++++++++++++----------- httpcore/_sync/http_proxy.py | 54 ++++++++++++++++----------- tests/async_tests/test_interfaces.py | 30 +++++++++++++++ tests/sync_tests/test_interfaces.py | 30 +++++++++++++++ tests/utils.py | 56 +++++++++++++++++++++++----- 5 files changed, 170 insertions(+), 54 deletions(-) diff --git a/httpcore/_async/http_proxy.py b/httpcore/_async/http_proxy.py index 8a9f33c2..d9df762b 100644 --- a/httpcore/_async/http_proxy.py +++ b/httpcore/_async/http_proxy.py @@ -207,29 +207,39 @@ async def _tunnel_request( connect_url = self.proxy_origin + (target,) connect_headers = [(b"Host", target), (b"Accept", b"*/*")] connect_headers = merge_headers(connect_headers, self.proxy_headers) - (proxy_status_code, _, proxy_stream, _) = await proxy_connection.arequest( - b"CONNECT", connect_url, headers=connect_headers, ext=ext - ) - proxy_reason = get_reason_phrase(proxy_status_code) - logger.trace( - "tunnel_response proxy_status_code=%r proxy_reason=%r ", - proxy_status_code, - proxy_reason, - ) - # Read the response data without closing the socket - async for _ in proxy_stream: - pass - - # See if the tunnel was successfully established. - if proxy_status_code < 200 or proxy_status_code > 299: - msg = "%d %s" % (proxy_status_code, proxy_reason) - raise ProxyError(msg) - - # Upgrade to TLS if required - # We assume the target speaks TLS on the specified port - if scheme == b"https": - await proxy_connection.start_tls(host, timeout) + try: + ( + proxy_status_code, + _, + proxy_stream, + _, + ) = await proxy_connection.arequest( + b"CONNECT", connect_url, headers=connect_headers, ext=ext + ) + + proxy_reason = get_reason_phrase(proxy_status_code) + logger.trace( + "tunnel_response proxy_status_code=%r proxy_reason=%r ", + proxy_status_code, + proxy_reason, + ) + # Read the response data without closing the socket + async for _ in proxy_stream: + pass + + # See if the tunnel was successfully established. + if proxy_status_code < 200 or proxy_status_code > 299: + msg = "%d %s" % (proxy_status_code, proxy_reason) + raise ProxyError(msg) + + # Upgrade to TLS if required + # We assume the target speaks TLS on the specified port + if scheme == b"https": + await proxy_connection.start_tls(host, timeout) + except Exception as exc: + await proxy_connection.aclose() + raise ProxyError(exc) # The CONNECT request is successful, so we have now SWITCHED PROTOCOLS. # This means the proxy connection is now unusable, and we must create diff --git a/httpcore/_sync/http_proxy.py b/httpcore/_sync/http_proxy.py index aa3a1ae5..f5576c01 100644 --- a/httpcore/_sync/http_proxy.py +++ b/httpcore/_sync/http_proxy.py @@ -207,29 +207,39 @@ def _tunnel_request( connect_url = self.proxy_origin + (target,) connect_headers = [(b"Host", target), (b"Accept", b"*/*")] connect_headers = merge_headers(connect_headers, self.proxy_headers) - (proxy_status_code, _, proxy_stream, _) = proxy_connection.request( - b"CONNECT", connect_url, headers=connect_headers, ext=ext - ) - proxy_reason = get_reason_phrase(proxy_status_code) - logger.trace( - "tunnel_response proxy_status_code=%r proxy_reason=%r ", - proxy_status_code, - proxy_reason, - ) - # Read the response data without closing the socket - for _ in proxy_stream: - pass - - # See if the tunnel was successfully established. - if proxy_status_code < 200 or proxy_status_code > 299: - msg = "%d %s" % (proxy_status_code, proxy_reason) - raise ProxyError(msg) - - # Upgrade to TLS if required - # We assume the target speaks TLS on the specified port - if scheme == b"https": - proxy_connection.start_tls(host, timeout) + try: + ( + proxy_status_code, + _, + proxy_stream, + _, + ) = proxy_connection.request( + b"CONNECT", connect_url, headers=connect_headers, ext=ext + ) + + proxy_reason = get_reason_phrase(proxy_status_code) + logger.trace( + "tunnel_response proxy_status_code=%r proxy_reason=%r ", + proxy_status_code, + proxy_reason, + ) + # Read the response data without closing the socket + for _ in proxy_stream: + pass + + # See if the tunnel was successfully established. + if proxy_status_code < 200 or proxy_status_code > 299: + msg = "%d %s" % (proxy_status_code, proxy_reason) + raise ProxyError(msg) + + # Upgrade to TLS if required + # We assume the target speaks TLS on the specified port + if scheme == b"https": + proxy_connection.start_tls(host, timeout) + except Exception as exc: + proxy_connection.close() + raise ProxyError(exc) # The CONNECT request is successful, so we have now SWITCHED PROTOCOLS. # This means the proxy connection is now unusable, and we must create diff --git a/tests/async_tests/test_interfaces.py b/tests/async_tests/test_interfaces.py index eeb66634..73b96de1 100644 --- a/tests/async_tests/test_interfaces.py +++ b/tests/async_tests/test_interfaces.py @@ -191,6 +191,36 @@ async def test_http_proxy( assert ext == {"http_version": "HTTP/1.1", "reason": "OK"} +@pytest.mark.parametrize("proxy_mode", ["DEFAULT", "FORWARD_ONLY", "TUNNEL_ONLY"]) +@pytest.mark.parametrize("protocol,port", [(b"http", 80), (b"https", 443)]) +@pytest.mark.trio +async def test_proxy_socket_does_not_leak_when_the_connection_hasnt_been_added_to_pool( + proxy_server: URL, + server: Server, + proxy_mode: str, + protocol: bytes, + port: int, +): + method = b"GET" + url = (protocol, b"blockedhost.example.com", port, b"/") + headers = [(b"host", b"blockedhost.example.com")] + + with pytest.warns(None) as recorded_warnings: + async with httpcore.AsyncHTTPProxy(proxy_server, proxy_mode=proxy_mode) as http: + for _ in range(100): + try: + _ = await http.arequest(method, url, headers) + except (httpcore.ProxyError, httpcore.RemoteProtocolError): + pass + + # have to filter out https://github.com/encode/httpx/issues/825 from other tests + warnings = [ + *filter(lambda warn: "asyncio" not in warn.filename, recorded_warnings.list) + ] + + assert len(warnings) == 0 + + @pytest.mark.anyio async def test_http_request_local_address(backend: str, server: Server) -> None: if backend == "auto" and lookup_async_backend() == "trio": diff --git a/tests/sync_tests/test_interfaces.py b/tests/sync_tests/test_interfaces.py index 98e24336..b87cbe5a 100644 --- a/tests/sync_tests/test_interfaces.py +++ b/tests/sync_tests/test_interfaces.py @@ -191,6 +191,36 @@ def test_http_proxy( assert ext == {"http_version": "HTTP/1.1", "reason": "OK"} +@pytest.mark.parametrize("proxy_mode", ["DEFAULT", "FORWARD_ONLY", "TUNNEL_ONLY"]) +@pytest.mark.parametrize("protocol,port", [(b"http", 80), (b"https", 443)]) + +def test_proxy_socket_does_not_leak_when_the_connection_hasnt_been_added_to_pool( + proxy_server: URL, + server: Server, + proxy_mode: str, + protocol: bytes, + port: int, +): + method = b"GET" + url = (protocol, b"blockedhost.example.com", port, b"/") + headers = [(b"host", b"blockedhost.example.com")] + + with pytest.warns(None) as recorded_warnings: + with httpcore.SyncHTTPProxy(proxy_server, proxy_mode=proxy_mode) as http: + for _ in range(100): + try: + _ = http.request(method, url, headers) + except (httpcore.ProxyError, httpcore.RemoteProtocolError): + pass + + # have to filter out https://github.com/encode/httpx/issues/825 from other tests + warnings = [ + *filter(lambda warn: "asyncio" not in warn.filename, recorded_warnings.list) + ] + + assert len(warnings) == 0 + + def test_http_request_local_address(backend: str, server: Server) -> None: if backend == "sync" and lookup_sync_backend() == "trio": diff --git a/tests/utils.py b/tests/utils.py index a6762bde..9145ec78 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,8 +1,9 @@ import contextlib import socket import subprocess +import tempfile import time -from typing import Tuple +from typing import List, Tuple import sniffio @@ -46,15 +47,50 @@ def host_header(self) -> Tuple[bytes, bytes]: @contextlib.contextmanager def http_proxy_server(proxy_host: str, proxy_port: int): - + """ + This function launches pproxy process like this: + $ pproxy -b -l http://127.0.0.1:8080 + What does it mean? + It runs HTTP proxy on 127.0.0.1:8080 and blocks access to some external hosts, + specified in blocked_hosts_file + + Relevant pproxy docs could be found in their github repo: + https://github.com/qwj/python-proxy + """ proc = None - try: - command = ["pproxy", "-l", f"http://{proxy_host}:{proxy_port}/"] - proc = subprocess.Popen(command) - _wait_can_connect(proxy_host, proxy_port) + with create_proxy_block_file(["blockedhost.example.com"]) as block_file_name: + try: + command = [ + "pproxy", + "-b", + block_file_name, + "-l", + f"http://{proxy_host}:{proxy_port}/", + ] + proc = subprocess.Popen(command) + + _wait_can_connect(proxy_host, proxy_port) + + yield b"http", proxy_host.encode(), proxy_port, b"/" + finally: + if proc is not None: + proc.kill() + + +@contextlib.contextmanager +def create_proxy_block_file(blocked_domains: List[str]): + """ + The context manager yields pproxy block file. + This file should contain line delimited hostnames. We use it in the following test: + test_proxy_socket_does_not_leak_when_the_connection_hasnt_been_added_to_pool + """ + with tempfile.NamedTemporaryFile(delete=True, mode="w+") as file: + + for domain in blocked_domains: + file.write(domain) + file.write("\n") + + file.flush() - yield b"http", proxy_host.encode(), proxy_port, b"/" - finally: - if proc is not None: - proc.kill() + yield file.name