Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed the problem with socket, which might leak when tunnel proxy connection couldn't be establish #223

Merged
merged 8 commits into from
Oct 30, 2020
78 changes: 44 additions & 34 deletions httpcore/_async/http_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,41 +207,51 @@ async def _tunnel_request(
connect_url = self.proxy_origin + (target,)
connect_headers = [(b"Host", target), (b"Accept", b"*/*")]
connect_headers = merge_headers(connect_headers, self.proxy_headers)
(proxy_status_code, _, proxy_stream, _) = await proxy_connection.arequest(
b"CONNECT", connect_url, headers=connect_headers, ext=ext
)

proxy_reason = get_reason_phrase(proxy_status_code)
logger.trace(
"tunnel_response proxy_status_code=%r proxy_reason=%r ",
proxy_status_code,
proxy_reason,
)
# Read the response data without closing the socket
async for _ in proxy_stream:
pass

# See if the tunnel was successfully established.
if proxy_status_code < 200 or proxy_status_code > 299:
msg = "%d %s" % (proxy_status_code, proxy_reason)
raise ProxyError(msg)

# Upgrade to TLS if required
# We assume the target speaks TLS on the specified port
if scheme == b"https":
await proxy_connection.start_tls(host, timeout)

# The CONNECT request is successful, so we have now SWITCHED PROTOCOLS.
# This means the proxy connection is now unusable, and we must create
# a new one for regular requests, making sure to use the same socket to
# retain the tunnel.
connection = AsyncHTTPConnection(
origin=origin,
http2=self._http2,
ssl_context=self._ssl_context,
socket=proxy_connection.socket,
)
await self._add_to_pool(connection, timeout)
try:
(
proxy_status_code,
_,
proxy_stream,
_,
) = await proxy_connection.arequest(
b"CONNECT", connect_url, headers=connect_headers, ext=ext
)

proxy_reason = get_reason_phrase(proxy_status_code)
logger.trace(
"tunnel_response proxy_status_code=%r proxy_reason=%r ",
proxy_status_code,
proxy_reason,
)
# Read the response data without closing the socket
async for _ in proxy_stream:
pass

# See if the tunnel was successfully established.
if proxy_status_code < 200 or proxy_status_code > 299:
msg = "%d %s" % (proxy_status_code, proxy_reason)
raise ProxyError(msg)

# Upgrade to TLS if required
# We assume the target speaks TLS on the specified port
if scheme == b"https":
await proxy_connection.start_tls(host, timeout)

# The CONNECT request is successful, so we have now SWITCHED PROTOCOLS.
# This means the proxy connection is now unusable, and we must create
# a new one for regular requests, making sure to use the same socket to
# retain the tunnel.
connection = AsyncHTTPConnection(
origin=origin,
http2=self._http2,
ssl_context=self._ssl_context,
socket=proxy_connection.socket,
)
await self._add_to_pool(connection, timeout)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to explicitly put this operation outside the try/except block? I assume the only operations that we want to fail upon is socket-related stuff, so proxy_connection.arequest() and proxy_connection.start_tls(). Anything that's not related to it, such as adding to the pool, should be left outside the block, correct?

Copy link
Member Author

@cdeler cdeler Oct 17, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, you are right, the try-except block should be as narrow as possible

except Exception:
await proxy_connection.aclose()
raise
cdeler marked this conversation as resolved.
Show resolved Hide resolved

# Once the connection has been established we can send requests on
# it as normal.
Expand Down
78 changes: 44 additions & 34 deletions httpcore/_sync/http_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,41 +207,51 @@ def _tunnel_request(
connect_url = self.proxy_origin + (target,)
connect_headers = [(b"Host", target), (b"Accept", b"*/*")]
connect_headers = merge_headers(connect_headers, self.proxy_headers)
(proxy_status_code, _, proxy_stream, _) = proxy_connection.request(
b"CONNECT", connect_url, headers=connect_headers, ext=ext
)

proxy_reason = get_reason_phrase(proxy_status_code)
logger.trace(
"tunnel_response proxy_status_code=%r proxy_reason=%r ",
proxy_status_code,
proxy_reason,
)
# Read the response data without closing the socket
for _ in proxy_stream:
pass

# See if the tunnel was successfully established.
if proxy_status_code < 200 or proxy_status_code > 299:
msg = "%d %s" % (proxy_status_code, proxy_reason)
raise ProxyError(msg)

# Upgrade to TLS if required
# We assume the target speaks TLS on the specified port
if scheme == b"https":
proxy_connection.start_tls(host, timeout)

# The CONNECT request is successful, so we have now SWITCHED PROTOCOLS.
# This means the proxy connection is now unusable, and we must create
# a new one for regular requests, making sure to use the same socket to
# retain the tunnel.
connection = SyncHTTPConnection(
origin=origin,
http2=self._http2,
ssl_context=self._ssl_context,
socket=proxy_connection.socket,
)
self._add_to_pool(connection, timeout)
try:
(
proxy_status_code,
_,
proxy_stream,
_,
) = proxy_connection.request(
b"CONNECT", connect_url, headers=connect_headers, ext=ext
)

proxy_reason = get_reason_phrase(proxy_status_code)
logger.trace(
"tunnel_response proxy_status_code=%r proxy_reason=%r ",
proxy_status_code,
proxy_reason,
)
# Read the response data without closing the socket
for _ in proxy_stream:
pass

# See if the tunnel was successfully established.
if proxy_status_code < 200 or proxy_status_code > 299:
msg = "%d %s" % (proxy_status_code, proxy_reason)
raise ProxyError(msg)

# Upgrade to TLS if required
# We assume the target speaks TLS on the specified port
if scheme == b"https":
proxy_connection.start_tls(host, timeout)

# The CONNECT request is successful, so we have now SWITCHED PROTOCOLS.
# This means the proxy connection is now unusable, and we must create
# a new one for regular requests, making sure to use the same socket to
# retain the tunnel.
connection = SyncHTTPConnection(
origin=origin,
http2=self._http2,
ssl_context=self._ssl_context,
socket=proxy_connection.socket,
)
self._add_to_pool(connection, timeout)
except Exception:
proxy_connection.close()
raise

# Once the connection has been established we can send requests on
# it as normal.
Expand Down
30 changes: 30 additions & 0 deletions tests/async_tests/test_interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,36 @@ async def test_http_proxy(
assert ext == {"http_version": "HTTP/1.1", "reason": "OK"}


@pytest.mark.parametrize("proxy_mode", ["DEFAULT", "FORWARD_ONLY", "TUNNEL_ONLY"])
@pytest.mark.parametrize("protocol,port", [(b"http", 80), (b"https", 443)])
@pytest.mark.trio
async def test_proxy_socket_does_not_leak_when_the_connection_hasnt_been_added_to_pool(
proxy_server: URL,
server: Server,
proxy_mode: str,
protocol: bytes,
port: int,
):
method = b"GET"
url = (protocol, b"example.com", port, b"/")
headers = [(b"host", b"example.org")]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the host mismatch between url / headers expected? I think this is somewhat related to the block file passed to pproxy. If so, ad assuming the block file is intended to be a list of URL hosts that cannot be requested through the proxy…

  • Should we set the blocked hostname to something more explicit like blockedhost.org?
  • If the different host in url and headers is on purpose and required, add a small comment to explain why?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right! I missed that the url and header are different, and it might confuse. Moreover the server name is not so clear. I'm renaming this host to "blockedhost.example.com" (and changing the fixture the same way)


with pytest.warns(None) as recorded_warnings:
async with httpcore.AsyncHTTPProxy(proxy_server, proxy_mode=proxy_mode) as http:
for _ in range(100):
try:
_ = await http.arequest(method, url, headers)
except httpcore.RemoteProtocolError:
pass

# have to filter out https://github.com/encode/httpx/issues/825 from other tests
warnings_list = [
*filter(lambda warn: "asyncio" not in warn.filename, recorded_warnings.list)
]

assert len(warnings_list) == 0
cdeler marked this conversation as resolved.
Show resolved Hide resolved


@pytest.mark.anyio
async def test_http_request_local_address(backend: str, server: Server) -> None:
if backend == "auto" and lookup_async_backend() == "trio":
Expand Down
30 changes: 30 additions & 0 deletions tests/sync_tests/test_interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,36 @@ def test_http_proxy(
assert ext == {"http_version": "HTTP/1.1", "reason": "OK"}


@pytest.mark.parametrize("proxy_mode", ["DEFAULT", "FORWARD_ONLY", "TUNNEL_ONLY"])
@pytest.mark.parametrize("protocol,port", [(b"http", 80), (b"https", 443)])

def test_proxy_socket_does_not_leak_when_the_connection_hasnt_been_added_to_pool(
proxy_server: URL,
server: Server,
proxy_mode: str,
protocol: bytes,
port: int,
):
method = b"GET"
url = (protocol, b"example.com", port, b"/")
headers = [(b"host", b"example.org")]

with pytest.warns(None) as recorded_warnings:
with httpcore.SyncHTTPProxy(proxy_server, proxy_mode=proxy_mode) as http:
for _ in range(100):
try:
_ = http.request(method, url, headers)
except httpcore.RemoteProtocolError:
pass

# have to filter out https://github.com/encode/httpx/issues/825 from other tests
warnings_list = [
*filter(lambda warn: "asyncio" not in warn.filename, recorded_warnings.list)
]

assert len(warnings_list) == 0



def test_http_request_local_address(backend: str, server: Server) -> None:
if backend == "sync" and lookup_sync_backend() == "trio":
Expand Down
40 changes: 31 additions & 9 deletions tests/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import contextlib
import socket
import subprocess
import tempfile
import time
from typing import Tuple
from typing import List, Tuple

import sniffio

Expand Down Expand Up @@ -48,13 +49,34 @@ def host_header(self) -> Tuple[bytes, bytes]:
def http_proxy_server(proxy_host: str, proxy_port: int):

proc = None
try:
command = ["pproxy", "-l", f"http://{proxy_host}:{proxy_port}/"]
proc = subprocess.Popen(command)

_wait_can_connect(proxy_host, proxy_port)
with create_proxy_block_file(["example.com"]) as block_file_name:
try:
command = [
"pproxy",
"-b",
block_file_name,
"-l",
f"http://{proxy_host}:{proxy_port}/",
]
proc = subprocess.Popen(command)

_wait_can_connect(proxy_host, proxy_port)

yield b"http", proxy_host.encode(), proxy_port, b"/"
finally:
if proc is not None:
proc.kill()


@contextlib.contextmanager
def create_proxy_block_file(blocked_domains: List[str]):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd suggest we add a small docstring here with any relevant docs material from pproxy (eg a link to relevant docs) describing what a "block file" is and what effect it has?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added some docs to http_proxy_server and create_proxy_block_file

with tempfile.NamedTemporaryFile(delete=True, mode="w+") as file:

for domain in blocked_domains:
file.write(domain)
file.write("\n")

file.flush()

yield b"http", proxy_host.encode(), proxy_port, b"/"
finally:
if proc is not None:
proc.kill()
yield file.name