From a2c1721f10eba8fea3e766ac004f722280629f8e Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 30 Sep 2024 21:40:19 -0500 Subject: [PATCH 1/4] Speed up the ConnectionKey A large part of the connection management is constructing, hashing and comparing the connection key. For additional details see #9364. To speed this up, ConnectionKey is now a NamedTuple which allows all the __hash__ and __eq__ operations to happen in native code. https://github.com/python/cpython/blob/133e929a791d209b578b4822a7a07f4570b3803b/Objects/tupleobject.c#L319 The construction of the object is a bit faster by calling the yarl URL methods directly. --- aiohttp/client_reqrep.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/aiohttp/client_reqrep.py b/aiohttp/client_reqrep.py index d9a7d1cd049..74c0e675760 100644 --- a/aiohttp/client_reqrep.py +++ b/aiohttp/client_reqrep.py @@ -19,6 +19,7 @@ Iterable, List, Mapping, + NamedTuple, Optional, Tuple, Type, @@ -150,11 +151,13 @@ def check(self, transport: asyncio.Transport) -> None: SSL_ALLOWED_TYPES = (bool,) -@dataclasses.dataclass(frozen=True) -class ConnectionKey: +_SSL_SCHEMES = frozenset(("https", "wss")) + + +class ConnectionKey(NamedTuple): # the key should contain an information about used proxy / TLS # to prevent reusing wrong connections from a pool - host: str + host: Optional[str] port: Optional[int] is_ssl: bool ssl: Union[SSLContext, bool, Fingerprint] @@ -287,7 +290,7 @@ def _writer(self, writer: Optional["asyncio.Task[None]"]) -> None: writer.add_done_callback(self.__reset_writer) def is_ssl(self) -> bool: - return self.url.scheme in ("https", "wss") + return self.url.scheme in _SSL_SCHEMES @property def ssl(self) -> Union["SSLContext", bool, Fingerprint]: @@ -295,16 +298,16 @@ def ssl(self) -> Union["SSLContext", bool, Fingerprint]: @property def connection_key(self) -> ConnectionKey: - proxy_headers = self.proxy_headers - if proxy_headers: + if proxy_headers := 
self.proxy_headers: h: Optional[int] = hash(tuple(proxy_headers.items())) else: h = None + url = self.url return ConnectionKey( - self.host, - self.port, - self.is_ssl(), - self.ssl, + url.raw_host, + url.port, + url.scheme in _SSL_SCHEMES, + self._ssl, self.proxy, self.proxy_auth, h, From 18f76e19101583dee3aa479ee43e19e5d818e871 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 30 Sep 2024 21:44:20 -0500 Subject: [PATCH 2/4] fixes --- aiohttp/client_reqrep.py | 4 ++-- aiohttp/connector.py | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/aiohttp/client_reqrep.py b/aiohttp/client_reqrep.py index 74c0e675760..6aeb59da83e 100644 --- a/aiohttp/client_reqrep.py +++ b/aiohttp/client_reqrep.py @@ -157,7 +157,7 @@ def check(self, transport: asyncio.Transport) -> None: class ConnectionKey(NamedTuple): # the key should contain an information about used proxy / TLS # to prevent reusing wrong connections from a pool - host: Optional[str] + host: str port: Optional[int] is_ssl: bool ssl: Union[SSLContext, bool, Fingerprint] @@ -304,7 +304,7 @@ def connection_key(self) -> ConnectionKey: h = None url = self.url return ConnectionKey( - url.raw_host, + url.raw_host or "", url.port, url.scheme in _SSL_SCHEMES, self._ssl, diff --git a/aiohttp/connector.py b/aiohttp/connector.py index 45f766b0fcc..1237f3c74d4 100644 --- a/aiohttp/connector.py +++ b/aiohttp/connector.py @@ -1,5 +1,4 @@ import asyncio -import dataclasses import functools import logging import random @@ -1312,8 +1311,8 @@ async def _create_proxy_connection( # asyncio handles this perfectly proxy_req.method = hdrs.METH_CONNECT proxy_req.url = req.url - key = dataclasses.replace( - req.connection_key, proxy=None, proxy_auth=None, proxy_headers_hash=None + key = req.connection_key._replace( + proxy=None, proxy_auth=None, proxy_headers_hash=None ) conn = Connection(self, key, proto, self._loop) proxy_resp = await proxy_req.send(conn) From 0bb5281ee1cdb44bac09fdb586cc55bd855fd779 Mon Sep 17 
00:00:00 2001 From: "J. Nick Koston" Date: Mon, 30 Sep 2024 21:47:18 -0500 Subject: [PATCH 3/4] changelog --- CHANGES/9365.breaking.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 CHANGES/9365.breaking.rst diff --git a/CHANGES/9365.breaking.rst b/CHANGES/9365.breaking.rst new file mode 100644 index 00000000000..f0224170f07 --- /dev/null +++ b/CHANGES/9365.breaking.rst @@ -0,0 +1 @@ +Changed ``ClientRequest.connection_key`` to be a `NamedTuple` to improve client performance -- by :user:`bdraco`. From 1457337daa041c5d5e4b01a75b48c9ab0f160448 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Tue, 1 Oct 2024 10:47:16 -0500 Subject: [PATCH 4/4] comment --- aiohttp/client_reqrep.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/aiohttp/client_reqrep.py b/aiohttp/client_reqrep.py index 6aeb59da83e..4da944d38dd 100644 --- a/aiohttp/client_reqrep.py +++ b/aiohttp/client_reqrep.py @@ -154,6 +154,9 @@ def check(self, transport: asyncio.Transport) -> None: _SSL_SCHEMES = frozenset(("https", "wss")) +# ConnectionKey is a NamedTuple because it is used as a key in a dict +# and a set in the connector. Since a NamedTuple is a tuple it uses +# the fast native tuple __hash__ and __eq__ implementation in CPython. class ConnectionKey(NamedTuple): # the key should contain an information about used proxy / TLS # to prevent reusing wrong connections from a pool