From f78a456e797a432aeb86cfd09d505ac8dddcede1 Mon Sep 17 00:00:00 2001 From: Jason Little Date: Wed, 12 Apr 2023 19:27:02 -0500 Subject: [PATCH 1/5] Factor the methods and basic setup out of SimpleHttpClient to allow a reusable base class. --- synapse/http/client.py | 129 ++++++++++++++++++++++++----------------- 1 file changed, 77 insertions(+), 52 deletions(-) diff --git a/synapse/http/client.py b/synapse/http/client.py index b5cf8123ce84..014046f5d09e 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -312,35 +312,24 @@ def request( ) -class SimpleHttpClient: +class BaseSynapseClient: """ A simple, no-frills HTTP client with methods that wrap up common ways of using HTTP in Matrix + + Args: + hs: The HomeServer instance to pass in + treq_args: Extra keyword arguments to be given to treq.request. """ def __init__( self, hs: "HomeServer", treq_args: Optional[Dict[str, Any]] = None, - ip_whitelist: Optional[IPSet] = None, - ip_blacklist: Optional[IPSet] = None, - use_proxy: bool = False, ): - """ - Args: - hs - treq_args: Extra keyword arguments to be given to treq.request. - ip_blacklist: The IP addresses that are blacklisted that - we may not request. - ip_whitelist: The whitelisted IP addresses, that we can - request if it were otherwise caught in a blacklist. - use_proxy: Whether proxy settings should be discovered and used - from conventional environment variables. - """ self.hs = hs + self.reactor = hs.get_reactor() - self._ip_whitelist = ip_whitelist - self._ip_blacklist = ip_blacklist self._extra_treq_args = treq_args or {} self.clock = hs.get_clock() @@ -356,44 +345,11 @@ def __init__( # reactor. self._cooperator = Cooperator(scheduler=_make_scheduler(hs.get_reactor())) - if self._ip_blacklist: - # If we have an IP blacklist, we need to use a DNS resolver which - # filters out blacklisted IP addresses, to prevent DNS rebinding. - self.reactor: ISynapseReactor = BlacklistingReactorWrapper( - hs.get_reactor(), self._ip_whitelist, self._ip_blacklist - ) - else: - self.reactor = hs.get_reactor() - - # the pusher makes lots of concurrent SSL connections to sygnal, and - # tends to do so in batches, so we need to allow the pool to keep - # lots of idle connections around. - pool = HTTPConnectionPool(self.reactor) - # XXX: The justification for using the cache factor here is that larger instances - # will need both more cache and more connections. - # Still, this should probably be a separate dial - pool.maxPersistentPerHost = max(int(100 * hs.config.caches.global_factor), 5) - pool.cachedConnectionTimeout = 2 * 60 - - self.agent: IAgent = ProxyAgent( + self.agent: IAgent = Agent( self.reactor, - hs.get_reactor(), - connectTimeout=15, - contextFactory=self.hs.get_http_client_context_factory(), - pool=pool, - use_proxy=use_proxy, + contextFactory=hs.get_http_client_context_factory(), ) - if self._ip_blacklist: - # If we have an IP blacklist, we then install the blacklisting Agent - # which prevents direct access to IP addresses, that are not caught - # by the DNS resolution. - self.agent = BlacklistingAgentWrapper( - self.agent, - ip_blacklist=self._ip_blacklist, - ip_whitelist=self._ip_whitelist, - ) - async def request( self, method: str, @@ -799,6 +755,75 @@ async def get_file( ) +class SimpleHttpClient(BaseSynapseClient): + """ + An HTTP client capable of crossing a proxy and respecting a block/allow list. This + does set up a HTTPConnectionPool based on a multiplier of 100 times + hs.config.caches.global_factor, as it is responsible for pushing to Sygnal. + + Args: + hs: The HomeServer instance to pass in + treq_args: Extra keyword arguments to be given to treq.request. + ip_blacklist: The IP addresses that are blacklisted that + we may not request. + ip_whitelist: The whitelisted IP addresses, that we can + request if it were otherwise caught in a blacklist. + use_proxy: Whether proxy settings should be discovered and used + from conventional environment variables. + """ + + def __init__( + self, + hs: "HomeServer", + treq_args: Optional[Dict[str, Any]] = None, + ip_whitelist: Optional[IPSet] = None, + ip_blacklist: Optional[IPSet] = None, + use_proxy: bool = False, + ): + super().__init__(hs, treq_args=treq_args) + self._ip_whitelist = ip_whitelist + self._ip_blacklist = ip_blacklist + + if self._ip_blacklist: + # If we have an IP blacklist, we need to use a DNS resolver which + # filters out blacklisted IP addresses, to prevent DNS rebinding. + self.reactor: ISynapseReactor = BlacklistingReactorWrapper( + hs.get_reactor(), self._ip_whitelist, self._ip_blacklist + ) + else: + # This should have already been set up in the call to super(). Make sure. + self.reactor = hs.get_reactor() + + # the pusher makes lots of concurrent SSL connections to Sygnal, and tends to + # do so in batches, so we need to allow the pool to keep lots of idle + # connections around. + pool = HTTPConnectionPool(self.reactor) + # XXX: The justification for using the cache factor here is that larger + # instances will need both more cache and more connections. + # Still, this should probably be a separate dial + pool.maxPersistentPerHost = max(int(100 * hs.config.caches.global_factor), 5) + pool.cachedConnectionTimeout = 2 * 60 + + self.agent: IAgent = ProxyAgent( + self.reactor, + hs.get_reactor(), + connectTimeout=15, + contextFactory=self.hs.get_http_client_context_factory(), + pool=pool, + use_proxy=use_proxy, + ) + + if self._ip_blacklist: + # If we have an IP blacklist, we then install the blacklisting Agent + # which prevents direct access to IP addresses, that are not caught + # by the DNS resolution. + self.agent = BlacklistingAgentWrapper( + self.agent, + ip_blacklist=self._ip_blacklist, + ip_whitelist=self._ip_whitelist, + ) + + def _timeout_to_request_timed_out_error(f: Failure) -> Failure: if f.check(twisted_error.TimeoutError, twisted_error.ConnectingCancelledError): # The TCP connection has its own timeout (set by the 'connectTimeout' param From f04879ca34494a30c887aca65dc0f2a0a57d16db Mon Sep 17 00:00:00 2001 From: Jason Little Date: Wed, 12 Apr 2023 19:54:05 -0500 Subject: [PATCH 2/5] Changelog --- changelog.d/15427.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/15427.misc diff --git a/changelog.d/15427.misc b/changelog.d/15427.misc new file mode 100644 index 000000000000..ef873e3b2b49 --- /dev/null +++ b/changelog.d/15427.misc @@ -0,0 +1 @@ +Refactor `SimpleHttpClient` to pull out a base class. From 4eec2e7138375d7825e8b00ad523dfd4b1f78d30 Mon Sep 17 00:00:00 2001 From: Jason Little Date: Thu, 13 Apr 2023 15:22:57 -0500 Subject: [PATCH 3/5] Apply suggestions from code review Co-authored-by: Patrick Cloke --- synapse/http/client.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/synapse/http/client.py b/synapse/http/client.py index 014046f5d09e..a969f4f40929 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -312,7 +312,7 @@ def request( ) -class BaseSynapseClient: +class BaseHttpClient: """ A simple, no-frills HTTP client with methods that wrap up common ways of using HTTP in Matrix @@ -757,9 +757,9 @@ async def get_file( class SimpleHttpClient(BaseSynapseClient): """ - An HTTP client capable of crossing a proxy and respecting a block/allow list. This - does set up a HTTPConnectionPool based on a multiplier of 100 times - hs.config.caches.global_factor, as it is responsible for pushing to Sygnal. + An HTTP client capable of crossing a proxy and respecting a block/allow list. + + This also configures a larger / longer lasting HTTP connection pool. Args: hs: The HomeServer instance to pass in @@ -788,11 +788,8 @@ def __init__( # If we have an IP blacklist, we need to use a DNS resolver which # filters out blacklisted IP addresses, to prevent DNS rebinding. self.reactor: ISynapseReactor = BlacklistingReactorWrapper( - hs.get_reactor(), self._ip_whitelist, self._ip_blacklist + self.reactor, self._ip_whitelist, self._ip_blacklist ) - else: - # This should have already been set up in the call to super(). Make sure. - self.reactor = hs.get_reactor() # the pusher makes lots of concurrent SSL connections to Sygnal, and tends to # do so in batches, so we need to allow the pool to keep lots of idle From a3bdb1f01032225dfb51db4d2a5acc1a1c71d993 Mon Sep 17 00:00:00 2001 From: Jason Little Date: Thu, 13 Apr 2023 15:28:27 -0500 Subject: [PATCH 4/5] Follow up fix to rename `BaseSynapseClient` to `BaseHttpClient` --- synapse/http/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/http/client.py b/synapse/http/client.py index a969f4f40929..fbb6cd2bc218 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -755,7 +755,7 @@ async def get_file( ) -class SimpleHttpClient(BaseSynapseClient): +class SimpleHttpClient(BaseHttpClient): """ An HTTP client capable of crossing a proxy and respecting a block/allow list. From 445b6e3421a9b6b9971a86b2d2f9e9db2f4741de Mon Sep 17 00:00:00 2001 From: Jason Little Date: Fri, 14 Apr 2023 15:05:49 -0500 Subject: [PATCH 5/5] From review: remove default Agent and add to docstring 'BYOA'. --- synapse/http/client.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/synapse/http/client.py b/synapse/http/client.py index fbb6cd2bc218..91fe474f36d9 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -315,13 +315,16 @@ def request( class BaseHttpClient: """ A simple, no-frills HTTP client with methods that wrap up common ways of - using HTTP in Matrix + using HTTP in Matrix. Does not come with a default Agent, subclasses will need to + define their own. Args: hs: The HomeServer instance to pass in treq_args: Extra keyword arguments to be given to treq.request. """ + agent: IAgent + def __init__( self, hs: "HomeServer", @@ -345,11 +348,6 @@ def __init__( # reactor. self._cooperator = Cooperator(scheduler=_make_scheduler(hs.get_reactor())) - self.agent: IAgent = Agent( - self.reactor, - contextFactory=hs.get_http_client_context_factory(), - ) - async def request( self, method: str,