From 63085a013f653e46e9ddd3bd7db3a711190bb198 Mon Sep 17 00:00:00 2001
From: Nicolas Werner
Date: Tue, 22 Aug 2023 14:12:28 +0200
Subject: [PATCH 1/4] Properly update retry_last_ts when hitting the maximum retry interval

This was broken in 1.87 when the maximum retry interval got changed from
almost infinite to a week (and made configurable).

fixes #16101

Signed-off-by: Nicolas Werner
---
 synapse/storage/databases/main/transactions.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py
index c3bd36efc9c2..84ae428333f0 100644
--- a/synapse/storage/databases/main/transactions.py
+++ b/synapse/storage/databases/main/transactions.py
@@ -258,6 +258,7 @@ def _set_destination_retry_timings_txn(
                     EXCLUDED.retry_interval = 0
                     OR destinations.retry_interval IS NULL
                     OR destinations.retry_interval < EXCLUDED.retry_interval
+                    OR destinations.retry_last_ts > EXCLUDED.retry_last_ts
         """
 
         txn.execute(sql, (destination, failure_ts, retry_last_ts, retry_interval))

From 42b3d915130d395cd4e50f7970571e0418c718ca Mon Sep 17 00:00:00 2001
From: Nicolas Werner
Date: Tue, 22 Aug 2023 14:17:25 +0200
Subject: [PATCH 2/4] Add changelog

---
 changelog.d/16156.bugfix | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/16156.bugfix

diff --git a/changelog.d/16156.bugfix b/changelog.d/16156.bugfix
new file mode 100644
index 000000000000..17284297cfae
--- /dev/null
+++ b/changelog.d/16156.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in 1.87 where synapse would send an excessive amount of federation requests to servers which have been offline for a long time. Contributed by Nico.

From b73c5dcf3702177bc9b71d6af9f2e24b0b2d0504 Mon Sep 17 00:00:00 2001
From: Mathieu Velten
Date: Wed, 23 Aug 2023 09:35:38 +0200
Subject: [PATCH 3/4] Change fix + add test

---
 .../storage/databases/main/transactions.py |  3 +-
 tests/util/test_retryutils.py              | 51 +++++++++++++++++++
 2 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py
index 84ae428333f0..c84209e8bc31 100644
--- a/synapse/storage/databases/main/transactions.py
+++ b/synapse/storage/databases/main/transactions.py
@@ -257,8 +257,7 @@ def _set_destination_retry_timings_txn(
                 WHERE
                     EXCLUDED.retry_interval = 0
                     OR destinations.retry_interval IS NULL
-                    OR destinations.retry_interval < EXCLUDED.retry_interval
-                    OR destinations.retry_last_ts > EXCLUDED.retry_last_ts
+                    OR destinations.retry_interval <= EXCLUDED.retry_interval
         """
 
         txn.execute(sql, (destination, failure_ts, retry_last_ts, retry_interval))

diff --git a/tests/util/test_retryutils.py b/tests/util/test_retryutils.py
index 1277e1a865ff..4bcd17a6fc93 100644
--- a/tests/util/test_retryutils.py
+++ b/tests/util/test_retryutils.py
@@ -108,3 +108,54 @@ def test_limiter(self) -> None:
 
         new_timings = self.get_success(store.get_destination_retry_timings("test_dest"))
         self.assertIsNone(new_timings)
+
+    def test_max_retry_interval(self) -> None:
+        """Test that `destination_max_retry_interval` setting works as expected"""
+        store = self.hs.get_datastores().main
+
+        destination_max_retry_interval_ms = (
+            self.hs.config.federation.destination_max_retry_interval_ms
+        )
+
+        self.get_success(get_retry_limiter("test_dest", self.clock, store))
+        self.pump(1)
+
+        failure_ts = self.clock.time_msec()
+
+        # Simulate reaching destination_max_retry_interval
+        self.get_success(
+            store.set_destination_retry_timings(
+                "test_dest",
+                failure_ts=failure_ts,
+                retry_last_ts=failure_ts,
+                retry_interval=destination_max_retry_interval_ms,
+            )
+        )
+
+        # Check it fails
+        self.get_failure(
+            get_retry_limiter("test_dest", self.clock, store), NotRetryingDestination
+        )
+
+        # Get past retry_interval and we can try again, and still throw an error to continue the backoff
+        self.reactor.advance(destination_max_retry_interval_ms / 1000 + 1)
+        limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))
+        self.pump(1)
+        try:
+            with limiter:
+                self.pump(1)
+                raise AssertionError("argh")
+        except AssertionError:
+            pass
+
+        self.pump()
+
+        # retry_interval does not increase and stays at destination_max_retry_interval_ms
+        new_timings = self.get_success(store.get_destination_retry_timings("test_dest"))
+        assert new_timings is not None
+        self.assertEqual(new_timings.retry_interval, destination_max_retry_interval_ms)
+
+        # Check it fails
+        self.get_failure(
+            get_retry_limiter("test_dest", self.clock, store), NotRetryingDestination
+        )

From ec5a7563b679670be8231d78e8ae62bd5465b3e6 Mon Sep 17 00:00:00 2001
From: Mathieu Velten
Date: Wed, 23 Aug 2023 09:40:46 +0200
Subject: [PATCH 4/4] Add comment

---
 synapse/storage/databases/main/transactions.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py
index c84209e8bc31..48e4b0ba3c73 100644
--- a/synapse/storage/databases/main/transactions.py
+++ b/synapse/storage/databases/main/transactions.py
@@ -242,6 +242,8 @@ def _set_destination_retry_timings_txn(
     ) -> None:
         # Upsert retry time interval if retry_interval is zero (i.e. we're
         # resetting it) or greater than the existing retry interval.
+        # We also upsert when the new retry interval is the same as the existing one,
+        # since it will be the case when `destination_max_retry_interval` is reached.
         #
         # WARNING: This is executed in autocommit, so we shouldn't add any more
         # SQL calls in here (without being very careful).
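
For context, the sketch below illustrates why patch 3 relaxes the upsert guard from `<` to `<=`: once the stored retry_interval has been capped at destination_max_retry_interval, every later failure proposes an interval equal to the stored one, and with a strict `<` the row is never rewritten, so retry_last_ts goes stale and the destination is retried far too often. This is a minimal, self-contained Python sketch of that behaviour, not code from the patches; MAX_RETRY_INTERVAL_MS, record_failure and the plain dict standing in for the destinations row are made-up names, not Synapse APIs.

# Illustrative only: MAX_RETRY_INTERVAL_MS, record_failure and the dict below
# are stand-ins for the destinations table, not Synapse APIs.
MAX_RETRY_INTERVAL_MS = 7 * 24 * 60 * 60 * 1000  # roughly the one-week cap mentioned in patch 1


def record_failure(row: dict, now_ms: int, new_interval_ms: int, strict: bool) -> None:
    """Mimic the upsert's WHERE guard: only overwrite the row when the guard passes."""
    guard = (
        new_interval_ms == 0
        or row.get("retry_interval") is None
        or (
            row["retry_interval"] < new_interval_ms
            if strict
            else row["retry_interval"] <= new_interval_ms
        )
    )
    if guard:
        row["retry_last_ts"] = now_ms
        row["retry_interval"] = min(new_interval_ms, MAX_RETRY_INTERVAL_MS)


# Once the interval is already at the cap, a new failure proposes the same value.
row = {"retry_last_ts": 1_000, "retry_interval": MAX_RETRY_INTERVAL_MS}

record_failure(row, now_ms=2_000, new_interval_ms=MAX_RETRY_INTERVAL_MS, strict=True)
assert row["retry_last_ts"] == 1_000  # stale timestamp: the pre-fix behaviour

record_failure(row, now_ms=2_000, new_interval_ms=MAX_RETRY_INTERVAL_MS, strict=False)
assert row["retry_last_ts"] == 2_000  # timestamp advances: the behaviour after patch 3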