From c1443894ba50358a84928c364b24bea4f3906b43 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 May 2019 14:45:51 +0100 Subject: [PATCH 1/5] Correctly fitler out extremities with soft failed prevs When we receive a soft failed event we, correctly, *do not* update the forward extremity table with the event. However, if we later receive an event that references the soft failed event we then need to remove the soft failed events prev events from the forward extremities table, otherwise we just build up forward extremities. Fixes #5269 --- synapse/storage/events.py | 69 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2ffc27ff4188..55f72417acef 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -558,6 +558,11 @@ def _calculate_new_extremities(self, room_id, event_contexts, latest_event_ids): existing_prevs = yield self._get_events_which_are_prevs(result) result.difference_update(existing_prevs) + existing_prevs = yield self._get_prevs_before_rejected( + e_id for event in new_events for e_id in event.prev_event_ids() + ) + result.difference_update(existing_prevs) + defer.returnValue(result) @defer.inlineCallbacks @@ -573,7 +578,7 @@ def _get_events_which_are_prevs(self, event_ids): """ results = [] - def _get_events(txn, batch): + def _get_events_which_are_prevs_txn(txn, batch): sql = """ SELECT prev_event_id, internal_metadata FROM event_edges @@ -596,10 +601,70 @@ def _get_events(txn, batch): ) for chunk in batch_iter(event_ids, 100): - yield self.runInteraction("_get_events_which_are_prevs", _get_events, chunk) + yield self.runInteraction( + "_get_events_which_are_prevs", _get_events_which_are_prevs_txn, + chunk, + ) defer.returnValue(results) + @defer.inlineCallbacks + def _get_prevs_before_rejected(self, event_ids): + """Given a set of events recursively find all prev events that have + been soft-failed or rejected. Then return those soft failed events + and their prev events. + + This is used to find extremities that are ancestors of new events, but + are separated by soft failed events. + + Args: + event_ids (Iterable[str]): Events to find prev events for. Note + that these must have already been persisted. + + Returns: + Deferred[set[str]] + """ + existing_prevs = set() + + def _get_prevs_before_rejected_txn(txn, batch): + to_recursively_check = batch + + while to_recursively_check: + sql = """ + SELECT + event_id, prev_event_id, internal_metadata, + rejections.event_id IS NOT NULL + FROM event_edges + INNER JOIN events USING (event_id) + LEFT JOIN rejections USING (event_id) + LEFT JOIN event_json USING (event_id) + WHERE + event_id IN (%s) + AND NOT events.outlier + """ % ( + ",".join("?" for _ in to_recursively_check), + ) + + txn.execute(sql, to_recursively_check) + to_recursively_check = [] + + for event_id, prev_event_id, metadata, rejected in txn: + if prev_event_id in existing_prevs: + continue + + soft_failed = json.loads(metadata).get("soft_failed") + if soft_failed or rejected: + to_recursively_check.append(prev_event_id) + existing_prevs.add(prev_event_id) + + for chunk in batch_iter(event_ids, 100): + yield self.runInteraction( + "_get_prevs_before_rejected", _get_prevs_before_rejected_txn, + chunk, + ) + + defer.returnValue(existing_prevs) + @defer.inlineCallbacks def _get_new_state_after_events( self, room_id, events_context, old_latest_event_ids, new_latest_event_ids From 18e35f518bf122a443e9c8e21fbaaec0c493ab89 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 May 2019 14:51:15 +0100 Subject: [PATCH 2/5] Newsfile --- changelog.d/5274.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5274.bugfix diff --git a/changelog.d/5274.bugfix b/changelog.d/5274.bugfix new file mode 100644 index 000000000000..9e14d20289f8 --- /dev/null +++ b/changelog.d/5274.bugfix @@ -0,0 +1 @@ +Fix bug where we leaked extremities when we soft failed events, leading to performance degradation. From 06b5360e434a8dfa33568d50cb331ddba1ef2f29 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 May 2019 15:36:45 +0100 Subject: [PATCH 3/5] Update synapse/storage/events.py Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/storage/events.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 55f72417acef..37595f6de660 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -602,7 +602,8 @@ def _get_events_which_are_prevs_txn(txn, batch): for chunk in batch_iter(event_ids, 100): yield self.runInteraction( - "_get_events_which_are_prevs", _get_events_which_are_prevs_txn, + "_get_events_which_are_prevs", + _get_events_which_are_prevs_txn, chunk, ) From 990a0c8eebf0ea84875e7f238f71e8e0e3d85293 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 28 May 2019 15:43:31 +0100 Subject: [PATCH 4/5] Fix up comments --- synapse/storage/events.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 37595f6de660..3b63e15bdfcd 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -554,10 +554,13 @@ def _calculate_new_extremities(self, room_id, event_contexts, latest_event_ids): e_id for event in new_events for e_id in event.prev_event_ids() ) - # Finally, remove any events which are prev_events of any existing events. + # Remove any events which are prev_events of any existing events. existing_prevs = yield self._get_events_which_are_prevs(result) result.difference_update(existing_prevs) + # Finally handle the case where the new events have soft-failed prev + # events. If they do we need to remove them and their prev events, + # otherwise we end up with dangling extremities. existing_prevs = yield self._get_prevs_before_rejected( e_id for event in new_events for e_id in event.prev_event_ids() ) @@ -602,7 +605,7 @@ def _get_events_which_are_prevs_txn(txn, batch): for chunk in batch_iter(event_ids, 100): yield self.runInteraction( - "_get_events_which_are_prevs", + "_get_events_which_are_prevs", _get_events_which_are_prevs_txn, chunk, ) @@ -611,7 +614,9 @@ def _get_events_which_are_prevs_txn(txn, batch): @defer.inlineCallbacks def _get_prevs_before_rejected(self, event_ids): - """Given a set of events recursively find all prev events that have + """Get soft-failed ancestors to remove from the extremities. + + Given a set of events recursively find all prev events that have been soft-failed or rejected. Then return those soft failed events and their prev events. @@ -625,6 +630,9 @@ def _get_prevs_before_rejected(self, event_ids): Returns: Deferred[set[str]] """ + + # The set of event_ids to return. This includes all soft-failed events + # and their prev events. existing_prevs = set() def _get_prevs_before_rejected_txn(txn, batch): @@ -660,7 +668,8 @@ def _get_prevs_before_rejected_txn(txn, batch): for chunk in batch_iter(event_ids, 100): yield self.runInteraction( - "_get_prevs_before_rejected", _get_prevs_before_rejected_txn, + "_get_prevs_before_rejected", + _get_prevs_before_rejected_txn, chunk, ) From 12fbcd16f9c4369ef9d32d1b95089a7e44585aa9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 29 May 2019 09:26:50 +0100 Subject: [PATCH 5/5] Update comments --- synapse/storage/events.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 3b63e15bdfcd..6e9f3d1dc0b4 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -616,9 +616,10 @@ def _get_events_which_are_prevs_txn(txn, batch): def _get_prevs_before_rejected(self, event_ids): """Get soft-failed ancestors to remove from the extremities. - Given a set of events recursively find all prev events that have - been soft-failed or rejected. Then return those soft failed events - and their prev events. + Given a set of events, find all those that have been soft-failed or + rejected. Returns those soft failed/rejected events and their prev + events (whether soft-failed/rejected or not), and recurses up the + prev-event graph until it finds no more soft-failed/rejected events. This is used to find extremities that are ancestors of new events, but are separated by soft failed events.