Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Correctly filter out extremities with soft failed prevs #5274

Merged
merged 5 commits into from
May 29, 2019
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/5274.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix bug where we leaked extremities when we soft failed events, leading to performance degradation.
81 changes: 78 additions & 3 deletions synapse/storage/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,10 +554,18 @@ def _calculate_new_extremities(self, room_id, event_contexts, latest_event_ids):
e_id for event in new_events for e_id in event.prev_event_ids()
)

# Finally, remove any events which are prev_events of any existing events.
# Remove any events which are prev_events of any existing events.
existing_prevs = yield self._get_events_which_are_prevs(result)
result.difference_update(existing_prevs)

# Finally handle the case where the new events have soft-failed prev
# events. If they do we need to remove them and their prev events,
# otherwise we end up with dangling extremities.
existing_prevs = yield self._get_prevs_before_rejected(
erikjohnston marked this conversation as resolved.
Show resolved Hide resolved
e_id for event in new_events for e_id in event.prev_event_ids()
)
result.difference_update(existing_prevs)

defer.returnValue(result)

@defer.inlineCallbacks
Expand All @@ -573,7 +581,7 @@ def _get_events_which_are_prevs(self, event_ids):
"""
results = []

def _get_events(txn, batch):
def _get_events_which_are_prevs_txn(txn, batch):
sql = """
SELECT prev_event_id, internal_metadata
FROM event_edges
Expand All @@ -596,10 +604,77 @@ def _get_events(txn, batch):
)

for chunk in batch_iter(event_ids, 100):
yield self.runInteraction("_get_events_which_are_prevs", _get_events, chunk)
yield self.runInteraction(
"_get_events_which_are_prevs",
_get_events_which_are_prevs_txn,
chunk,
)

defer.returnValue(results)

@defer.inlineCallbacks
def _get_prevs_before_rejected(self, event_ids):
"""Get soft-failed ancestors to remove from the extremities.

Given a set of events recursively find all prev events that have
erikjohnston marked this conversation as resolved.
Show resolved Hide resolved
been soft-failed or rejected. Then return those soft failed events
and their prev events.
erikjohnston marked this conversation as resolved.
Show resolved Hide resolved

This is used to find extremities that are ancestors of new events, but
are separated by soft failed events.

Args:
event_ids (Iterable[str]): Events to find prev events for. Note
that these must have already been persisted.

Returns:
Deferred[set[str]]
"""

# The set of event_ids to return. This includes all soft-failed events
# and their prev events.
existing_prevs = set()
erikjohnston marked this conversation as resolved.
Show resolved Hide resolved

def _get_prevs_before_rejected_txn(txn, batch):
to_recursively_check = batch

while to_recursively_check:
sql = """
SELECT
event_id, prev_event_id, internal_metadata,
rejections.event_id IS NOT NULL
FROM event_edges
INNER JOIN events USING (event_id)
LEFT JOIN rejections USING (event_id)
LEFT JOIN event_json USING (event_id)
WHERE
event_id IN (%s)
AND NOT events.outlier
""" % (
",".join("?" for _ in to_recursively_check),
)

txn.execute(sql, to_recursively_check)
to_recursively_check = []

for event_id, prev_event_id, metadata, rejected in txn:
if prev_event_id in existing_prevs:
continue

soft_failed = json.loads(metadata).get("soft_failed")
if soft_failed or rejected:
to_recursively_check.append(prev_event_id)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

surely we should only do this if the event is not in existing_prevs, to save duplication

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a check above if prev_event_id in existing_prevs?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

<- idiot

existing_prevs.add(prev_event_id)

for chunk in batch_iter(event_ids, 100):
yield self.runInteraction(
"_get_prevs_before_rejected",
_get_prevs_before_rejected_txn,
chunk,
)

defer.returnValue(existing_prevs)

@defer.inlineCallbacks
def _get_new_state_after_events(
self, room_id, events_context, old_latest_event_ids, new_latest_event_ids
Expand Down