Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Implement the lazy_load_members room state filter parameter #2970

Merged
merged 58 commits into from
Jul 25, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
9b334b3
WIP experiment in lazyloading room members
ara4n Mar 11, 2018
8713365
typos
ara4n Mar 11, 2018
97c0496
fix sqlite where clause
ara4n Mar 12, 2018
fdedcd1
correctly handle None state_keys
ara4n Mar 12, 2018
1b1c137
fix bug #2926
ara4n Mar 13, 2018
52f7e23
PR feedbackz
ara4n Mar 13, 2018
b2aba9e
build where_clause sanely
ara4n Mar 13, 2018
865377a
disable optimisation for searching for state groups
ara4n Mar 13, 2018
afbf4d3
typoe
ara4n Mar 13, 2018
14a9d2f
ensure we always include the members for a given timeline block
ara4n Mar 13, 2018
12350e3
merge proper fix to bug 2969
ara4n Mar 13, 2018
f0f9a06
remove comment now #2969 is fixed
ara4n Mar 13, 2018
ccca028
make it work
ara4n Mar 13, 2018
c9d72e4
oops
ara4n Mar 13, 2018
4d0cfef
add copyright to nudge CI
ara4n Mar 14, 2018
9f77001
pep8
ara4n Mar 14, 2018
056a6df
Merge branch 'develop' into matthew/filter_members
ara4n Mar 14, 2018
3bc5bd2
make incr syncs work
ara4n Mar 16, 2018
bf49d2d
Replace some ujson with simplejson to make it work
erikjohnston Mar 15, 2018
5b3b3aa
simplify timeline_start_members
ara4n Mar 16, 2018
f7dcc40
add state_ids for timeline entries
ara4n Mar 16, 2018
4f0493c
fix tsm search again
ara4n Mar 16, 2018
fc5397f
remove debug
ara4n Mar 16, 2018
0b56290
remove stale import
ara4n Mar 16, 2018
366f730
only get member state IDs for incremental syncs if we're filtering
ara4n Mar 18, 2018
478af0f
reshuffle todo & comments
ara4n Mar 19, 2018
b2f2282
make lazy_load_members configurable in filters
ara4n Mar 19, 2018
7a6df01
merge develop
ara4n May 28, 2018
a6c8f7c
add pydoc
ara4n May 29, 2018
5e6b31f
fix dumb typo
ara4n May 29, 2018
b69ff33
disable CPUMetrics if no /proc/self/stat
ara4n May 29, 2018
8df7bad
pep8
ara4n May 29, 2018
9bbb9f5
add lazy_load_members to the filter json schema
ara4n May 29, 2018
5f6122f
more comments
ara4n Jun 3, 2018
28f09fc
Merge branch 'develop' into matthew/filter_members
ara4n Jun 3, 2018
c96d882
Merge branch 'develop' into matthew/filter_members
ara4n Jun 10, 2018
be3adfc
merge develop pydoc for _get_state_for_groups
ara4n Jul 19, 2018
924eb34
add a filtered_types param to limit filtering to specific types
ara4n Jul 19, 2018
bcaec29
incorporate review
ara4n Jul 19, 2018
2f55830
fix thinkos; unbreak tests
ara4n Jul 19, 2018
1fa4f7e
first cut of a UT for testing state store (untested)
ara4n Jul 19, 2018
650daf5
make test work
ara4n Jul 19, 2018
254fb43
incorporate review
ara4n Jul 23, 2018
adfe29e
Merge branch 'develop' into matthew/filter_members
ara4n Jul 23, 2018
004a83b
changelog
ara4n Jul 23, 2018
efcdaca
handle case where types is [] on postgres correctly
ara4n Jul 23, 2018
cd241d6
incorporate more review
ara4n Jul 24, 2018
d19fba3
Merge branch 'develop' into matthew/filter_members
ara4n Jul 24, 2018
eb1d911
rather than adding ll_ids, remove them from p_ids
ara4n Jul 24, 2018
e22700c
consider non-filter_type types as wildcards, thus missing from the st…
ara4n Jul 24, 2018
1a01a5b
clarify comment on p_ids
ara4n Jul 24, 2018
454f59b
Merge branch 'develop' into matthew/filter_members
ara4n Jul 24, 2018
cb5c37a
handle the edge case for _get_some_state_from_cache where types is []
ara4n Jul 24, 2018
7d9fb88
incorporate more review.
ara4n Jul 25, 2018
0a7ee0a
add tests for _get_some_state_from_cache
ara4n Jul 25, 2018
0620d27
flake8
ara4n Jul 25, 2018
2565804
Merge branch 'develop' into matthew/filter_members
ara4n Jul 25, 2018
bc7944e
switch missing_types to be a bool
ara4n Jul 25, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/2970.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
add support for the lazy_loaded_members filter as per MSC1227
11 changes: 10 additions & 1 deletion synapse/api/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,10 @@
},
"contains_url": {
"type": "boolean"
}
},
"lazy_load_members": {
"type": "boolean"
},
}
}

Expand Down Expand Up @@ -261,6 +264,9 @@ def presence_limit(self):
def ephemeral_limit(self):
return self._room_ephemeral_filter.limit()

def lazy_load_members(self):
return self._room_state_filter.lazy_load_members()

def filter_presence(self, events):
return self._presence_filter.filter(events)

Expand Down Expand Up @@ -417,6 +423,9 @@ def filter(self, events):
def limit(self):
return self.filter_json.get("limit", 10)

def lazy_load_members(self):
return self.filter_json.get("lazy_load_members", False)


def _matches_wildcard(actual_value, filter_value):
if filter_value.endswith("*"):
Expand Down
116 changes: 102 additions & 14 deletions synapse/handlers/sync.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015 - 2016 OpenMarket Ltd
# Copyright 2015, 2016 OpenMarket Ltd
# Copyright 2018 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -416,29 +417,44 @@ def _load_filtered_recents(self, room_id, sync_config, now_token,
))

@defer.inlineCallbacks
def get_state_after_event(self, event):
def get_state_after_event(self, event, types=None, filtered_types=None):
"""
Get the room state after the given event

Args:
event(synapse.events.EventBase): event of interest
types(list[(str, str|None)]|None): List of (type, state_key) tuples
which are used to filter the state fetched. If `state_key` is None,
all events are returned of the given type.
May be None, which matches any key.
filtered_types(list[str]|None): Only apply filtering via `types` to this
list of event types. Other types of events are returned unfiltered.
If None, `types` filtering is applied to all events.

Returns:
A Deferred map from ((type, state_key)->Event)
"""
state_ids = yield self.store.get_state_ids_for_event(event.event_id)
state_ids = yield self.store.get_state_ids_for_event(
event.event_id, types, filtered_types=filtered_types,
)
if event.is_state():
state_ids = state_ids.copy()
state_ids[(event.type, event.state_key)] = event.event_id
defer.returnValue(state_ids)

@defer.inlineCallbacks
def get_state_at(self, room_id, stream_position):
def get_state_at(self, room_id, stream_position, types=None, filtered_types=None):
""" Get the room state at a particular stream position

Args:
room_id(str): room for which to get state
stream_position(StreamToken): point at which to get state
types(list[(str, str|None)]|None): List of (type, state_key) tuples
which are used to filter the state fetched. If `state_key` is None,
all events are returned of the given type.
filtered_types(list[str]|None): Only apply filtering via `types` to this
list of event types. Other types of events are returned unfiltered.
If None, `types` filtering is applied to all events.

Returns:
A Deferred map from ((type, state_key)->Event)
Expand All @@ -453,7 +469,9 @@ def get_state_at(self, room_id, stream_position):

if last_events:
last_event = last_events[-1]
state = yield self.get_state_after_event(last_event)
state = yield self.get_state_after_event(
last_event, types, filtered_types=filtered_types,
)

else:
# no events in this room - so presumably no state
Expand Down Expand Up @@ -485,18 +503,42 @@ def compute_state_delta(self, room_id, batch, sync_config, since_token, now_toke
# TODO(mjark) Check for new redactions in the state events.

with Measure(self.clock, "compute_state_delta"):

types = None
lazy_load_members = sync_config.filter_collection.lazy_load_members()
filtered_types = None

if lazy_load_members:
# We only request state for the members needed to display the
# timeline:

types = [
(EventTypes.Member, state_key)
for state_key in set(
event.sender # FIXME: we also care about invite targets etc.
for event in batch.events
)
]

# only apply the filtering to room members
filtered_types = [EventTypes.Member]

if full_state:
if batch:
current_state_ids = yield self.store.get_state_ids_for_event(
batch.events[-1].event_id
batch.events[-1].event_id, types=types,
filtered_types=filtered_types,
)

state_ids = yield self.store.get_state_ids_for_event(
batch.events[0].event_id
batch.events[0].event_id, types=types,
filtered_types=filtered_types,
)

else:
current_state_ids = yield self.get_state_at(
room_id, stream_position=now_token
room_id, stream_position=now_token, types=types,
filtered_types=filtered_types,
)

state_ids = current_state_ids
Expand All @@ -511,33 +553,58 @@ def compute_state_delta(self, room_id, batch, sync_config, since_token, now_toke
timeline_start=state_ids,
previous={},
current=current_state_ids,
lazy_load_members=lazy_load_members,
)
elif batch.limited:
state_at_previous_sync = yield self.get_state_at(
room_id, stream_position=since_token
room_id, stream_position=since_token, types=types,
filtered_types=filtered_types,
)

current_state_ids = yield self.store.get_state_ids_for_event(
batch.events[-1].event_id
batch.events[-1].event_id, types=types,
filtered_types=filtered_types,
)

state_at_timeline_start = yield self.store.get_state_ids_for_event(
batch.events[0].event_id
batch.events[0].event_id, types=types,
filtered_types=filtered_types,
)

timeline_state = {
(event.type, event.state_key): event.event_id
for event in batch.events if event.is_state()
}

# TODO: optionally filter out redundant membership events at this
# point, to stop repeatedly sending members in every /sync as if
# the client isn't tracking them.
# When implemented, this should filter using event_ids (not mxids).
# In practice, limited syncs are
# relatively rare so it's not a total disaster to send redundant
# members down at this point. Redundant members are ones which
# repeatedly get sent down /sync because we don't know if the client
# is caching them or not.

state_ids = _calculate_state(
timeline_contains=timeline_state,
timeline_start=state_at_timeline_start,
previous=state_at_previous_sync,
current=current_state_ids,
lazy_load_members=lazy_load_members,
)
else:
state_ids = {}
if lazy_load_members:
# TODO: filter out redundant members based on their mxids (not their
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

confused. What is a redundant member? Why do they have event_ids? are we currently filtering them by event_ids, or is this a note to remind us not to mistakenly filter by event_ids in the future?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

have added a comment to answer the above.

# event_ids) at this point. We know we can do it based on mxid as this
# is an non-gappy incremental sync.

if types:
state_ids = yield self.store.get_state_ids_for_event(
batch.events[0].event_id, types=types,
filtered_types=filtered_types,
)

state = {}
if state_ids:
Expand Down Expand Up @@ -1448,7 +1515,9 @@ def _action_has_highlight(actions):
return False


def _calculate_state(timeline_contains, timeline_start, previous, current):
def _calculate_state(
timeline_contains, timeline_start, previous, current, lazy_load_members,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can haz docstring for lazy_load_members pls

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done.

):
"""Works out what state to include in a sync response.

Args:
Expand All @@ -1457,6 +1526,9 @@ def _calculate_state(timeline_contains, timeline_start, previous, current):
previous (dict): state at the end of the previous sync (or empty dict
if this is an initial sync)
current (dict): state at the end of the timeline
lazy_load_members (bool): whether to return members from timeline_start
or not. assumes that timeline_start has already been filtered to
include only the members the client needs to know about.

Returns:
dict
Expand All @@ -1472,9 +1544,25 @@ def _calculate_state(timeline_contains, timeline_start, previous, current):
}

c_ids = set(e for e in current.values())
tc_ids = set(e for e in timeline_contains.values())
p_ids = set(e for e in previous.values())
ts_ids = set(e for e in timeline_start.values())
p_ids = set(e for e in previous.values())
tc_ids = set(e for e in timeline_contains.values())

# If we are lazyloading room members, we explicitly add the membership events
# for the senders in the timeline into the state block returned by /sync,
# as we may not have sent them to the client before. We find these membership
# events by filtering them out of timeline_start, which has already been filtered
# to only include membership events for the senders in the timeline.
# In practice, we can do this by removing them from the p_ids list,
# which is the list of relevant state we know we have already sent to the client.
# see https://github.com/matrix-org/synapse/pull/2970
# /files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809

if lazy_load_members:
p_ids.difference_update(
e for t, e in timeline_start.iteritems()
if t[0] == EventTypes.Member
)

state_ids = ((c_ids | ts_ids) - p_ids) - tc_ids

Expand Down
Loading