Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MSC4171 Omit service members from room summary #17866

Closed
wants to merge 23 commits into from
Closed
Show file tree
Hide file tree
Changes from 21 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/17866.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add experimental support for filtering out "service members" from room summary responses, as described in [MSC4171](https://github.com/matrix-org/matrix-spec-proposals/pull/4171).
2 changes: 2 additions & 0 deletions synapse/api/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ class EventTypes:

PollStart: Final = "m.poll.start"

MSC4171FunctionalMembers: Final = "io.element.functional_members"


class ToDeviceEventTypes:
RoomKeyRequest: Final = "m.room_key_request"
Expand Down
3 changes: 3 additions & 0 deletions synapse/config/experimental.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,5 +448,8 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
# MSC4151: Report room API (Client-Server API)
self.msc4151_enabled: bool = experimental.get("msc4151_enabled", False)

# MSC4171: Service members
self.msc4171_enabled: bool = experimental.get("msc4171_enabled", False)

# MSC4210: Remove legacy mentions
self.msc4210_enabled: bool = experimental.get("msc4210_enabled", False)
2 changes: 2 additions & 0 deletions synapse/federation/federation_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,8 @@ async def on_send_join_request(
state_event_ids: Collection[str]
servers_in_room: Optional[Collection[str]]
if caller_supports_partial_state:
# NOTE: We do not exclude service members from the federated
# room summary.
summary = await self.store.get_room_summary(room_id)
state_event_ids = _get_event_ids_for_partial_state_join(
event, prev_state_ids, summary
Expand Down
8 changes: 7 additions & 1 deletion synapse/handlers/sliding_sync/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ def __init__(self, hs: "HomeServer"):
self.event_sources = hs.get_event_sources()
self.relations_handler = hs.get_relations_handler()
self.rooms_to_exclude_globally = hs.config.server.rooms_to_exclude_from_sync
self.should_exclude_service_members = hs.config.experimental.msc4171_enabled

self.is_mine_id = hs.is_mine_id

self.connection_store = SlidingSyncConnectionStore(self.store)
Expand Down Expand Up @@ -829,7 +831,11 @@ async def get_room_sync_data(
# For invite/knock rooms we don't include the information.
room_membership_summary = {}
else:
room_membership_summary = await self.store.get_room_summary(room_id)
room_membership_summary = await self.store.get_room_summary(
room_id,
self.should_exclude_service_members
and sync_config.exclude_service_members_from_heroes,
)
# TODO: Reverse/rewind back to the `to_token`

hero_user_ids = extract_heroes_from_room_summary(
Expand Down
8 changes: 7 additions & 1 deletion synapse/handlers/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ class SyncConfig:
filter_collection: FilterCollection
is_guest: bool
device_id: Optional[str]
exclude_service_members_from_heroes: bool


@attr.s(slots=True, frozen=True, auto_attribs=True)
Expand Down Expand Up @@ -343,6 +344,7 @@ def __init__(self, hs: "HomeServer"):
self._task_scheduler = hs.get_task_scheduler()

self.should_calculate_push_rules = hs.config.push.enable_push
self.should_exclude_service_members = hs.config.experimental.msc4171_enabled

# TODO: flush cache entries on subsequent sync request.
# Once we get the next /sync request (ie, one with the same access token
Expand Down Expand Up @@ -1040,7 +1042,11 @@ async def compute_summary(
)

# this is heavily cached, thus: fast.
details = await self.store.get_room_summary(room_id)
details = await self.store.get_room_summary(
room_id,
self.should_exclude_service_members
and sync_config.exclude_service_members_from_heroes,
)

name_id = state_ids.get((EventTypes.Name, ""))
canonical_alias_id = state_ids.get((EventTypes.CanonicalAlias, ""))
Expand Down
16 changes: 15 additions & 1 deletion synapse/rest/client/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,16 +151,20 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
)
filter_id = parse_string(request, "filter")
full_state = parse_boolean(request, "full_state", default=False)
exclude_service_members_from_heroes = parse_boolean(
request, "msc4171_exclude_service_members", default=False
)

logger.debug(
"/sync: user=%r, timeout=%r, since=%r, "
"set_presence=%r, filter_id=%r, device_id=%r",
"set_presence=%r, filter_id=%r, device_id=%r, exclude_service_members=%r",
user,
timeout,
since,
set_presence,
filter_id,
device_id,
exclude_service_members_from_heroes,
)

# Stream position of the last ignored users account data event for this user,
Expand Down Expand Up @@ -220,6 +224,7 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
filter_collection=filter_collection,
is_guest=requester.is_guest,
device_id=device_id,
exclude_service_members_from_heroes=exclude_service_members_from_heroes,
)

since_token = None
Expand Down Expand Up @@ -682,12 +687,16 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:

timeout = parse_integer(request, "timeout", default=0)
since = parse_string(request, "since")
exclude_service_members_from_heroes = parse_boolean(
request, "msc4171_exclude_service_members", default=False
)

sync_config = SyncConfig(
user=user,
filter_collection=self.only_member_events_filter_collection,
is_guest=requester.is_guest,
device_id=device_id,
exclude_service_members_from_heroes=exclude_service_members_from_heroes,
)

since_token = None
Expand Down Expand Up @@ -886,6 +895,10 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
# Position in the stream
from_token_string = parse_string(request, "pos")

exclude_service_members_from_heroes = parse_boolean(
request, "msc4171_exclude_service_members", default=False
)

from_token = None
if from_token_string is not None:
from_token = await SlidingSyncStreamToken.from_string(
Expand Down Expand Up @@ -935,6 +948,7 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
lists=body.lists,
room_subscriptions=body.room_subscriptions,
extensions=body.extensions,
exclude_service_members_from_heroes=exclude_service_members_from_heroes,
)

sliding_sync_results = await self.sliding_sync_handler.wait_for_sync_for_user(
Expand Down
45 changes: 40 additions & 5 deletions synapse/storage/databases/main/roommember.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,9 @@ def _get_users_in_room_with_profiles(
)

@cached(max_entries=100000) # type: ignore[synapse-@cached-mutable]
async def get_room_summary(self, room_id: str) -> Mapping[str, MemberSummary]:
async def get_room_summary(
self, room_id: str, exclude_service_users: bool = False
) -> Mapping[str, MemberSummary]:
"""
Get the details of a room roughly suitable for use by the room
summary extension to /sync. Useful when lazy loading room members.
Expand All @@ -301,12 +303,13 @@ async def get_room_summary(self, room_id: str) -> Mapping[str, MemberSummary]:

Args:
room_id: The room ID to query
exclude_service_users: Should MSC4171 be used to exclude service members
Returns:
dict of membership states, pointing to a MemberSummary named tuple.
"""

def _get_room_summary_txn(
txn: LoggingTransaction,
txn: LoggingTransaction, exclude_members: List[str]
) -> Dict[str, MemberSummary]:
# first get counts.
# We do this all in one transaction to keep the cache small.
Expand All @@ -318,6 +321,10 @@ def _get_room_summary_txn(
for membership, count in counts.items():
res.setdefault(membership, MemberSummary([], count))

exclude_users_clause, args = make_in_list_sql_clause(
self.database_engine, "state_key", exclude_members, negative=True
)
Comment on lines +324 to +326
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should be careful about there being too many exclude_members and bailing to prevent DoS.

Alternatively, we could just fetch N extra members, like we do in case one of them is the calling user, and do the exclusion outside of the SQL. But we should probably have a limit on that as well. This is perhaps something to clarify in the spec, after which we can bail if the list is longer than the spec'd max. Or perhaps we just rely on the max length of an event?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was going to rely on the max length of an event, which I think even if you had a long list of very short userIds would still only be up to about 9k or so (once you've included the usual event padding).

I'm not quite sure on the performance cost here, but I'd assume that a 9k string list filter in postgres isn't terrible as it's not going to impact IO.

Alternatively, we could do as you say and set a sensible max number of users in the spec (say 100 or so). I'm generally a bit allergic to limitations in the spec, as someone's probably going to come up with a use case of 101 members; however, it might be justified in the case of performance.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, and get_room_summary has a big fat cache on it, so it's probably okay to do a slightly more expensive call here. Admittedly this does impact the hot path of sync, but I think the operation of pulling out excluded users is fairly fast.

Copy link
Contributor

@MadLittleMods MadLittleMods Oct 31, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We tend to limit make_in_list_sql_clause(...) to at most 1000 entries (see usages of batch_iter(..., 1000)), but it's a bit tough to do here with the negative=True condition.

We should at least add a comment here that we are assuming that there should be no more than 9.3k members to exclude based on the max length of an event (65535 bytes).

  • There is nothing stopping someone from just using a single letter string over and over in the list so it could be practically ~65k things in the list.
  • With actual MXIDs: 16.4k = 65535 / 4 (@m:h) maximally
  • In the most likely maximal scenario with public federation: 10.9k = 65535 / 6 (@m:h.io)
  • With enough unique combos in localpart: 9.3k = 65535 / 7 (@mm:h.io)

We could have a valid MXID check and add them to a Set to deduplicate but I don't think it's worth the extra computation.

Perhaps we should just have a practical limit to re-evaluate if someone hits it. Have a 1k check and set exclude_members = [] in that case with a log warning (warning instead of assert because we don't want to break the whole /sync response). This can always be increased in the future when someone has a practical use case, but it avoids rooms whose only goal is performance abuse.


# Order by membership (joins -> invites -> leave (former insiders) ->
# everything else (outsiders like bans/knocks), then by `stream_ordering` so
# the first members in the room show up first and to make the sort stable
Expand All @@ -330,16 +337,18 @@ def _get_room_summary_txn(
FROM current_state_events
WHERE type = 'm.room.member' AND room_id = ?
AND membership IS NOT NULL
AND %s
ORDER BY
CASE membership WHEN ? THEN 1 WHEN ? THEN 2 WHEN ? THEN 3 ELSE 4 END ASC,
event_stream_ordering ASC
LIMIT ?
"""
""" % (exclude_users_clause)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use f-strings, or the more modern .format(..).

The old % is a bit of a footgun, as e.g. it's meant to take a collection like a tuple, but you've actually just passed it a plain string (since you forgot a comma), which is valid as a string is a collection of strings. This does work, as Python does magic to detect the case, but it's a bit ugh.


txn.execute(
sql,
(
room_id,
*args,
# Sort order
Membership.JOIN,
Membership.INVITE,
Expand All @@ -357,8 +366,33 @@ def _get_room_summary_txn(

return res

exclude_members = []
if exclude_service_users:
functional_members_event_id = await self.db_pool.simple_select_one_onecol(
table="current_state_events",
keyvalues={
"room_id": room_id,
"type": EventTypes.MSC4171FunctionalMembers,
"state_key": "",
},
retcol="event_id",
allow_none=True,
)
if functional_members_event_id:
functional_members_event = await self.get_event(
functional_members_event_id
)
functional_members_data = functional_members_event.content.get(
"service_members"
)
# ONLY use this value if this looks like a valid list of strings. Otherwise, ignore.
if isinstance(functional_members_data, list) and all(
isinstance(item, str) for item in functional_members_data
):
exclude_members = functional_members_data

return await self.db_pool.runInteraction(
"get_room_summary", _get_room_summary_txn
"get_room_summary", _get_room_summary_txn, exclude_members
)

@cached()
Expand Down Expand Up @@ -1754,7 +1788,8 @@ def __init__(


def extract_heroes_from_room_summary(
details: Mapping[str, MemberSummary], me: str
details: Mapping[str, MemberSummary],
me: str,
) -> List[str]:
"""Determine the users that represent a room, from the perspective of the `me` user.

Expand Down
1 change: 1 addition & 0 deletions tests/handlers/test_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -1079,4 +1079,5 @@ def generate_sync_config(
filter_collection=filter_collection,
is_guest=False,
device_id=device_id,
exclude_service_members_from_heroes=False,
)
93 changes: 92 additions & 1 deletion tests/storage/test_roommember.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,14 +628,105 @@ def test_extract_heroes_from_room_summary_first_five_joins(self) -> None:
room_membership_summary = self.get_success(self.store.get_room_summary(room_id))

hero_user_ids = extract_heroes_from_room_summary(
room_membership_summary, me="@fakuser"
room_membership_summary, me="@fakeuser"
)

# First 5 users to join the room
self.assertListEqual(
hero_user_ids, [user1_id, user2_id, user3_id, user4_id, user5_id]
)

def test_extract_heroes_from_room_summary_exclude_service_members(self) -> None:
    """
    Test that `extract_heroes_from_room_summary(...)` skips the users listed in
    the functional members state event and returns the first 5 remaining joins.
    """
    # Register and log in user1..user7, keeping (user_id, access_token) pairs
    # in registration order.
    accounts = []
    for index in range(1, 8):
        user_id = self.register_user(f"user{index}", "pass")
        accounts.append((user_id, self.login(user_id, "pass")))

    # user1 creates the room; user2 and user3 are declared service members and
    # user4..user7 are ordinary members.
    (creator_id, creator_tok), *joiners = accounts
    service_member_ids = [user_id for user_id, _ in joiners[:2]]
    expected_hero_ids = [creator_id] + [user_id for user_id, _ in joiners[2:]]

    # Setup the room (the creator is joined to the room)
    room_id = self.helper.create_room_as(creator_id, tok=creator_tok)

    # Mark user2 and user3 as service members before anyone else joins
    self.helper.send_state(
        room_id,
        event_type=EventTypes.MSC4171FunctionalMembers,
        body={"service_members": service_member_ids},
        tok=creator_tok,
    )

    # user2 -> user7 join, in that order
    for joiner_id, joiner_tok in joiners:
        self.helper.join(room_id, joiner_id, tok=joiner_tok)

    # Ask for the summary with service-member exclusion switched on
    summary = self.get_success(self.store.get_room_summary(room_id, True))
    heroes = extract_heroes_from_room_summary(summary, me="@fakeuser")

    # The creator plus user4..user7: the first 5 joins that are not service members
    self.assertListEqual(heroes, expected_hero_ids)

MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
def test_extract_heroes_from_room_summary_exclude_service_members_with_empty_heroes(
    self,
) -> None:
    """
    Test that `extract_heroes_from_room_summary(...)` returns no heroes at all
    when every other user in the room is listed as a service member.
    """
    creator_id = self.register_user("user1", "pass")
    creator_tok = self.login(creator_id, "pass")

    # user2 and user3 will both be declared service members
    service_members = []
    for localpart in ("user2", "user3"):
        member_id = self.register_user(localpart, "pass")
        service_members.append((member_id, self.login(member_id, "pass")))

    # Setup the room (the creator is joined to the room)
    room_id = self.helper.create_room_as(creator_id, tok=creator_tok)

    # Exclude everyone except the creator; the creator is passed as `me` below,
    # so the test expects them to be absent from the heroes as well.
    self.helper.send_state(
        room_id,
        event_type=EventTypes.MSC4171FunctionalMembers,
        body={"service_members": [member_id for member_id, _ in service_members]},
        tok=creator_tok,
    )

    for member_id, member_tok in service_members:
        self.helper.join(room_id, member_id, tok=member_tok)

    # Ask for the summary with service-member exclusion switched on
    summary = self.get_success(self.store.get_room_summary(room_id, True))
    heroes = extract_heroes_from_room_summary(summary, me=creator_id)

    # Nobody is left to act as a hero once the service members are excluded
    self.assertListEqual(heroes, [])

def test_extract_heroes_from_room_summary_membership_order(self) -> None:
"""
Test that `extract_heroes_from_room_summary(...)` prefers joins/invites over
Expand Down
Loading