Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add an approximate difference method to StateFilters #10825

Merged
merged 35 commits into from
Oct 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
7dad902
Add an approximate difference method to StateFilters
reivilibre Sep 15, 2021
1fe75e6
Add tests for the approximate difference of StateFilters
reivilibre Sep 15, 2021
a05692c
Newsfile
reivilibre Sep 15, 2021
10a7071
Try to clarify docstring for `approx_difference`
reivilibre Sep 17, 2021
d0e14d5
Process all the keys to return a narrower state filter
reivilibre Sep 17, 2021
a5fdd46
Add more test cases
reivilibre Sep 17, 2021
0e0085c
STASH
reivilibre Sep 17, 2021
9d50f05
Tighten up the postconditions of `approx_difference`
reivilibre Sep 20, 2021
ace3316
Merge remote-tracking branch 'origin/develop' into rei/sf_diff
reivilibre Sep 20, 2021
c72c436
More wordsmithing — thanks David
reivilibre Sep 20, 2021
bacd394
Revert "STASH"
reivilibre Sep 20, 2021
cd1de9b
Remove needless set construction
reivilibre Sep 22, 2021
6bedcba
Simplify logic a bit, since this isn't operating in-place anyway
reivilibre Sep 22, 2021
42617db
Attempt to clean up `approx_difference` with improved comments and names
reivilibre Sep 22, 2021
0c8e930
Split out tests into own TestCase class
reivilibre Sep 22, 2021
b6274d6
Add extensive tests for all 4 combinations of include_others
reivilibre Sep 22, 2021
f6b4dc5
Merge a test
reivilibre Sep 22, 2021
0d1c3d8
Split out some very simple tests
reivilibre Sep 22, 2021
18714d7
Deduplicate the old tests into the systematic style tests
reivilibre Sep 22, 2021
770afea
Add function to decompose a StateFilter into four parts
reivilibre Sep 22, 2021
e119af9
Add `StateFilter.freeze` convenience constructor
reivilibre Sep 22, 2021
70f646a
Add `recompose_from_four_parts` method as inverse
reivilibre Sep 22, 2021
c9bb226
Use a shorter version of `decompose_into_four_parts`.
reivilibre Sep 24, 2021
093f670
Use a shorter version of `recompose_from_four_parts`
reivilibre Sep 24, 2021
a187c24
Use a step-by-step implementation of `approx_difference` rather than …
reivilibre Sep 24, 2021
4bbe3d1
Nest the ifs for clarity; no functional change
reivilibre Sep 24, 2021
20bc299
Prefix decompose/recompose method names with underscores
reivilibre Sep 24, 2021
27c3a7a
Use self_excludes as it's equivalent with this definition of decompose
reivilibre Sep 24, 2021
54d77c9
Rename `subtrahend` to `other` to follow convention
reivilibre Sep 28, 2021
538f99e
Try 'included' rather than 'admitted' to describe state filters
reivilibre Sep 28, 2021
bf202bc
Rename derived variables from sub(trahend) to other.
reivilibre Sep 28, 2021
9169d38
Add a little bit of context as to why this is useful
reivilibre Sep 28, 2021
1f3008b
Use 'returned' instead of 'resultant' as that may be clearer
reivilibre Sep 28, 2021
3552bc1
Remove formal definition to focus on one way of explaining
reivilibre Sep 28, 2021
4eaf980
Use `StateFilter.freeze` in tests to improve readability
reivilibre Sep 29, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/10825.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add an 'approximate difference' method to `StateFilter`.
172 changes: 171 additions & 1 deletion synapse/storage/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
from typing import (
TYPE_CHECKING,
Awaitable,
Collection,
Dict,
Iterable,
List,
Mapping,
Optional,
Set,
Tuple,
Expand All @@ -29,7 +31,7 @@

from synapse.api.constants import EventTypes
from synapse.events import EventBase
from synapse.types import MutableStateMap, StateMap
from synapse.types import MutableStateMap, StateKey, StateMap

if TYPE_CHECKING:
from typing import FrozenSet # noqa: used within quoted type hint; flake8 sad
Expand Down Expand Up @@ -134,6 +136,23 @@ def from_lazy_load_member_list(members: Iterable[str]) -> "StateFilter":
include_others=True,
)

@staticmethod
def freeze(
    types: Mapping[str, Optional[Collection[str]]], include_others: bool
) -> "StateFilter":
    """
    Returns a (frozen) StateFilter with the same contents as the parameters
    specified here, which can be made of mutable types.

    Args:
        types: Map from state type to either a collection of state keys, or
            `None`. `None` values are passed through unchanged.
        include_others: Passed straight through to the `StateFilter`
            constructor.

    Returns:
        A StateFilter built from a `frozendict` in which every non-`None`
        value has been converted to a `frozenset`.
    """
    # NOTE(review): a reviewer suggested replacing this conversion with
    # `synapse.util.frozenutil.freeze(types)`. It is unverified here whether
    # that helper would yield frozensets for the values, so the explicit
    # conversion is kept.
    types_with_frozen_values: Dict[str, Optional[FrozenSet[str]]] = {
        state_type: None if state_keys is None else frozenset(state_keys)
        for state_type, state_keys in types.items()
    }

    return StateFilter(
        frozendict(types_with_frozen_values), include_others=include_others
    )

def return_expanded(self) -> "StateFilter":
"""Creates a new StateFilter where type wild cards have been removed
(except for memberships). The returned filter is a superset of the
Expand Down Expand Up @@ -356,6 +375,157 @@ def get_member_split(self) -> Tuple["StateFilter", "StateFilter"]:

return member_filter, non_member_filter

def _decompose_into_four_parts(
    self,
) -> Tuple[Tuple[bool, Set[str]], Tuple[Set[str], Set[StateKey]]]:
    """
    Decomposes this state filter into 4 constituent parts, which can be
    thought of as this:
        all? - minus_wildcards + plus_wildcards + plus_state_keys

    where
    * all represents ALL state
    * minus_wildcards represents entire state types to remove
    * plus_wildcards represents entire state types to add
    * plus_state_keys represents individual state keys to add

    See `_recompose_from_four_parts` for the other direction of this
    correspondence.

    Returns:
        A pair of pairs, in the order
        `(all, minus_wildcards), (plus_wildcards, plus_state_keys)`.
    """
    is_all = self.include_others

    # When starting from ALL, every type listed in `self.types` is removed
    # wholesale first; the wildcard and concrete-key parts below describe
    # what is then added back. Without ALL there is nothing to remove.
    excluded_types: Set[str] = set(self.types) if is_all else set()

    # A `None` entry in `self.types` marks a wildcard for that state type.
    wildcard_types: Set[str] = {
        state_type
        for state_type, state_keys in self.types.items()
        if state_keys is None
    }
    concrete_keys: Set[StateKey] = set(self.concrete_types())

    return (is_all, excluded_types), (wildcard_types, concrete_keys)

@staticmethod
def _recompose_from_four_parts(
    all_part: bool,
    minus_wildcards: Set[str],
    plus_wildcards: Set[str],
    plus_state_keys: Set[StateKey],
) -> "StateFilter":
    """
    Recomposes a state filter from 4 parts.

    See `_decompose_into_four_parts` (the other direction of this
    correspondence) for descriptions on each of the parts.

    Args:
        all_part: Whether the filter starts from ALL state; becomes the
            `include_others` flag of the returned filter.
        minus_wildcards: Entire state types to remove. Only consulted when
            `all_part` is true.
        plus_wildcards: Entire state types to add.
        plus_state_keys: Individual (state type, state key) pairs to add.

    Returns:
        The recomposed (frozen) StateFilter.
    """

    # {state type -> set of state keys OR None for wildcard}
    # (The same structure as that of a StateFilter.)
    new_types: Dict[str, Optional[Set[str]]] = {}

    # If we start with all, record the excluded state types as empty sets
    # so that they are not swept up by `include_others`.
    if all_part:
        for excluded_type in minus_wildcards:
            new_types[excluded_type] = set()

    # Wildcards go in second so that they override an exclusion of the same
    # type.
    for wildcard_type in plus_wildcards:
        new_types[wildcard_type] = None

    # Finally, add the specific state keys.
    for state_type, state_key in plus_state_keys:
        if state_type not in new_types:
            # Don't insert if the entire type is already included by
            # include_others, as this would actually shrink the state
            # allowed by this filter.
            if not all_part:
                new_types[state_type] = {state_key}
            continue

        existing_keys = new_types[state_type]
        # A wildcard (None) already covers every state key of the type.
        if existing_keys is not None:
            existing_keys.add(state_key)

    return StateFilter.freeze(new_types, include_others=all_part)

def approx_difference(self, other: "StateFilter") -> "StateFilter":
    """
    Computes an approximation of `self - other` as a new state filter.

    The typical use is working out which state still has to be pulled out of
    the database when the state included by `self` is wanted but the state
    included by `other` is already to hand.

    Guarantees on the returned state filter:
     - every state event included by this filter (`self`) and not included
       by `other` is included;
     - no state event outside of this filter (`self`) is included; and
     - it is allowed to be an over-approximation, i.e. it may additionally
       include some state events that `other` includes.

    The narrowest such state filter is aimed for. An approximation cannot be
    avoided when `self` holds a wildcard for a state type for which `other`
    only lists specific state keys: state filters have no way of expressing
    'all state keys except some given examples', so the wildcard is kept.
    e.g.
        StateFilter(m.room.member -> None (wildcard))
          minus
        StateFilter(m.room.member -> {'@wombat:example.org'})
          is approximated as
        StateFilter(m.room.member -> None (wildcard))
    """

    # Work on the four-part representation of both filters:
    #  - 'all': does the filter start out from every event?
    #  - 'excludes': if so, the event types that are removed again
    #  - 'wildcards': event types included in their entirety
    #  - 'concrete keys': individual (type, state key) pairs included
    self_base, self_additions = self._decompose_into_four_parts()
    self_all, self_excludes = self_base
    self_wildcards, self_concrete_keys = self_additions

    other_base, other_additions = other._decompose_into_four_parts()
    other_all, other_excludes = other_base
    other_wildcards, other_concrete_keys = other_additions

    # Entire types that `other` also includes entirely need not be kept.
    remaining_wildcards = self_wildcards - other_wildcards

    # Individual keys are dropped when `other` has a wildcard for their type
    # or lists the very same key.
    remaining_keys = {
        (state_type, state_key)
        for (state_type, state_key) in self_concrete_keys
        if state_type not in other_wildcards
    } - other_concrete_keys

    if not other_all:
        # `other` does not start from all, so the result keeps the same
        # starting point as `self`; the types `other` includes entirely are
        # added to the exclusions.
        return StateFilter._recompose_from_four_parts(
            self_all,
            self_excludes | other_wildcards,
            remaining_wildcards,
            remaining_keys,
        )

    # From here on `other` starts from all, so the result cannot: anything
    # of a type outside `other`'s exclusion list is already covered by it.
    if self_all:
        # `self` includes (via its own 'all') every type that `other`
        # excludes and `self` does not, so those types have to be requested
        # explicitly as wildcards.
        remaining_wildcards |= other_excludes.difference(self_excludes)

    # Only types in the exclusion list of `other` can still be outstanding.
    remaining_wildcards &= other_excludes
    remaining_keys = {
        (state_type, state_key)
        for (state_type, state_key) in remaining_keys
        if state_type in other_excludes
    }

    # Without 'all' there is nothing to exclude from, hence the empty set.
    return StateFilter._recompose_from_four_parts(
        False, set(), remaining_wildcards, remaining_keys
    )


class StateGroupStorage:
"""High level interface to fetching state for event."""
Expand Down
Loading