Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Performance improvements and refactor of Ratelimiter #7595

Merged
merged 33 commits into from
Jun 5, 2020
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
4f715be
Refactor and comment ratelimiting. Set limits in constructor
anoadragon453 May 28, 2020
0e6ee7c
Ratelimiters are instantiated by the HomeServer class
anoadragon453 May 28, 2020
82eac22
Modify servlets to pull Ratelimiters from HomeServer class
anoadragon453 May 28, 2020
a0ef594
Update unittests
anoadragon453 May 28, 2020
6a07c2d
lint
anoadragon453 May 28, 2020
c322ba0
changelog
anoadragon453 May 28, 2020
f6203a6
Make rate_hz and burst_count overridable per-request
anoadragon453 May 29, 2020
1f6156b
Set clock with constructor, store rate_hz per key again
anoadragon453 Jun 1, 2020
c236806
Instantiate Ratelimiters in respective classes
anoadragon453 Jun 1, 2020
470de6e
Use patch for the Ratelimiter in some tests. Set using config in others
anoadragon453 Jun 1, 2020
515a186
Update copyright header
anoadragon453 Jun 1, 2020
87ab836
Remove resolved question
anoadragon453 Jun 1, 2020
56c52a5
lint
anoadragon453 Jun 1, 2020
2d7e087
lint, mypy
anoadragon453 Jun 1, 2020
a566b46
Remove unittest.DEBUG statement
anoadragon453 Jun 1, 2020
41c7288
Update changelog.d/7595.misc
anoadragon453 Jun 2, 2020
58d4919
Remove erroneous print statement
anoadragon453 Jun 2, 2020
aa1f4c3
Merge branch 'anoa/ratelimit_config_perf' of github.com:matrix-org/sy…
anoadragon453 Jun 2, 2020
39b484b
Move update after optional method arguments
anoadragon453 Jun 2, 2020
d727bed
Make it obvious that time_now_s is just for testing
anoadragon453 Jun 2, 2020
9f76a8d
Update ratelimiter calling methods and tests
anoadragon453 Jun 2, 2020
8867900
No need to re-check for None in can_do_action
anoadragon453 Jun 2, 2020
ef7383f
time_now_s is used in ratelimit
anoadragon453 Jun 3, 2020
189c01b
Comment changes revolving around time_allowed
anoadragon453 Jun 3, 2020
4a88edb
Fix missed call to self.rate_hz
anoadragon453 Jun 3, 2020
14a0af5
Test Ratelimiter ratelimit method and param overrides
anoadragon453 Jun 3, 2020
c145c81
Back out some changes.
clokep Jun 4, 2020
12b4d47
Do not specify ratelimiters in tests when unnecessary.
clokep Jun 4, 2020
d84d779
Update timestamp comment
anoadragon453 Jun 4, 2020
45a7791
Clean up Exception raising assertion
anoadragon453 Jun 4, 2020
3899589
Clean up and split out tests
anoadragon453 Jun 4, 2020
08c5114
Remove _ = style
anoadragon453 Jun 4, 2020
9beee5f
Merge branch 'anoa/ratelimit_config_perf' of github.com:matrix-org/sy…
anoadragon453 Jun 4, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/7595.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Refactor `Ratelimiter` and try to limit the amount of related, expensive config value accesses.
anoadragon453 marked this conversation as resolved.
Show resolved Hide resolved
118 changes: 80 additions & 38 deletions synapse/api/ratelimiting.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,79 +13,121 @@
# limitations under the License.

from collections import OrderedDict
from typing import Any, Optional, Tuple
from typing import Any, Tuple

from synapse.api.errors import LimitExceededError


class Ratelimiter(object):
"""
Ratelimit message sending by user.
Ratelimit actions marked by arbitrary keys.

Args:
rate_hz: The long term number of actions that can be performed in a
second.
burst_count: How many actions that can be performed before being
limited.
"""

def __init__(self):
self.message_counts = (
OrderedDict()
) # type: OrderedDict[Any, Tuple[float, int, Optional[float]]]
def __init__(self, rate_hz: float, burst_count: int):
# A ordered dictionary keeping track of actions, when they were last
# performed and how often. Each entry is a mapping from a key of arbitrary type
# to a tuple representing:
# * How many times an action has occurred since a point in time
# * That point in time
self.actions = OrderedDict() # type: OrderedDict[Any, Tuple[float, int]]
self.rate_hz = rate_hz
self.burst_count = burst_count

def can_do_action(self, key, time_now_s, rate_hz, burst_count, update=True):
def can_do_action(
self, key: Any, time_now_s: int, update: bool = True,
) -> Tuple[bool, float]:
"""Can the entity (e.g. user or IP address) perform the action?

Args:
key: The key we should use when rate limiting. Can be a user ID
(when sending events), an IP address, etc.
time_now_s: The time now.
rate_hz: The long term number of messages a user can send in a
second.
burst_count: How many messages the user can send before being
limited.
update (bool): Whether to update the message rates or not. This is
useful to check if a message would be allowed to be sent before
its ready to be actually sent.
time_now_s: The time now
update: Whether to count this check as performing the action
Returns:
A pair of a bool indicating if they can send a message now and a
time in seconds of when they can next send a message.
A tuple containing:
* A bool indicating if they can perform the action now
* The time in seconds of when it can next be performed.
-1 if a rate_hz has not been defined for this Ratelimiter
"""
self.prune_message_counts(time_now_s)
message_count, time_start, _ignored = self.message_counts.get(
key, (0.0, time_now_s, None)
)
# Remove any expired entries
self._prune_message_counts(time_now_s)

# Check if there is an existing count entry for this key
action_count, time_start, = self.actions.get(key, (0.0, time_now_s))

# Check whether performing another action is allowed
time_delta = time_now_s - time_start
sent_count = message_count - time_delta * rate_hz
if sent_count < 0:
performed_count = action_count - time_delta * self.rate_hz
if performed_count < 0:
# Allow, reset back to count 1
allowed = True
time_start = time_now_s
message_count = 1.0
elif sent_count > burst_count - 1.0:
action_count = 1.0
elif performed_count > self.burst_count - 1.0:
# Deny, we have exceeded our burst count
allowed = False
else:
# We haven't reached our limit yet
allowed = True
message_count += 1
action_count += 1.0

if update:
self.message_counts[key] = (message_count, time_start, rate_hz)
self.actions[key] = (action_count, time_start)

if rate_hz > 0:
time_allowed = time_start + (message_count - burst_count + 1) / rate_hz
# Figure out the time when an action can be performed again
if self.rate_hz > 0:
time_allowed = (
time_start + (action_count - self.burst_count + 1) / self.rate_hz
)

# Don't give back a time in the past
if time_allowed < time_now_s:
time_allowed = time_now_s
else:
# This does not apply
time_allowed = -1

return allowed, time_allowed

def prune_message_counts(self, time_now_s):
for key in list(self.message_counts.keys()):
message_count, time_start, rate_hz = self.message_counts[key]
def _prune_message_counts(self, time_now_s: int):
"""Remove message count entries that are older than
anoadragon453 marked this conversation as resolved.
Show resolved Hide resolved

Args:
time_now_s: The current time
"""
# We create a copy of the key list here as the dictionary is modified during
# the loop
for key in list(self.actions.keys()):
action_count, time_start = self.actions[key]

time_delta = time_now_s - time_start
if message_count - time_delta * rate_hz > 0:
if action_count - time_delta * self.rate_hz > 0:
# XXX: Should this be a continue?
break
anoadragon453 marked this conversation as resolved.
Show resolved Hide resolved
else:
del self.message_counts[key]
del self.actions[key]

def ratelimit(
self, key: Any, time_now_s: int, update: bool = True,
):
"""Checks if an action can be performed. If not, raises a LimitExceededError

def ratelimit(self, key, time_now_s, rate_hz, burst_count, update=True):
allowed, time_allowed = self.can_do_action(
key, time_now_s, rate_hz, burst_count, update
)
Args:
key: An arbitrary key used to classify an action
time_now_s: The current time
update: Whether to count this check as performing the action

Raises:
LimitExceededError: If an action could not be performed, along with the time in
milliseconds until the action can be performed again
"""
allowed, time_allowed = self.can_do_action(key, time_now_s, update)

if not allowed:
raise LimitExceededError(
Expand Down
8 changes: 7 additions & 1 deletion synapse/config/ratelimiting.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict

from ._base import Config


class RateLimitConfig(object):
def __init__(self, config, defaults={"per_second": 0.17, "burst_count": 3.0}):
def __init__(
self,
config: Dict[str, float],
defaults={"per_second": 0.17, "burst_count": 3.0},
):
self.per_second = config.get("per_second", defaults["per_second"])
self.burst_count = config.get("burst_count", defaults["burst_count"])

Expand Down
56 changes: 21 additions & 35 deletions synapse/handlers/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

import synapse.types
from synapse.api.constants import EventTypes, Membership
from synapse.api.errors import LimitExceededError
from synapse.types import UserID

logger = logging.getLogger(__name__)
Expand All @@ -44,11 +43,16 @@ def __init__(self, hs):
self.notifier = hs.get_notifier()
self.state_handler = hs.get_state_handler()
self.distributor = hs.get_distributor()
self.ratelimiter = hs.get_ratelimiter()
self.admin_redaction_ratelimiter = hs.get_admin_redaction_ratelimiter()
self.clock = hs.get_clock()
self.hs = hs

self.ratelimiter = None
anoadragon453 marked this conversation as resolved.
Show resolved Hide resolved
self.request_ratelimiter = hs.get_request_ratelimiter()
self._rc_message = self.hs.config.rc_message

# If special admin redaction ratelimiting is disabled, this will be None
self.admin_redaction_ratelimiter = hs.get_admin_redaction_ratelimiter()

self.server_name = hs.hostname

self.event_builder_factory = hs.get_event_builder_factory()
Expand Down Expand Up @@ -83,48 +87,30 @@ def ratelimit(self, requester, update=True, is_admin_redaction=False):
if requester.app_service and not requester.app_service.is_rate_limited():
return

messages_per_second = self._rc_message.per_second
burst_count = self._rc_message.burst_count

# Check if there is a per user override in the DB.
override = yield self.store.get_ratelimit_for_user(user_id)
if override:
# If overriden with a null Hz then ratelimiting has been entirely
# If overridden with a null Hz then ratelimiting has been entirely
# disabled for the user
if not override.messages_per_second:
return

messages_per_second = override.messages_per_second
burst_count = override.burst_count

if is_admin_redaction and self.admin_redaction_ratelimiter:
# If we have separate config for admin redactions, use a separate
# ratelimiter as to not have user_id's clash
self.admin_redaction_ratelimiter.ratelimit(user_id, time_now, update)
else:
# We default to different values if this is an admin redaction and
# the config is set
if is_admin_redaction and self.hs.config.rc_admin_redaction:
messages_per_second = self.hs.config.rc_admin_redaction.per_second
burst_count = self.hs.config.rc_admin_redaction.burst_count
else:
messages_per_second = self.hs.config.rc_message.per_second
burst_count = self.hs.config.rc_message.burst_count

if is_admin_redaction and self.hs.config.rc_admin_redaction:
# If we have separate config for admin redactions we use a separate
# ratelimiter
allowed, time_allowed = self.admin_redaction_ratelimiter.can_do_action(
user_id,
time_now,
rate_hz=messages_per_second,
burst_count=burst_count,
update=update,
)
else:
allowed, time_allowed = self.ratelimiter.can_do_action(
user_id,
time_now,
rate_hz=messages_per_second,
burst_count=burst_count,
update=update,
)
if not allowed:
raise LimitExceededError(
retry_after_ms=int(1000 * (time_allowed - time_now))
)
# Override rate and burst count per-user
self.request_ratelimiter.rate_hz = messages_per_second
self.request_ratelimiter.burst_count = burst_count
anoadragon453 marked this conversation as resolved.
Show resolved Hide resolved

self.request_ratelimiter.ratelimit(user_id, time_now, update)

async def maybe_kick_guest_users(self, event, context=None):
# Technically this function invalidates current_state by changing it.
Expand Down
18 changes: 7 additions & 11 deletions synapse/handlers/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,11 @@ def __init__(self, hs):

# Ratelimiter for failed auth during UIA. Uses same ratelimit config
# as per `rc_login.failed_attempts`.
self._failed_uia_attempts_ratelimiter = Ratelimiter()
# XXX: Should this be hs.get_login_failed_attempts_ratelimiter?
clokep marked this conversation as resolved.
Show resolved Hide resolved
self._failed_uia_attempts_ratelimiter = Ratelimiter(
rate_hz=self.hs.config.rc_login_failed_attempts.per_second,
burst_count=self.hs.config.rc_login_failed_attempts.burst_count,
)

self._clock = self.hs.get_clock()

Expand Down Expand Up @@ -197,11 +201,7 @@ async def validate_user_via_ui_auth(

# Check if we should be ratelimited due to too many previous failed attempts
self._failed_uia_attempts_ratelimiter.ratelimit(
user_id,
time_now_s=self._clock.time(),
rate_hz=self.hs.config.rc_login_failed_attempts.per_second,
burst_count=self.hs.config.rc_login_failed_attempts.burst_count,
update=False,
user_id, time_now_s=self._clock.time(), update=False,
)

# build a list of supported flows
Expand All @@ -214,11 +214,7 @@ async def validate_user_via_ui_auth(
except LoginError:
# Update the ratelimite to say we failed (`can_do_action` doesn't raise).
self._failed_uia_attempts_ratelimiter.can_do_action(
user_id,
time_now_s=self._clock.time(),
rate_hz=self.hs.config.rc_login_failed_attempts.per_second,
burst_count=self.hs.config.rc_login_failed_attempts.burst_count,
update=True,
user_id, time_now_s=self._clock.time(), update=True,
)
raise

Expand Down
1 change: 0 additions & 1 deletion synapse/handlers/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,6 @@ def __init__(self, hs):
self.profile_handler = hs.get_profile_handler()
self.event_builder_factory = hs.get_event_builder_factory()
self.server_name = hs.hostname
self.ratelimiter = hs.get_ratelimiter()
self.notifier = hs.get_notifier()
self.config = hs.config
self.require_membership_for_aliases = hs.config.require_membership_for_aliases
Expand Down
5 changes: 1 addition & 4 deletions synapse/handlers/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,10 +428,7 @@ def check_registration_ratelimit(self, address):
time_now = self.clock.time()

self.ratelimiter.ratelimit(
address,
time_now_s=time_now,
rate_hz=self.hs.config.rc_registration.per_second,
burst_count=self.hs.config.rc_registration.burst_count,
address, time_now_s=time_now,
)

def register_with_store(
Expand Down
Loading