Skip to content

Commit

Permalink
presence: Improve INSERT conflict handling in do_update_user_presence.
Browse files Browse the repository at this point in the history
This was discussed in the review of zulip#29999:
zulip#29999 (comment)

The previous way of handling wasn't entirely correct, as unnecessary
events were omitted, with a bad guarantee of even being in the correct
order.

This is an improvement as now the function detects that it ended up
doing nothing and can skip sending an event.

The race condition is hard to make up in an automated test, but we can
hackily simulate it by injecting a side_effect which will create a
conflicting UserPresence row when the function requests a cursor. Aside
of that, the actual race was simulated in manual testing to verify the
expected behavior.
  • Loading branch information
mateuszmandera authored and timabbott committed Jun 16, 2024
1 parent 2feee9d commit 7ff1227
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 11 deletions.
30 changes: 19 additions & 11 deletions zerver/actions/presence.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import time
from datetime import datetime, timedelta

Expand All @@ -18,6 +19,8 @@
from zerver.models.users import active_user_ids
from zerver.tornado.django_api import send_event

logger = logging.getLogger(__name__)


def send_presence_changed(
user_profile: UserProfile, presence: UserPresence, *, force_send_update: bool = False
Expand Down Expand Up @@ -219,20 +222,15 @@ def do_update_user_presence(
# There's a small possibility of a race where a different process may have
# already created a row for this user. Given the extremely close timing
# of these events, there's no clear reason to prefer one over the other,
# so we just go with the most direct approach of DO UPDATE, so that the
# last event emitted (the one coming from our process, since we're the slower one)
# matches the created presence state.
# TODO: Might be worth changing this to DO NOTHING instead with a bit of extra logic
# to skip emitting an event in such a scenario.
# so we choose the simplest option of DO NOTHING here and let the other
# concurrent transaction win.
# This also allows us to avoid sending a spurious presence update event,
# by checking if the row was actually created.
query += sql.SQL("""
INSERT INTO zerver_userpresence (user_profile_id, last_active_time, last_connected_time, realm_id, last_update_id)
VALUES ({user_profile_id}, {last_active_time}, {last_connected_time}, {realm_id}, (SELECT last_update_id FROM new_last_update_id))
ON CONFLICT (user_profile_id) DO UPDATE SET
last_active_time = EXCLUDED.last_active_time,
last_connected_time = EXCLUDED.last_connected_time,
realm_id = EXCLUDED.realm_id,
last_update_id = EXCLUDED.last_update_id;
""").format(
ON CONFLICT (user_profile_id) DO NOTHING
""").format(
user_profile_id=sql.Literal(user_profile.id),
last_active_time=sql.Literal(presence.last_active_time),
last_connected_time=sql.Literal(presence.last_connected_time),
Expand All @@ -256,6 +254,16 @@ def do_update_user_presence(

with connection.cursor() as cursor:
cursor.execute(query)
if creating:
# Check if the row was actually created or if we
# hit the ON CONFLICT DO NOTHING case.
actually_created = cursor.rowcount > 0

if creating and not actually_created:
# If we ended up doing nothing due to something else creating the row
# in the meantime, then we shouldn't send an event here.
logger.info("UserPresence row already created for %s, returning.", user_profile.id)
return

if force_send_update or (
not user_profile.realm.presence_disabled and (creating or became_online)
Expand Down
50 changes: 50 additions & 0 deletions zerver/tests/test_presence.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import time_machine
from django.conf import settings
from django.db import connection
from django.utils.timezone import now as timezone_now
from typing_extensions import override

Expand Down Expand Up @@ -87,6 +88,55 @@ def back_date(num_weeks: int) -> None:
self.assert_length(presence_dct, 0)
self.assertEqual(last_update_id, -1)

def test_user_presence_row_creation_simulated_race(self) -> None:
"""
There is a theoretical race condition, where while a UserPresence
row is being created for a user, a concurrent process creates it first,
right before we execute our INSERT. This conflict is handled with
ON CONFLICT DO NOTHING in the SQL query and an early return
if that happens.
"""

user_profile = self.example_user("hamlet")

UserPresence.objects.filter(user_profile=user_profile).delete()

self.login_user(user_profile)

def insert_row_and_return_cursor() -> Any:
# This is the function we will inject into connection.cursor
# to simulate the race condition.
# When the underlying code requests a cursor, we will create
# the UserPresence row for the user, before returning a real
# cursor to the caller. This ensures the caller will hit the
# INSERT conflict when it tries to execute its query.
UserPresence.objects.create(user_profile=user_profile, realm=user_profile.realm)

cursor = connection.cursor()
return cursor

with mock.patch("zerver.actions.presence.connection") as mock_connection, self.assertLogs(
"zerver.actions.presence", level="INFO"
) as mock_logs:
# This is a tricky mock. We need to set things up so that connection.cursor()
# in do_update_user_presence runs our custom code when the caller tries to
# enter the context manager.
# We also need to take care to only affect the connection that exists in
# zerver.actions.presence rather than affecting the entire django.db.connection,
# as that would break code higher up in the stack.
mock_connection.cursor.return_value.__enter__.side_effect = insert_row_and_return_cursor

result = self.client_post("/json/users/me/presence", {"status": "active"})

# The request finished gracefully and the situation was logged:
self.assert_json_success(result)
self.assertEqual(
mock_logs.output,
[
f"INFO:zerver.actions.presence:UserPresence row already created for {user_profile.id}, returning."
],
)

def test_last_update_id_logic(self) -> None:
slim_presence = True
UserPresence.objects.all().delete()
Expand Down

0 comments on commit 7ff1227

Please sign in to comment.