From 52c054f5eb4a605fab1f4e01e081f402e85a3d64 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Wed, 16 Nov 2022 19:09:59 +0000 Subject: [PATCH 1/3] Remove option to skip locking of tables during emulated upserts To perform an emulated upsert into a table safely, we must either: * lock the table, * be the only writer upserting into the table * or rely on another unique index being present. When the 2nd or 3rd cases were applicable, we previously avoided locking the table as an optimization. However, as seen in #14406, it is easy to slip up when adding new schema deltas and corrupt the database. Since #13760, Synapse has required SQLite >= 3.27.0, which has support for native upserts. This means that we now only perform emulated upserts while waiting for background updates to add unique indexes. Since emulated upserts are far less frequent now, let's remove the option to skip locking tables, so that we don't shoot ourselves in the foot again. Signed-off-by: Sean Quah --- synapse/storage/database.py | 55 ++++++------------- .../storage/databases/main/account_data.py | 8 --- synapse/storage/databases/main/appservice.py | 2 - synapse/storage/databases/main/devices.py | 9 --- .../databases/main/event_federation.py | 1 - synapse/storage/databases/main/pusher.py | 6 -- synapse/storage/databases/main/room.py | 6 -- synapse/storage/databases/main/room_batch.py | 2 - .../storage/databases/main/user_directory.py | 2 - 9 files changed, 17 insertions(+), 74 deletions(-) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 4717c9728a0b..7ade81f9b2e6 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -1129,7 +1129,6 @@ async def simple_upsert( values: Dict[str, Any], insertion_values: Optional[Dict[str, Any]] = None, desc: str = "simple_upsert", - lock: bool = True, ) -> bool: """Insert a row with values + insertion_values; on conflict, update with values. @@ -1154,21 +1153,11 @@ async def simple_upsert( requiring that a unique index exist on the column names used to detect a conflict (i.e. `keyvalues.keys()`). - If there is no such index, we can "emulate" an upsert with a SELECT followed - by either an INSERT or an UPDATE. This is unsafe: we cannot make the same - atomicity guarantees that a native upsert can and are very vulnerable to races - and crashes. Therefore if we wish to upsert without an appropriate unique index, - we must either: - - 1. Acquire a table-level lock before the emulated upsert (`lock=True`), or - 2. VERY CAREFULLY ensure that we are the only thread and worker which will be - writing to this table, in which case we can proceed without a lock - (`lock=False`). - - Generally speaking, you should use `lock=True`. If the table in question has a - unique index[*], this class will use a native upsert (which is atomic and so can - ignore the `lock` argument). Otherwise this class will use an emulated upsert, - in which case we want the safer option unless we been VERY CAREFUL. + If there is no such index yet[*], we can "emulate" an upsert with a SELECT + followed by either an INSERT or an UPDATE. This is unsafe: we cannot make the + same atomicity guarantees that a native upsert can and are very vulnerable to + races and crashes. Therefore if we wish to upsert without an appropriate unique + index, we must acquire a table-level lock before the emulated upsert. [*]: Some tables have unique indices added to them in the background. Those tables `T` are keys in the dictionary UNIQUE_INDEX_BACKGROUND_UPDATES, @@ -1189,7 +1178,6 @@ async def simple_upsert( values: The nonunique columns and their new values insertion_values: additional key/values to use only when inserting desc: description of the transaction, for logging and metrics - lock: True to lock the table when doing the upsert. Returns: Returns True if a row was inserted or updated (i.e. if `values` is not empty then this always returns True) @@ -1209,7 +1197,6 @@ async def simple_upsert( keyvalues, values, insertion_values, - lock=lock, db_autocommit=autocommit, ) except self.engine.module.IntegrityError as e: @@ -1232,7 +1219,6 @@ def simple_upsert_txn( values: Dict[str, Any], insertion_values: Optional[Dict[str, Any]] = None, where_clause: Optional[str] = None, - lock: bool = True, ) -> bool: """ Pick the UPSERT method which works best on the platform. Either the @@ -1245,8 +1231,6 @@ def simple_upsert_txn( values: The nonunique columns and their new values insertion_values: additional key/values to use only when inserting where_clause: An index predicate to apply to the upsert. - lock: True to lock the table when doing the upsert. Unused when performing - a native upsert. Returns: Returns True if a row was inserted or updated (i.e. if `values` is not empty then this always returns True) @@ -1270,7 +1254,6 @@ def simple_upsert_txn( values, insertion_values=insertion_values, where_clause=where_clause, - lock=lock, ) def simple_upsert_txn_emulated( @@ -1291,14 +1274,15 @@ def simple_upsert_txn_emulated( insertion_values: additional key/values to use only when inserting where_clause: An index predicate to apply to the upsert. lock: True to lock the table when doing the upsert. + Must not be False unless the table has already been locked. Returns: Returns True if a row was inserted or updated (i.e. if `values` is not empty then this always returns True) """ insertion_values = insertion_values or {} - # We need to lock the table :(, unless we're *really* careful if lock: + # We need to lock the table :( self.engine.lock_table(txn, table) def _getwhere(key: str) -> str: @@ -1406,7 +1390,6 @@ async def simple_upsert_many( value_names: Collection[str], value_values: Collection[Collection[Any]], desc: str, - lock: bool = True, ) -> None: """ Upsert, many times. @@ -1418,8 +1401,6 @@ async def simple_upsert_many( value_names: The value column names value_values: A list of each row's value column values. Ignored if value_names is empty. - lock: True to lock the table when doing the upsert. Unused when performing - a native upsert. """ # We can autocommit if it safe to upsert @@ -1433,7 +1414,6 @@ async def simple_upsert_many( key_values, value_names, value_values, - lock=lock, db_autocommit=autocommit, ) @@ -1445,7 +1425,6 @@ def simple_upsert_many_txn( key_values: Collection[Iterable[Any]], value_names: Collection[str], value_values: Iterable[Iterable[Any]], - lock: bool = True, ) -> None: """ Upsert, many times. @@ -1457,8 +1436,6 @@ def simple_upsert_many_txn( value_names: The value column names value_values: A list of each row's value column values. Ignored if value_names is empty. - lock: True to lock the table when doing the upsert. Unused when performing - a native upsert. """ if table not in self._unsafe_to_upsert_tables: return self.simple_upsert_many_txn_native_upsert( @@ -1466,7 +1443,12 @@ def simple_upsert_many_txn( ) else: return self.simple_upsert_many_txn_emulated( - txn, table, key_names, key_values, value_names, value_values, lock=lock + txn, + table, + key_names, + key_values, + value_names, + value_values, ) def simple_upsert_many_txn_emulated( @@ -1477,7 +1459,6 @@ def simple_upsert_many_txn_emulated( key_values: Collection[Iterable[Any]], value_names: Collection[str], value_values: Iterable[Iterable[Any]], - lock: bool = True, ) -> None: """ Upsert, many times, but without native UPSERT support or batching. @@ -1489,18 +1470,16 @@ def simple_upsert_many_txn_emulated( value_names: The value column names value_values: A list of each row's value column values. Ignored if value_names is empty. - lock: True to lock the table when doing the upsert. """ # No value columns, therefore make a blank list so that the following # zip() works correctly. if not value_names: value_values = [() for x in range(len(key_values))] - if lock: - # Lock the table just once, to prevent it being done once per row. - # Note that, according to Postgres' documentation, once obtained, - # the lock is held for the remainder of the current transaction. - self.engine.lock_table(txn, "user_ips") + # Lock the table just once, to prevent it being done once per row. + # Note that, according to Postgres' documentation, once obtained, + # the lock is held for the remainder of the current transaction. + self.engine.lock_table(txn, "user_ips") for keyv, valv in zip(key_values, value_values): _keys = {x: y for x, y in zip(key_names, keyv)} diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index c38b8a9e5a7e..8aaca4ce2413 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -459,9 +459,6 @@ async def add_account_data_to_room( content_json = json_encoder.encode(content) async with self._account_data_id_gen.get_next() as next_id: - # no need to lock here as room_account_data has a unique constraint - # on (user_id, room_id, account_data_type) so simple_upsert will - # retry if there is a conflict. await self.db_pool.simple_upsert( desc="add_room_account_data", table="room_account_data", @@ -471,7 +468,6 @@ async def add_account_data_to_room( "account_data_type": account_data_type, }, values={"stream_id": next_id, "content": content_json}, - lock=False, ) self._account_data_stream_cache.entity_has_changed(user_id, next_id) @@ -527,15 +523,11 @@ def _add_account_data_for_user( ) -> None: content_json = json_encoder.encode(content) - # no need to lock here as account_data has a unique constraint on - # (user_id, account_data_type) so simple_upsert will retry if - # there is a conflict. self.db_pool.simple_upsert_txn( txn, table="account_data", keyvalues={"user_id": user_id, "account_data_type": account_data_type}, values={"stream_id": next_id, "content": content_json}, - lock=False, ) # Ignored users get denormalized into a separate table as an optimisation. diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py index 63046c052771..25da0c56c5d1 100644 --- a/synapse/storage/databases/main/appservice.py +++ b/synapse/storage/databases/main/appservice.py @@ -451,8 +451,6 @@ async def set_appservice_stream_type_pos( table="application_services_state", keyvalues={"as_id": service.id}, values={f"{stream_type}_stream_id": pos}, - # no need to lock when emulating upsert: as_id is a unique key - lock=False, desc="set_appservice_stream_type_pos", ) diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index e114c733d108..81c6835f21ac 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1747,9 +1747,6 @@ def _update_remote_device_list_cache_entry_txn( table="device_lists_remote_cache", keyvalues={"user_id": user_id, "device_id": device_id}, values={"content": json_encoder.encode(content)}, - # we don't need to lock, because we assume we are the only thread - # updating this user's devices. - lock=False, ) txn.call_after(self._get_cached_user_device.invalidate, (user_id, device_id)) @@ -1763,9 +1760,6 @@ def _update_remote_device_list_cache_entry_txn( table="device_lists_remote_extremeties", keyvalues={"user_id": user_id}, values={"stream_id": stream_id}, - # again, we can assume we are the only thread updating this user's - # extremity. - lock=False, ) async def update_remote_device_list_cache( @@ -1818,9 +1812,6 @@ def _update_remote_device_list_cache_txn( table="device_lists_remote_extremeties", keyvalues={"user_id": user_id}, values={"stream_id": stream_id}, - # we don't need to lock, because we can assume we are the only thread - # updating this user's extremity. - lock=False, ) async def add_device_change_to_streams( diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 309a4ba6643c..bbee02ab18f0 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1686,7 +1686,6 @@ async def insert_insertion_extremity(self, event_id: str, room_id: str) -> None: }, insertion_values={}, desc="insert_insertion_extremity", - lock=False, ) async def insert_received_event_to_staging( diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py index 4a01562d4552..38177881d9a9 100644 --- a/synapse/storage/databases/main/pusher.py +++ b/synapse/storage/databases/main/pusher.py @@ -331,14 +331,11 @@ async def get_throttle_params_by_room( async def set_throttle_params( self, pusher_id: str, room_id: str, params: ThrottleParams ) -> None: - # no need to lock because `pusher_throttle` has a primary key on - # (pusher, room_id) so simple_upsert will retry await self.db_pool.simple_upsert( "pusher_throttle", {"pusher": pusher_id, "room_id": room_id}, {"last_sent_ts": params.last_sent_ts, "throttle_ms": params.throttle_ms}, desc="set_throttle_params", - lock=False, ) async def _remove_deactivated_pushers(self, progress: dict, batch_size: int) -> int: @@ -595,8 +592,6 @@ async def add_pusher( device_id: Optional[str] = None, ) -> None: async with self._pushers_id_gen.get_next() as stream_id: - # no need to lock because `pushers` has a unique key on - # (app_id, pushkey, user_name) so simple_upsert will retry await self.db_pool.simple_upsert( table="pushers", keyvalues={"app_id": app_id, "pushkey": pushkey, "user_name": user_id}, @@ -615,7 +610,6 @@ async def add_pusher( "device_id": device_id, }, desc="add_pusher", - lock=False, ) user_has_pusher = self.get_if_user_has_pusher.cache.get_immediate( diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 4fbaefad7304..a8075a86cc6b 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1843,9 +1843,6 @@ async def upsert_room_on_join( "creator": room_creator, "has_auth_chain_index": has_auth_chain_index, }, - # rooms has a unique constraint on room_id, so no need to lock when doing an - # emulated upsert. - lock=False, ) async def store_partial_state_room( @@ -1966,9 +1963,6 @@ async def maybe_store_room_on_outlier_membership( "creator": "", "has_auth_chain_index": has_auth_chain_index, }, - # rooms has a unique constraint on room_id, so no need to lock when doing an - # emulated upsert. - lock=False, ) async def set_room_is_public(self, room_id: str, is_public: bool) -> None: diff --git a/synapse/storage/databases/main/room_batch.py b/synapse/storage/databases/main/room_batch.py index 39e80f6f5b11..131f357d04d5 100644 --- a/synapse/storage/databases/main/room_batch.py +++ b/synapse/storage/databases/main/room_batch.py @@ -44,6 +44,4 @@ async def store_state_group_id_for_event_id( table="event_to_state_groups", keyvalues={"event_id": event_id}, values={"state_group": state_group_id, "event_id": event_id}, - # Unique constraint on event_id so we don't have to lock - lock=False, ) diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 698d6f7515a5..044435deab4e 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -481,7 +481,6 @@ def _update_profile_in_user_dir_txn(txn: LoggingTransaction) -> None: table="user_directory", keyvalues={"user_id": user_id}, values={"display_name": display_name, "avatar_url": avatar_url}, - lock=False, # We're only inserter ) if isinstance(self.database_engine, PostgresEngine): @@ -511,7 +510,6 @@ def _update_profile_in_user_dir_txn(txn: LoggingTransaction) -> None: table="user_directory_search", keyvalues={"user_id": user_id}, values={"value": value}, - lock=False, # We're only inserter ) else: # This should be unreachable. From 2836401206a8fa24506c594f9c87946e2f88399e Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Wed, 16 Nov 2022 19:41:48 +0000 Subject: [PATCH 2/3] Add newsfile Signed-off-by: Sean Quah --- changelog.d/14469.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/14469.misc diff --git a/changelog.d/14469.misc b/changelog.d/14469.misc new file mode 100644 index 000000000000..a12a21e9aecb --- /dev/null +++ b/changelog.d/14469.misc @@ -0,0 +1 @@ +Remove option to skip locking of tables when performing emulated upserts, to avoid a class of bugs in future. From 7e5ae6518fd61637084a64bac4c6a33759bb80d8 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Fri, 18 Nov 2022 20:29:16 +0000 Subject: [PATCH 3/3] fixup: reword comment --- synapse/storage/database.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 7ade81f9b2e6..a0b1809e3262 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -1154,10 +1154,11 @@ async def simple_upsert( conflict (i.e. `keyvalues.keys()`). If there is no such index yet[*], we can "emulate" an upsert with a SELECT - followed by either an INSERT or an UPDATE. This is unsafe: we cannot make the - same atomicity guarantees that a native upsert can and are very vulnerable to - races and crashes. Therefore if we wish to upsert without an appropriate unique - index, we must acquire a table-level lock before the emulated upsert. + followed by either an INSERT or an UPDATE. This is unsafe unless *all* upserters + run at the SERIALIZABLE isolation level: we cannot make the same atomicity + guarantees that a native upsert can and are very vulnerable to races and + crashes. Therefore to upsert without an appropriate unique index, we acquire a + table-level lock before the emulated upsert. [*]: Some tables have unique indices added to them in the background. Those tables `T` are keys in the dictionary UNIQUE_INDEX_BACKGROUND_UPDATES,