Skip to content

Commit

Permalink
Issue#45 Replace trx_sys_t::rw_trx_set with LF_HASH
Browse files Browse the repository at this point in the history
trx_sys_t::rw_trx_set is implemented as a std::set, which performs several
fairly expensive operations under trx_sys_t::mutex protection, e.g. malloc/free
when adding or removing elements. Traversing its balanced tree is not cheap either.

This has a negative impact on scalability, which is especially visible when
running the oltp_update_index.lua benchmark on a ramdisk.

To reduce trx_sys_t::mutex contention, std::set is replaced with LF_HASH. None
of the LF_HASH operations requires protection by trx_sys_t::mutex (or by any
other global mutex).
  • Loading branch information
zwang28 committed Apr 13, 2021
1 parent 49a4593 commit 7e5933a
Show file tree
Hide file tree
Showing 20 changed files with 485 additions and 394 deletions.
4 changes: 2 additions & 2 deletions storage/innobase/clone/clone0copy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -327,14 +327,14 @@ int Clone_Snapshot::update_binlog_position() {
}

int Clone_Snapshot::wait_trx_end(THD *thd, trx_id_t trx_id) {
auto trx = trx_rw_is_active(trx_id, nullptr, false);
auto trx = trx_sys->rw_trx_hash.find(trx_id, false);
if (trx == nullptr) {
return (0);
}

auto wait_cond = [&](bool alert, bool &result) {
/* Check if transaction is still active. */
auto trx = trx_rw_is_active(trx_id, nullptr, false);
auto trx = trx_sys->rw_trx_hash.find(trx_id, false);
if (trx == nullptr) {
result = false;
return (0);
Expand Down
17 changes: 17 additions & 0 deletions storage/innobase/handler/ha_innodb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,7 @@ static PSI_mutex_info all_innodb_mutexes[] = {
#endif /* UNIV_DEBUG */
PSI_MUTEX_KEY(rw_lock_list_mutex, 0, 0, PSI_DOCUMENT_ME),
PSI_MUTEX_KEY(rw_lock_mutex, 0, 0, PSI_DOCUMENT_ME),
PSI_MUTEX_KEY(rw_trx_hash_element_mutex, 0, 0, PSI_DOCUMENT_ME),
PSI_MUTEX_KEY(srv_innodb_monitor_mutex, 0, 0, PSI_DOCUMENT_ME),
PSI_MUTEX_KEY(srv_misc_tmpfile_mutex, 0, 0, PSI_DOCUMENT_ME),
PSI_MUTEX_KEY(srv_monitor_file_mutex, 0, 0, PSI_DOCUMENT_ME),
Expand Down Expand Up @@ -19322,6 +19323,22 @@ static bool innodb_buffer_pool_size_validate(THD *thd,
return true;
}

/** Looks up the transaction associated with the current thread.

This may be invoked while InnoDB is still initialising, i.e. before
innodb_hton_ptr->slot has been assigned a meaningful value; in that
situation no lookup is attempted.

@return the pointer yielded by thd_to_trx() for current_thd, or nullptr
when there is no current THD or the handlerton slot is HA_SLOT_UNDEF */
trx_t *current_trx() {
  THD *const session = current_thd;

  /* No THD is attached to the calling thread. */
  if (!likely(session != nullptr)) {
    return nullptr;
  }

  /* The handlerton slot has not been assigned yet, so thd_to_trx()
  must not be consulted. */
  if (innodb_hton_ptr->slot == HA_SLOT_UNDEF) {
    return nullptr;
  }

  return thd_to_trx(session);
}

/** Update the system variable innodb_buffer_pool_size using the "saved"
value. This function is registered as a callback with MySQL.
@param[in] thd thread handle
Expand Down
6 changes: 4 additions & 2 deletions storage/innobase/include/row0vers.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class ReadView;

/** Finds out if an active transaction has inserted or modified a secondary
index record.
@param[in,out] caller_trx trx of current thread
@param[in] rec record in a secondary index
@param[in] index the secondary index
@param[in] offsets rec_get_offsets(rec, index)
Expand All @@ -56,8 +57,8 @@ class ReadView;
negatives. The caller must confirm all positive results by checking if the trx
is still active.
*/
trx_t *row_vers_impl_x_locked(const rec_t *rec, const dict_index_t *index,
const ulint *offsets);
trx_t *row_vers_impl_x_locked(trx_t *caller_trx, const rec_t *rec,
const dict_index_t *index, const ulint *offsets);

/** Finds out if we must preserve a delete marked earlier version of a clustered
index record, because it is >= the purge view.
Expand Down Expand Up @@ -119,6 +120,7 @@ dberr_t row_vers_build_for_consistent_read(
/** Constructs the last committed version of a clustered index record,
which should be seen by a semi-consistent read. */
void row_vers_build_for_semi_consistent_read(
trx_t* caller_trx, /*!< in/out: trx of current thread */
const rec_t *rec, /*!< in: record in a clustered index; the
caller must have a latch on the page; this
latch locks the top of the stack of versions
Expand Down
1 change: 1 addition & 0 deletions storage/innobase/include/sync0sync.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ extern mysql_pfs_key_t clone_task_mutex_key;
extern mysql_pfs_key_t clone_snapshot_mutex_key;
extern mysql_pfs_key_t parallel_read_mutex_key;
extern mysql_pfs_key_t dblwr_mutex_key;
extern mysql_pfs_key_t rw_trx_hash_element_mutex_key;
#endif /* UNIV_PFS_MUTEX */

#ifdef UNIV_PFS_RWLOCK
Expand Down
2 changes: 2 additions & 0 deletions storage/innobase/include/sync0types.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ enum latch_level_t {
SYNC_THREADS,
SYNC_TRX,
SYNC_POOL,
SYNC_RW_TRX_HASH_ELEMENT,
SYNC_POOL_MANAGER,
SYNC_TRX_SYS,
SYNC_LOCK_SYS_SHARDED,
Expand Down Expand Up @@ -466,6 +467,7 @@ enum latch_id_t {
LATCH_ID_DBLR,
LATCH_ID_REDO_LOG_ARCHIVE_ADMIN_MUTEX,
LATCH_ID_REDO_LOG_ARCHIVE_QUEUE_MUTEX,
LATCH_ID_RW_TRX_HASH_ELEMENT,
LATCH_ID_TEST_MUTEX,
LATCH_ID_MAX = LATCH_ID_TEST_MUTEX
};
Expand Down
Loading

0 comments on commit 7e5933a

Please sign in to comment.