From 306ae4178134eb6c52d9f99af026572dfb86d041 Mon Sep 17 00:00:00 2001 From: Kamil Holubicki Date: Fri, 3 Jan 2020 14:16:14 +0100 Subject: [PATCH] PS-6150: MySQL crash - sync_mutex_to_string https://jira.percona.com/browse/PS-6150 https://jira.percona.com/browse/PS-6093 Two related issues fixed: 1. When printing out locked mutexes (SHOW ENGINE INNODB STATUS), LATCH_ID_AUTOINC object was cast to wrong type in sync0arr.cc sync_array_cell_print(), because of wrong request_type stored in cell. That caused calling GenericPolicy::to_string() instead of AggregateMutexStatsPolicy::to_string(). As the result of wrong cast, m_id member of policy used inside to_string() contained random value. This value was used to index the array in sync_latch_get_meta() which caused memory access violation. 2. When locking of the mutex reaches OS level, sync_array cell is created in ut0mutex.ic TTASEventMutex::wait(), LATCH_ID_BUF_POOL_ZIP is mapped to SYNC_BUF_BLOCK cell type. This causes proper cast of mutex in sync0arr.cc sync_array_cell_print() to AggregateMutexStatsPolicy::to_string(). However to_string() method did not service LATCH_ID_BUF_POOL_ZIP causing assertion. MTR test covering this case was not implemented, because the problem shows up when innodb internal threads lock zip_mutex (buf_flush_page_cleaner_coordinator thread and buf_lru_manager). Locking of zip_mutex on OS level can be reached by heavy insertion into the compressed table (and setting innodb_lru_scan_depth to high value) which will cause InnoDB to aggresively flush buffer pool pages. However, as locking threads are InnoDB internal threads, and DEBUG_SYNC facility is thread aware, there is no way to setup debug sync points from MTR test level. --- .../r/percona_extended_innodb_status.result | 16 ++++++ .../t/percona_extended_innodb_status.test | 53 +++++++++++++++++++ storage/innobase/handler/ha_innodb.cc | 2 + storage/innobase/include/sync0policy.ic | 1 + storage/innobase/include/ut0mutex.ic | 22 ++++++-- storage/innobase/sync/sync0arr.cc | 13 +++-- 6 files changed, 97 insertions(+), 10 deletions(-) diff --git a/mysql-test/suite/innodb/r/percona_extended_innodb_status.result b/mysql-test/suite/innodb/r/percona_extended_innodb_status.result index abc4b5aa9526..0ed9f4f2727d 100644 --- a/mysql-test/suite/innodb/r/percona_extended_innodb_status.result +++ b/mysql-test/suite/innodb/r/percona_extended_innodb_status.result @@ -82,3 +82,19 @@ DROP TABLE t; # mysql -e "SHOW ENGINE INNODB STATUS" Grepping InnoDB status for Buffer pool size, bytes 25149440 +# +# PS-6150: MySQL crash - sync_mutex_to_string +# +CREATE TABLE t1(id INT(15) NOT NULL AUTO_INCREMENT, a INT, PRIMARY KEY(id)) ENGINE=InnoDB; +SET DEBUG_SYNC='innobase_lock_autoinc SIGNAL autoinc_lock.locked WAIT_FOR autoinc_lock.continue'; +INSERT INTO test.t1(a) VALUES(1); +SET DEBUG_SYNC='now WAIT_FOR autoinc_lock.locked'; +SET SESSION debug="+d,catch_autoinc_mutex_os_lock"; +SET DEBUG_SYNC='autoinc_mutex_wait SIGNAL autoinc_mutex_wait.locked'; +INSERT INTO test.t1(a) VALUES(1); +SET DEBUG_SYNC='now WAIT_FOR autoinc_mutex_wait.locked'; +SHOW ENGINE INNODB STATUS; +SET DEBUG_SYNC='now SIGNAL autoinc_lock.continue'; +SET SESSION debug="-d,catch_autoinc_mutex_os_lock"; +SET DEBUG_SYNC = 'RESET'; +DROP TABLE t1; diff --git a/mysql-test/suite/innodb/t/percona_extended_innodb_status.test b/mysql-test/suite/innodb/t/percona_extended_innodb_status.test index 4c312b991a03..81c3da36360c 100644 --- a/mysql-test/suite/innodb/t/percona_extended_innodb_status.test +++ b/mysql-test/suite/innodb/t/percona_extended_innodb_status.test @@ -66,3 +66,56 @@ DROP TABLE t; --echo Grepping InnoDB status for $SEARCH_PATTERN --source include/search_pattern_in_file.inc --remove_file $innodb_status + + +--echo # +--echo # PS-6150: MySQL crash - sync_mutex_to_string +--echo # + +connect (con1,localhost,root,,); +connect (con2,localhost,root,,); + +--connection default +CREATE TABLE t1(id INT(15) NOT NULL AUTO_INCREMENT, a INT, PRIMARY KEY(id)) ENGINE=InnoDB; + +--connection con1 +SET DEBUG_SYNC='innobase_lock_autoinc SIGNAL autoinc_lock.locked WAIT_FOR autoinc_lock.continue'; +--send INSERT INTO test.t1(a) VALUES(1) + +--connection con2 +# Be sure that con1 locked autoinc_lock +SET DEBUG_SYNC='now WAIT_FOR autoinc_lock.locked'; + +# The following will cause autoinc_lock to reach OS level locking and registering the lock in sync_array used by 'SHOW ENGINE INNODB STATUS' to list locked mutexes +SET SESSION debug="+d,catch_autoinc_mutex_os_lock"; +SET DEBUG_SYNC='autoinc_mutex_wait SIGNAL autoinc_mutex_wait.locked'; +--send INSERT INTO test.t1(a) VALUES(1) + +--connection default +# wait up to the point when lock is registered in sync_array (in fact OS lock is not acquired yet, but this is enough) +SET DEBUG_SYNC='now WAIT_FOR autoinc_mutex_wait.locked'; + +# Send the query that caused the crash +--disable_result_log +SHOW ENGINE INNODB STATUS; +--enable_result_log + + +# cleanup +# Unblock connections. +--connection default +SET DEBUG_SYNC='now SIGNAL autoinc_lock.continue'; +SET SESSION debug="-d,catch_autoinc_mutex_os_lock"; +SET DEBUG_SYNC = 'RESET'; + +--connection con1 +--reap + +--connection con2 +--reap + +--disconnect con1 +--disconnect con2 + +--connection default +DROP TABLE t1; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 84f4c5ad794c..f992887090fa 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -8510,6 +8510,8 @@ ha_innobase::innobase_lock_autoinc(void) /* Acquire the AUTOINC mutex. */ dict_table_autoinc_lock(ib_table); + DEBUG_SYNC_C("innobase_lock_autoinc"); + /* We need to check that another transaction isn't already holding the AUTOINC lock on the table. */ if (ib_table->n_waiting_or_granted_auto_inc_locks) { diff --git a/storage/innobase/include/sync0policy.ic b/storage/innobase/include/sync0policy.ic index 8c2b44331a43..f1e747bb54a3 100644 --- a/storage/innobase/include/sync0policy.ic +++ b/storage/innobase/include/sync0policy.ic @@ -45,6 +45,7 @@ std::string AggregateMutexStatsPolicy::to_string() const switch (m_id) { case LATCH_ID_BUF_BLOCK_MUTEX: + case LATCH_ID_BUF_POOL_ZIP: /* I don't think it makes sense to keep track of the file name and line number for each block mutex. Too much of overhead. Use the latch id to figure out the location from the source. */ diff --git a/storage/innobase/include/ut0mutex.ic b/storage/innobase/include/ut0mutex.ic index 4377aa69073f..b46aea08ce09 100644 --- a/storage/innobase/include/ut0mutex.ic +++ b/storage/innobase/include/ut0mutex.ic @@ -54,13 +54,19 @@ TTASEventMutex::wait( { sync_cell_t* cell; sync_array_t* sync_arr; - + latch_id_t latch_id = m_policy.get_id(); + ulint type = SYNC_MUTEX; + + if (latch_id == LATCH_ID_BUF_BLOCK_MUTEX + || latch_id == LATCH_ID_BUF_POOL_ZIP) { + type = SYNC_BUF_BLOCK; + } else if (latch_id == LATCH_ID_AUTOINC) { + type = SYNC_DICT_AUTOINC_MUTEX; + } + sync_arr = sync_array_get_and_reserve_cell( this, - (m_policy.get_id() == LATCH_ID_BUF_BLOCK_MUTEX - || m_policy.get_id() == LATCH_ID_BUF_POOL_ZIP) - ? SYNC_BUF_BLOCK - : SYNC_MUTEX, + type, filename, line, &cell); /* The memory order of the array reservation and @@ -71,6 +77,12 @@ TTASEventMutex::wait( and then the event is set to the signaled state. */ set_waiters(); + + DBUG_EXECUTE_IF("catch_autoinc_mutex_os_lock", { + if (m_policy.get_id() == LATCH_ID_AUTOINC) { + DEBUG_SYNC_C("autoinc_mutex_wait"); + } + }); /* Try to reserve still a few times. */ diff --git a/storage/innobase/sync/sync0arr.cc b/storage/innobase/sync/sync0arr.cc index bca404aff5c2..fb727a48288d 100644 --- a/storage/innobase/sync/sync0arr.cc +++ b/storage/innobase/sync/sync0arr.cc @@ -312,8 +312,8 @@ sync_cell_get_event( return(cell->latch.mutex->event()); - } else if (type == SYNC_BUF_BLOCK) { - + } else if (type == SYNC_BUF_BLOCK || + type == SYNC_DICT_AUTOINC_MUTEX) { return(cell->latch.bpmutex->event()); } else if (type == RW_LOCK_X_WAIT) { @@ -374,7 +374,8 @@ sync_array_reserve_cell( if (cell->request_type == SYNC_MUTEX) { cell->latch.mutex = reinterpret_cast(object); - } else if (cell->request_type == SYNC_BUF_BLOCK) { + } else if (cell->request_type == SYNC_BUF_BLOCK || + cell->request_type == SYNC_DICT_AUTOINC_MUTEX) { cell->latch.bpmutex = reinterpret_cast(object); } else { cell->latch.lock = reinterpret_cast(object); @@ -534,7 +535,8 @@ sync_array_cell_print( (ulong) policy.get_enter_line() #endif /* UNIV_DEBUG */ ); - } else if (type == SYNC_BUF_BLOCK) { + } else if (type == SYNC_BUF_BLOCK || + type == SYNC_DICT_AUTOINC_MUTEX) { BlockWaitMutex* mutex = cell->latch.bpmutex; const BlockWaitMutex::MutexPolicy& policy = @@ -777,7 +779,8 @@ sync_array_detect_deadlock( return(false); } - case SYNC_BUF_BLOCK: { + case SYNC_BUF_BLOCK: + case SYNC_DICT_AUTOINC_MUTEX: { BlockWaitMutex* mutex = cell->latch.bpmutex;