PS-9391 Fixes replication break error because of HASH_SCAN #5528

Draft · wants to merge 1 commit into base: 8.0
@@ -0,0 +1,55 @@
include/master-slave.inc
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the connection metadata repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START REPLICA; see the 'START REPLICA Syntax' in the MySQL Manual for more information.
[connection master]
[connection slave]
set @prior_slave_rows_search_algorithms=@@global.slave_rows_search_algorithms;
Warnings:
Warning 1287 '@@slave_rows_search_algorithms' is deprecated and will be removed in a future release.
set @@global.slave_rows_search_algorithms= 'HASH_SCAN';
Warnings:
Warning 1287 '@@slave_rows_search_algorithms' is deprecated and will be removed in a future release.
[connection master]
CREATE TABLE t1 (i INT, INDEX t1_i(i));
CREATE FUNCTION f1 () RETURNS INT BEGIN
UPDATE t1 SET i = 11 WHERE i = 10;
UPDATE t1 SET i = 12 WHERE i = 11;
RETURN 0;
END|
include/sync_slave_sql_with_master.inc
[connection master]
INSERT INTO t1 VALUES (10);
SELECT f1();
f1()
0
include/sync_slave_sql_with_master.inc
include/assert.inc ['There is only one row in table t1']
[connection master]
include/diff_tables.inc [master:test.t1, slave:test.t1]
CREATE FUNCTION f2 () RETURNS INT BEGIN
UPDATE t1 SET i = 11 WHERE i = 12;
UPDATE t1 SET i = 13 WHERE i = 11;
RETURN 0;
END|
include/sync_slave_sql_with_master.inc
[connection master]
SELECT f2();
f2()
0
include/sync_slave_sql_with_master.inc
include/assert.inc ['There is only one row in table t1']
[connection master]
include/diff_tables.inc [master:test.t1, slave:test.t1]
[connection master]
SELECT * FROM t1;
i
13
DROP FUNCTION f1;
DROP FUNCTION f2;
DROP TABLE t1;
[connection slave]
set @@global.slave_rows_search_algorithms= @prior_slave_rows_search_algorithms;
Warnings:
Warning 1287 '@@slave_rows_search_algorithms' is deprecated and will be removed in a future release.
include/rpl_end.inc
@@ -0,0 +1,84 @@
# Bug#115741 Test whether setting slave_rows_search_algorithms to HASH_SCAN works properly
# when multiple updates are targeted to the same row within a single update rows log event

--source include/have_binlog_format_row.inc
--source include/set_privilege_checks_user_as_system_user.inc
--source include/master-slave.inc

--source include/rpl_connection_slave.inc
set @prior_slave_rows_search_algorithms=@@global.slave_rows_search_algorithms;
set @@global.slave_rows_search_algorithms= 'HASH_SCAN';

# The scenarios in this test case involve a table with no primary key but with an index on the column.
# When slave_rows_search_algorithms is set to HASH_SCAN, a list "m_distinct_keys" is built internally
# to store the unique key values, which are later used to scan the table and apply the events
# to the matching rows.
--source include/rpl_connection_master.inc
# Create a table, with no primary key and an index.
CREATE TABLE t1 (i INT, INDEX t1_i(i));

# Scenario no.1
# Sorting the elements of m_distinct_keys doesn't alter the original order of updates
# Create a stored function so that only one Update_rows_log_event is generated.
--delimiter |
CREATE FUNCTION f1 () RETURNS INT BEGIN
UPDATE t1 SET i = 11 WHERE i = 10;
UPDATE t1 SET i = 12 WHERE i = 11;
RETURN 0;
END|
--delimiter ;
--source include/sync_slave_sql_with_master.inc

--source include/rpl_connection_master.inc
INSERT INTO t1 VALUES (10);
SELECT f1();

--source include/sync_slave_sql_with_master.inc
--let $assert_text= 'There is only one row in table t1'
--let $assert_cond= [SELECT COUNT(*) FROM t1] = 1
--source include/assert.inc

--source include/rpl_connection_master.inc

# Verify that there is no difference between tables of master and slave.
--let diff_tables=master:test.t1, slave:test.t1
--source include/diff_tables.inc


# Scenario no.2
# Sorting the elements of m_distinct_keys alters the original order of updates, producing
# a case where a key value taken from the list does not exist in the table yet because the
# UPDATE that creates it has not been applied.
--delimiter |
CREATE FUNCTION f2 () RETURNS INT BEGIN
UPDATE t1 SET i = 11 WHERE i = 12;
UPDATE t1 SET i = 13 WHERE i = 11;
RETURN 0;
END|
--delimiter ;
--source include/sync_slave_sql_with_master.inc

--source include/rpl_connection_master.inc
SELECT f2();

--source include/sync_slave_sql_with_master.inc
--let $assert_text= 'There is only one row in table t1'
--let $assert_cond= [SELECT COUNT(*) FROM t1] = 1
--source include/assert.inc

--source include/rpl_connection_master.inc

# Verify that there is no difference between tables of master and slave.
--let diff_tables=master:test.t1, slave:test.t1
--source include/diff_tables.inc

# Cleanup
--source include/rpl_connection_master.inc
SELECT * FROM t1;
DROP FUNCTION f1;
DROP FUNCTION f2;
DROP TABLE t1;

--source include/rpl_connection_slave.inc
set @@global.slave_rows_search_algorithms= @prior_slave_rows_search_algorithms;
--source include/rpl_end.inc
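
The two scenarios above hinge on whether the distinct-key list preserves the order in which the UPDATEs produced their before-image key values. The standalone sketch below (not server code) illustrates the ordering effect with the scenario-2 values, where the before-image keys arrive as 12 and then 11; the container choices and the hard-coded update rules are illustrative assumptions only.

#include <cassert>
#include <set>
#include <vector>

int main() {
  int row = 12;  // the replica's copy of t1 currently holds i = 12
  std::vector<int> insertion_order{12, 11};  // keys in the order the UPDATEs were logged

  // Ordered-set behaviour: the keys are sorted, so 11 would be probed first,
  // but no row with i = 11 exists yet -> the lookup fails on the replica.
  std::set<int> sorted_keys(insertion_order.begin(), insertion_order.end());
  assert(*sorted_keys.begin() == 11);

  // Insertion-order behaviour: probe 12 first (UPDATE 12 -> 11), then 11
  // (UPDATE 11 -> 13), which reproduces the master's final state.
  for (int key : insertion_order) {
    if (row == key) row = (key == 12) ? 11 : 13;
  }
  assert(row == 13);
  return 0;
}
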
25 changes: 11 additions & 14 deletions sql/log_event.cc
@@ -7870,7 +7870,7 @@ Rows_log_event::Rows_log_event(THD *thd_arg, TABLE *tbl_arg,
m_curr_row_end(nullptr),
m_key(nullptr),
m_key_info(nullptr),
m_distinct_keys(Key_compare(&m_key_info)),
m_distinct_keys(0, Key_hash(&m_key_info), Key_equal(&m_key_info)),
m_distinct_key_spare_buf(nullptr) {
DBUG_TRACE;
common_header->type_code = event_type;
@@ -7961,7 +7961,7 @@ Rows_log_event::Rows_log_event(
m_curr_row_end(nullptr),
m_key(nullptr),
m_key_info(nullptr),
m_distinct_keys(Key_compare(&m_key_info)),
m_distinct_keys(0, Key_hash(&m_key_info), Key_equal(&m_key_info)),
m_distinct_key_spare_buf(nullptr)
#endif
{
@@ -9052,9 +9052,9 @@ int Rows_log_event::next_record_scan(bool first_read) {
marker according to the next key value that we have in the list.
*/
if (error) {
if (m_itr != m_distinct_keys.end()) {
m_key = *m_itr;
m_itr++;
if (m_distinct_key_idx != m_distinct_keys_original_order.size()) {
m_key = m_distinct_keys_original_order[m_distinct_key_idx];
m_distinct_key_idx++;
first_read = true;
} else {
if (!is_trx_retryable_upon_engine_error(error))
@@ -9088,14 +9088,11 @@ int Rows_log_event::open_record_scan() {

if (m_key_index < MAX_KEY) {
if (m_rows_lookup_algorithm == ROW_LOOKUP_HASH_SCAN) {
/* initialize the iterator over the list of distinct keys that we have */
m_itr = m_distinct_keys.begin();
Collaborator: Shouldn't we change this to m_distinct_key_idx = 0?
Contributor (author): m_distinct_key_idx is already set to 0 when it is declared.

/* get the first element from the list of keys and increment the
iterator
/* get the first element from the vector of keys and increment the
index
*/
m_key = *m_itr;
m_itr++;
m_key = m_distinct_keys_original_order[m_distinct_key_idx];
m_distinct_key_idx++;
} else {
/* this is an INDEX_SCAN we need to store the key in m_key */
assert((m_rows_lookup_algorithm == ROW_LOOKUP_INDEX_SCAN) && m_key);
@@ -9142,9 +9139,9 @@ int Rows_log_event::add_key_to_distinct_keyset() {
DBUG_TRACE;
assert(m_key_index < MAX_KEY);
key_copy(m_distinct_key_spare_buf, m_table->record[0], m_key_info, 0);
std::pair<std::set<uchar *, Key_compare>::iterator, bool> ret =
m_distinct_keys.insert(m_distinct_key_spare_buf);
auto ret = m_distinct_keys.insert(m_distinct_key_spare_buf);
if (ret.second) {
m_distinct_keys_original_order.push_back(m_distinct_key_spare_buf);
/* Insert is successful, so allocate a new buffer for next key */
m_distinct_key_spare_buf = (uchar *)thd->alloc(m_key_info->key_length);
if (!m_distinct_key_spare_buf) {
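
The change to add_key_to_distinct_keyset() above pairs the hash set (used only for de-duplication) with a vector that records first-seen order. A minimal sketch of that pattern with plain standard-library types (string keys standing in for the server's key buffers) follows; it is an illustration, not the server code.

#include <cassert>
#include <string>
#include <unordered_set>
#include <vector>

int main() {
  std::unordered_set<std::string> distinct_keys;  // rejects duplicates
  std::vector<std::string> original_order;        // remembers first-seen order

  std::vector<std::string> incoming{"12", "11", "12", "11"};  // duplicates on purpose
  for (const std::string &key : incoming) {
    // Only a successful insertion (a key not seen before) is appended to the
    // order-preserving vector, mirroring the ret.second check in the diff.
    if (distinct_keys.insert(key).second) original_order.push_back(key);
  }

  assert(original_order.size() == 2);
  assert(original_order[0] == "12" && original_order[1] == "11");
  return 0;
}
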
38 changes: 29 additions & 9 deletions sql/log_event.h
@@ -39,7 +39,7 @@
#include <functional>
#include <list>
#include <map>
#include <set>
#include <unordered_set>
#include <string>
#include <string_view>

@@ -2882,30 +2882,50 @@ class Rows_log_event : public virtual binary_log::Rows_event, public Log_event {
uchar *m_key; /* Buffer to keep key value during searches */
uint m_key_index;
KEY *m_key_info; /* Points to description of index #m_key_index */
class Key_compare {
class Key_equal {
public:
/**
@param ki Where to find KEY description
@note m_distinct_keys is instantiated when Rows_log_event is constructed;
it stores a Key_compare object internally. However at that moment, the
it stores a Key_equal object internally. However at that moment, the
index (KEY*) to use for comparisons, is not yet known. So, at
instantiation, we indicate the Key_compare the place where it can
instantiation, we indicate the Key_equal the place where it can
find the KEY* when needed (this place is Rows_log_event::m_key_info),
Key_compare remembers the place in member m_key_info.
Key_equal remembers the place in member m_key_info.
Before we need to do comparisons - i.e. before we need to insert
elements, we update Rows_log_event::m_key_info once for all.
*/
Key_compare(KEY **ki = nullptr) : m_key_info(ki) {}
Key_equal(KEY **ki = nullptr) : m_key_info(ki) {}
bool operator()(uchar *k1, uchar *k2) const {
return key_cmp2((*m_key_info)->key_part, k1, (*m_key_info)->key_length,
k2, (*m_key_info)->key_length) < 0;
k2, (*m_key_info)->key_length) == 0;
Collaborator: Rename this helper class to Key_equal. "Compare" may be misleading because it suggests a comparator, which is no longer the case here; all it does now is check for equality.
Contributor (author): Done
}

private:
KEY **m_key_info;
};
std::set<uchar *, Key_compare> m_distinct_keys;
std::set<uchar *, Key_compare>::iterator m_itr;
class Key_hash {
public:
Key_hash(KEY **ki = nullptr) : m_key_info(ki) {}
size_t operator()(uchar* ptr) const {
Collaborator: Maybe just create a std::string_view from ptr and (*m_key_info)->key_length and calculate a hash of this string_view using the standard hash function.

std::string_view sv{ptr, (*m_key_info)->key_length};
return std::hash(sv);

Contributor (author): Thanks for the suggestion. Done.

size_t hash = 0;
if (ptr) {
std::string_view sv{reinterpret_cast<const char*>(ptr),
(*m_key_info)->key_length};
return std::hash<std::string_view>{}(sv);
}
return hash;
}
private:
KEY **m_key_info;
};
std::unordered_set<uchar *, Key_hash, Key_equal> m_distinct_keys;

/**
A vector that stores the distinct keys, preserving their original insertion order
*/
std::vector<uchar *> m_distinct_keys_original_order;
std::size_t m_distinct_key_idx = 0;
/**
A spare buffer which will be used when saving the distinct keys
for doing an index scan with HASH_SCAN search algorithm.
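
For reference, the shape of the new m_distinct_keys declaration — an unordered_set keyed by raw buffer pointers, with hash and equality functors that look at the pointed-to bytes — can be exercised in isolation. In the sketch below the fixed kKeyLen, the functor names, and the use of memcmp are illustrative assumptions standing in for (*m_key_info)->key_length and key_cmp2.

#include <cassert>
#include <cstring>
#include <functional>
#include <string_view>
#include <unordered_set>

constexpr size_t kKeyLen = 4;  // stand-in for (*m_key_info)->key_length

struct KeyHash {
  size_t operator()(const unsigned char *ptr) const {
    // Hash the key bytes through a string_view, as the reviewed code does.
    std::string_view sv{reinterpret_cast<const char *>(ptr), kKeyLen};
    return std::hash<std::string_view>{}(sv);
  }
};

struct KeyEqual {
  bool operator()(const unsigned char *a, const unsigned char *b) const {
    return std::memcmp(a, b, kKeyLen) == 0;  // equality only, no ordering
  }
};

int main() {
  unsigned char k1[kKeyLen] = {0, 0, 0, 12};
  unsigned char k2[kKeyLen] = {0, 0, 0, 11};
  unsigned char k1_dup[kKeyLen] = {0, 0, 0, 12};  // same bytes, different buffer

  std::unordered_set<const unsigned char *, KeyHash, KeyEqual> keys;
  keys.insert(k1);
  keys.insert(k2);
  assert(!keys.insert(k1_dup).second);  // rejected by content, not by pointer value
  assert(keys.size() == 2);
  return 0;
}
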