From a5360e8caa8f1ccf54f0236fa65920a812b937b4 Mon Sep 17 00:00:00 2001 From: Nikita Zheleztsov Date: Tue, 30 Jan 2024 00:15:48 +0300 Subject: [PATCH] replicaset: validate name in locate_master Currently, the name validation is not used, when locate_master() is called. It makes an explicit call via the connection to obtain a future object in order to figure out, whether the node is a master. We should not make any calls to a replica until the time, we definitely know, that its name and uuid was validated. Let's use replica_call instead of conn.call. Follow-up #426 NO_DOC=bugfix --- test/replicaset-luatest/vconnect_test.lua | 27 +++++++++++++++++++++ test/router/router.result | 3 ++- vshard/replicaset.lua | 29 ++++++++++++++++------- 3 files changed, 50 insertions(+), 9 deletions(-) diff --git a/test/replicaset-luatest/vconnect_test.lua b/test/replicaset-luatest/vconnect_test.lua index b00ce872..da1c3c49 100644 --- a/test/replicaset-luatest/vconnect_test.lua +++ b/test/replicaset-luatest/vconnect_test.lua @@ -187,3 +187,30 @@ test_group.test_async_no_yield = function(g) ivshard.storage._call = _G._call end) end + +-- +-- Test, that during master search name is validated. +-- +test_group.test_locate_master = function() + -- Replace name with the bad one. + local new_cfg = table.deepcopy(global_cfg) + local cfg_rs = new_cfg.sharding.replicaset + cfg_rs.replicas.bad = cfg_rs.replicas.replica + cfg_rs.replicas.replica = nil + local _, rs = next(vreplicaset.buildall(new_cfg)) + + -- Avoid noop in locate_master. + rs.master = nil + rs.is_master_auto = true + -- Retry, until the connection is established and + -- name mismach error is encountered. + local ok, is_nop, last_err + t.helpers.retrying(timeout_opts, function() + ok, is_nop, last_err = rs:locate_master() + t.assert_not_equals(last_err, nil) + end) + + t.assert_equals(last_err.name, 'INSTANCE_NAME_MISMATCH') + t.assert_equals(is_nop, false) + t.assert_equals(ok, false) +end diff --git a/test/router/router.result b/test/router/router.result index 0fca93f6..ea8319d8 100644 --- a/test/router/router.result +++ b/test/router/router.result @@ -1525,7 +1525,8 @@ table.sort(error_messages) ... error_messages --- -- - Use replica:detach_conn(...) instead of replica.detach_conn(...) +- - Use replica:check_is_connected(...) instead of replica.check_is_connected(...) + - Use replica:detach_conn(...) instead of replica.detach_conn(...) - Use replica:is_connected(...) instead of replica.is_connected(...) - Use replica:safe_uri(...) instead of replica.safe_uri(...) ... diff --git a/vshard/replicaset.lua b/vshard/replicaset.lua index 5fbecf75..363037e7 100644 --- a/vshard/replicaset.lua +++ b/vshard/replicaset.lua @@ -1060,7 +1060,7 @@ local function replicaset_locate_master(replicaset) local func = 'vshard.storage._call' local args = {'info'} local const_timeout = consts.MASTER_SEARCH_TIMEOUT - local ok, res, err, f + local ok, res, err local master = replicaset.master if master then local sync_opts = {timeout = const_timeout} @@ -1091,17 +1091,20 @@ local function replicaset_locate_master(replicaset) local futures = {} local timeout = const_timeout local deadline = fiber_clock() + timeout - local async_opts = {is_async = true} + local async_opts = {is_async = true, timeout = timeout} local replicaset_id = replicaset.id for replica_id, replica in pairs(replicaset.replicas) do - local conn = replicaset_connect_to_replica(replicaset, replica) - if conn:is_connected() then - ok, f = pcall(conn.call, conn, func, args, async_opts) - if not ok then - last_err = lerror.make(f) + replicaset_connect_to_replica(replicaset, replica) + ok, err = replica:check_is_connected() + if ok then + ok, res, err = replica_call(replica, func, args, async_opts) + if not ok and err ~= nil then + last_err = err else - futures[replica_id] = f + futures[replica_id] = res end + elseif err ~= nil then + last_err = err end end local master_id @@ -1201,6 +1204,16 @@ local replica_mt = { return replica.conn and replica.conn:is_connected() and conn_vconnect_check_or_close(replica.conn) end, + -- Does the same thing as is_connected(), but returns true or nil, err. + check_is_connected = function(replica) + if not replica.conn then + return nil, lerror.from_string("%s.conn is nil") + end + if not replica.conn:is_connected() then + return nil, lerror.from_string('%s.conn is not connected') + end + return conn_vconnect_check_or_close(replica.conn) + end, safe_uri = function(replica) local uri = luri.parse(replica.uri) uri.password = nil