From f5d6c93498765fa2b0c74416c787f6d1c3c8ecf2 Mon Sep 17 00:00:00 2001 From: heyuchen Date: Thu, 15 Oct 2020 13:21:46 +0800 Subject: [PATCH 1/3] feat(split): update register_child with split_status --- src/meta/meta_split_service.cpp | 101 ++++++++------ src/meta/test/meta_split_service_test.cpp | 128 +++++++++++------ src/replica/replica_check.cpp | 5 + src/replica/replica_config.cpp | 3 +- src/replica/split/replica_split_manager.cpp | 129 ++++++++++++------ src/replica/split/test/replica_split_test.cpp | 108 +++++++-------- src/replica/test/mock_utils.h | 2 + src/replication.thrift | 1 + 8 files changed, 293 insertions(+), 184 deletions(-) diff --git a/src/meta/meta_split_service.cpp b/src/meta/meta_split_service.cpp index e64b716dfb..3e9ea88813 100644 --- a/src/meta/meta_split_service.cpp +++ b/src/meta/meta_split_service.cpp @@ -131,47 +131,74 @@ void meta_split_service::do_start_partition_split(std::shared_ptr app void meta_split_service::register_child_on_meta(register_child_rpc rpc) { const auto &request = rpc.request(); + const std::string &app_name = request.app.app_name; auto &response = rpc.response(); response.err = ERR_IO_PENDING; zauto_write_lock(app_lock()); - std::shared_ptr app = _state->get_app(request.app.app_id); - dassert_f(app != nullptr, "app is not existed, id({})", request.app.app_id); - dassert_f(app->is_stateful, "app is stateless currently, id({})", request.app.app_id); - - dsn::gpid parent_gpid = request.parent_config.pid; - dsn::gpid child_gpid = request.child_config.pid; - const partition_configuration &parent_config = - app->partitions[parent_gpid.get_partition_index()]; - const partition_configuration &child_config = app->partitions[child_gpid.get_partition_index()]; - config_context &parent_context = app->helpers->contexts[parent_gpid.get_partition_index()]; - - if (request.parent_config.ballot < parent_config.ballot) { - dwarn_f("partition({}) register child failed, request is out-dated, request ballot = {}, " - "meta ballot = {}", - parent_gpid, - request.parent_config.ballot, - parent_config.ballot); + std::shared_ptr app = _state->get_app(app_name); + dassert_f(app != nullptr, "app({}) is not existed", app_name); + dassert_f(app->is_stateful, "app({}) is stateless currently", app_name); + + const gpid &parent_gpid = request.parent_config.pid; + const gpid &child_gpid = request.child_config.pid; + const auto &parent_config = app->partitions[parent_gpid.get_partition_index()]; + if (request.parent_config.ballot != parent_config.ballot) { + derror_f("app({}) partition({}) register child({}) failed, request is out-dated, request " + "parent ballot = {}, local parent ballot = {}", + app_name, + parent_gpid, + child_gpid, + request.parent_config.ballot, + parent_config.ballot); response.err = ERR_INVALID_VERSION; + response.parent_config = parent_config; return; } - if (child_config.ballot != invalid_ballot) { - dwarn_f( - "duplicated register child request, child({}) has already been registered, ballot = {}", - child_gpid, - child_config.ballot); + config_context &parent_context = app->helpers->contexts[parent_gpid.get_partition_index()]; + if (parent_context.stage == config_status::pending_proposal || + parent_context.stage == config_status::pending_remote_sync) { + dwarn_f("app({}) partition({}): another request is syncing with remote storage, ignore " + "this request", + app_name, + parent_gpid); + return; + } + + // TODO(heyuchen): pause/cancel split check + + auto iter = app->helpers->split_states.status.find(parent_gpid.get_partition_index()); + if (iter == app->helpers->split_states.status.end()) { + derror_f( + "duplicated register request, app({}) child partition({}) has already been registered", + app_name, + child_gpid); + const auto &child_config = app->partitions[child_gpid.get_partition_index()]; + dassert_f(child_config.ballot > 0, + "app({}) partition({}) should have been registered", + app_name, + child_gpid); response.err = ERR_CHILD_REGISTERED; + response.parent_config = parent_config; return; } - if (parent_context.stage == config_status::pending_proposal || - parent_context.stage == config_status::pending_remote_sync) { - dwarn_f("another request is syncing with remote storage, ignore this request"); + if (iter->second != split_status::SPLITTING) { + derror_f( + "app({}) partition({}) register child({}) failed, current partition split_status = {}", + app_name, + parent_gpid, + child_gpid, + dsn::enum_to_string(iter->second)); + response.err = ERR_INVALID_STATE; return; } - ddebug_f("parent({}) will register child({})", parent_gpid, child_gpid); + app->helpers->split_states.status.erase(parent_gpid.get_partition_index()); + app->helpers->split_states.splitting_count--; + ddebug_f("app({}) parent({}) will register child({})", app_name, parent_gpid, child_gpid); + parent_context.stage = config_status::pending_remote_sync; parent_context.msg = rpc.dsn_request(); parent_context.pending_sync_task = add_child_on_remote_storage(rpc, true); @@ -211,19 +238,17 @@ void meta_split_service::on_add_child_on_remote_storage_reply(error_code ec, register_child_rpc rpc, bool create_new) { - zauto_write_lock(app_lock()); - const auto &request = rpc.request(); auto &response = rpc.response(); + const std::string &app_name = request.app.app_name; - std::shared_ptr app = _state->get_app(request.app.app_id); - dassert_f(app != nullptr, "app is not existed, id({})", request.app.app_id); - dassert_f(app->status == app_status::AS_AVAILABLE || app->status == app_status::AS_DROPPING, - "app is not available now, id({})", - request.app.app_id); + zauto_write_lock(app_lock()); + std::shared_ptr app = _state->get_app(app_name); + dassert_f(app != nullptr, "app({}) is not existed", app_name); + dassert_f(app->is_stateful, "app({}) is stateless currently", app_name); - dsn::gpid parent_gpid = request.parent_config.pid; - dsn::gpid child_gpid = request.child_config.pid; + const gpid &parent_gpid = request.parent_config.pid; + const gpid &child_gpid = request.child_config.pid; config_context &parent_context = app->helpers->contexts[parent_gpid.get_partition_index()]; if (ec == ERR_TIMEOUT || @@ -241,7 +266,7 @@ void meta_split_service::on_add_child_on_remote_storage_reply(error_code ec, std::chrono::seconds(delay)); return; } - dassert_f(ec == ERR_OK, "we can't handle this right now, err = {}", ec.to_string()); + dassert_f(ec == ERR_OK, "we can't handle this right now, err = {}", ec); ddebug_f("parent({}) resgiter child({}) on remote storage succeed", parent_gpid, child_gpid); @@ -257,8 +282,6 @@ void meta_split_service::on_add_child_on_remote_storage_reply(error_code ec, child_config.secondaries = request.child_config.secondaries; _state->update_configuration_locally(*app, update_child_request); - parent_context.pending_sync_task = nullptr; - parent_context.stage = config_status::not_pending; if (parent_context.msg) { response.err = ERR_OK; response.app = *app; @@ -266,6 +289,8 @@ void meta_split_service::on_add_child_on_remote_storage_reply(error_code ec, response.child_config = app->partitions[child_gpid.get_partition_index()]; parent_context.msg = nullptr; } + parent_context.pending_sync_task = nullptr; + parent_context.stage = config_status::not_pending; } } // namespace replication diff --git a/src/meta/test/meta_split_service_test.cpp b/src/meta/test/meta_split_service_test.cpp index 6cb479476b..d28d30094a 100644 --- a/src/meta/test/meta_split_service_test.cpp +++ b/src/meta/test/meta_split_service_test.cpp @@ -42,6 +42,13 @@ class meta_split_service_test : public meta_test_base { meta_test_base::SetUp(); create_app(NAME, PARTITION_COUNT); + app = find_app(NAME); + } + + void TearDown() + { + app.reset(); + meta_test_base::TearDown(); } error_code start_partition_split(const std::string &app_name, int new_partition_count) @@ -56,48 +63,30 @@ class meta_split_service_test : public meta_test_base return rpc.response().err; } - register_child_response - register_child(ballot req_parent_ballot, ballot child_ballot, bool wait_zk = false) + error_code register_child(int32_t parent_index, ballot req_parent_ballot, bool wait_zk) { - // mock local app info - auto app = find_app(NAME); - app->partition_count *= 2; - app->partitions.resize(app->partition_count); - app->helpers->contexts.resize(app->partition_count); - for (int i = 0; i < app->partition_count; ++i) { - app->helpers->contexts[i].config_owner = &app->partitions[i]; - app->partitions[i].pid = dsn::gpid(app->app_id, i); - if (i >= app->partition_count / 2) { - app->partitions[i].ballot = invalid_ballot; - } else { - app->partitions[i].ballot = PARENT_BALLOT; - } - } - app->partitions[CHILD_INDEX].ballot = child_ballot; - - // mock node state - node_state node; - node.put_partition(dsn::gpid(app->app_id, PARENT_INDEX), true); - mock_node_state(dsn::rpc_address("127.0.0.1", 10086), node); - - // mock register_child_request partition_configuration parent_config; parent_config.ballot = req_parent_ballot; parent_config.last_committed_decree = 5; parent_config.max_replica_count = 3; - parent_config.pid = dsn::gpid(app->app_id, PARENT_INDEX); + parent_config.pid = gpid(app->app_id, parent_index); - dsn::partition_configuration child_config; + partition_configuration child_config; child_config.ballot = PARENT_BALLOT + 1; child_config.last_committed_decree = 5; - child_config.pid = dsn::gpid(app->app_id, CHILD_INDEX); + child_config.pid = gpid(app->app_id, parent_index + PARTITION_COUNT); + + // mock node state + node_state node; + node.put_partition(gpid(app->app_id, PARENT_INDEX), true); + mock_node_state(NODE, node); - // register_child_request request; auto request = dsn::make_unique(); + request->app.app_name = app->app_name; request->app.app_id = app->app_id; request->parent_config = parent_config; request->child_config = child_config; - request->primary_address = dsn::rpc_address("127.0.0.1", 10086); + request->primary_address = NODE; register_child_rpc rpc(std::move(request), RPC_CM_REGISTER_CHILD_REPLICA); split_svc().register_child_on_meta(rpc); @@ -105,7 +94,33 @@ class meta_split_service_test : public meta_test_base if (wait_zk) { std::this_thread::sleep_for(std::chrono::milliseconds(100)); } - return rpc.response(); + return rpc.response().err; + } + + void mock_app_partition_split_context() + { + app->partition_count = NEW_PARTITION_COUNT; + app->partitions.resize(app->partition_count); + app->helpers->contexts.resize(app->partition_count); + app->helpers->split_states.splitting_count = app->partition_count / 2; + for (int i = 0; i < app->partition_count; ++i) { + app->helpers->contexts[i].config_owner = &app->partitions[i]; + app->partitions[i].pid = gpid(app->app_id, i); + if (i >= app->partition_count / 2) { + app->partitions[i].ballot = invalid_ballot; + } else { + app->partitions[i].ballot = PARENT_BALLOT; + app->helpers->contexts[i].stage = config_status::not_pending; + app->helpers->split_states.status[i] = split_status::SPLITTING; + } + } + } + + void mock_child_registered() + { + app->partitions[CHILD_INDEX].ballot = PARENT_BALLOT; + app->helpers->split_states.splitting_count--; + app->helpers->split_states.status.erase(PARENT_INDEX); } const std::string NAME = "split_table"; @@ -114,6 +129,8 @@ class meta_split_service_test : public meta_test_base const int32_t PARENT_BALLOT = 3; const int32_t PARENT_INDEX = 0; const int32_t CHILD_INDEX = 4; + const rpc_address NODE = rpc_address("127.0.0.1", 10086); + std::shared_ptr app; }; // start split unit tests @@ -145,23 +162,44 @@ TEST_F(meta_split_service_test, start_split_test) } } -// TODO(heyuchen): refactor register unit tests -TEST_F(meta_split_service_test, register_child_with_wrong_ballot) -{ - auto resp = register_child(PARENT_BALLOT - 1, invalid_ballot); - ASSERT_EQ(resp.err, ERR_INVALID_VERSION); -} - -TEST_F(meta_split_service_test, register_child_with_child_registered) +// register child unit tests +TEST_F(meta_split_service_test, register_child_test) { - auto resp = register_child(PARENT_BALLOT, PARENT_BALLOT + 1); - ASSERT_EQ(resp.err, ERR_CHILD_REGISTERED); -} + // Test case: + // - request is out-dated + // - child has been registered + // - TODO(heyuchen): parent partition has been paused splitting + // - parent partition is sync config to remote storage + // - register child succeed + struct register_test + { + int32_t parent_ballot; + bool mock_child_registered; + bool mock_parent_paused; + bool mock_pending; + error_code expected_err; + bool wait_zk; + } tests[] = { + {PARENT_BALLOT - 1, false, false, false, ERR_INVALID_VERSION, false}, + {PARENT_BALLOT, true, false, false, ERR_CHILD_REGISTERED, false}, + {PARENT_BALLOT, false, false, true, ERR_IO_PENDING, false}, + {PARENT_BALLOT, false, false, false, ERR_OK, true}, + }; -TEST_F(meta_split_service_test, register_child_succeed) -{ - auto resp = register_child(PARENT_BALLOT, invalid_ballot, true); - ASSERT_EQ(resp.err, ERR_OK); + for (auto test : tests) { + mock_app_partition_split_context(); + if (test.mock_child_registered) { + mock_child_registered(); + } + if (test.mock_parent_paused) { + // TODO(heyuchen): mock split paused + } + if (test.mock_pending) { + app->helpers->contexts[PARENT_INDEX].stage = config_status::pending_remote_sync; + } + ASSERT_EQ(register_child(PARENT_INDEX, test.parent_ballot, test.wait_zk), + test.expected_err); + } } } // namespace replication diff --git a/src/replica/replica_check.cpp b/src/replica/replica_check.cpp index a87703ad39..267d756ca3 100644 --- a/src/replica/replica_check.cpp +++ b/src/replica/replica_check.cpp @@ -42,12 +42,15 @@ #include #include +#include namespace dsn { namespace replication { void replica::init_group_check() { + FAIL_POINT_INJECT_F("replica_init_group_check", [](dsn::string_view) {}); + _checker.only_one_thread_access(); ddebug("%s: init group check", name()); @@ -66,6 +69,8 @@ void replica::init_group_check() void replica::broadcast_group_check() { + FAIL_POINT_INJECT_F("replica_broadcast_group_check", [](dsn::string_view) {}); + dassert(nullptr != _primary_states.group_check_task, ""); ddebug("%s: start to broadcast group check", name()); diff --git a/src/replica/replica_config.cpp b/src/replica/replica_config.cpp index 633b09cadf..165d96fa73 100644 --- a/src/replica/replica_config.cpp +++ b/src/replica/replica_config.cpp @@ -628,7 +628,8 @@ bool replica::update_local_configuration(const replica_configuration &config, FAIL_POINT_INJECT_F("replica_update_local_configuration", [=](dsn::string_view) -> bool { auto old_status = status(); _config = config; - ddebug_replica("update status from {} to {}", enum_to_string(old_status), status()); + ddebug_replica( + "update status from {} to {}", enum_to_string(old_status), enum_to_string(status())); return true; }); diff --git a/src/replica/split/replica_split_manager.cpp b/src/replica/split/replica_split_manager.cpp index 07e5d614ed..92e29fd992 100644 --- a/src/replica/split/replica_split_manager.cpp +++ b/src/replica/split/replica_split_manager.cpp @@ -653,6 +653,21 @@ void replica_split_manager::register_child_on_meta(ballot b) // on primary paren return; } + if (status() != partition_status::PS_PRIMARY || _split_status != split_status::SPLITTING) { + derror_replica( + "wrong partition status or wrong split status, partition_status={}, split_status={}", + enum_to_string(status()), + enum_to_string(_split_status)); + _stub->split_replica_error_handler( + _child_gpid, + std::bind(&replica_split_manager::child_handle_split_error, + std::placeholders::_1, + "register child failed, wrong partition status or split status")); + _replica->_primary_states.sync_send_write_request = false; + parent_cleanup_split_context(); + return; + } + if (_replica->_primary_states.reconfiguration_task != nullptr) { dwarn_replica("under reconfiguration, delay and retry to register child"); _replica->_primary_states.register_child_task = @@ -728,65 +743,89 @@ void replica_split_manager::on_register_child_on_meta_reply( derror_replica("status wrong or stub is not connected, status = {}", enum_to_string(status())); _replica->_primary_states.register_child_task = nullptr; - // TODO(heyuchen): TBD - clear other split tasks in primary context return; } - // TODO(heyuchen): update following error handler error_code err = ec == ERR_OK ? response.err : ec; + if (err == ERR_INVALID_STATE || err == ERR_INVALID_VERSION || err == ERR_CHILD_REGISTERED) { + if (err == ERR_CHILD_REGISTERED) { + derror_replica( + "register child({}) failed, error = {}, child has already been registered", + request.child_config.pid, + err); + } else { + derror_replica("register child({}) failed, error = {}, request is out-of-dated", + request.child_config.pid, + err); + _stub->split_replica_error_handler( + request.child_config.pid, + std::bind(&replica_split_manager::child_handle_split_error, + std::placeholders::_1, + "register child failed, request is out-of-dated")); + } + parent_cleanup_split_context(); + _replica->_primary_states.register_child_task = nullptr; + _replica->_primary_states.sync_send_write_request = false; + if (response.parent_config.ballot >= get_ballot()) { + ddebug_replica("response ballot = {}, local ballot = {}, should update configuration", + response.parent_config.ballot, + get_ballot()); + _replica->update_configuration(response.parent_config); + } + return; + } + if (err != ERR_OK) { dwarn_replica( - "register child({}) failed, error = {}, request child ballot = {}, local ballot = {}", + "register child({}) failed, error = {}, wait and retry", request.child_config.pid, err); + _replica->_primary_states.register_child_task = tasking::enqueue( + LPC_PARTITION_SPLIT, + tracker(), + std::bind(&replica_split_manager::parent_send_register_request, this, request), + get_gpid().thread_hash(), + std::chrono::seconds(1)); + return; + } + + if (response.parent_config.ballot < get_ballot()) { + dwarn_replica( + "register child({}) failed, parent ballot from response is {}, local ballot is {}", request.child_config.pid, - err.to_string(), - request.child_config.ballot, + response.parent_config.ballot, get_ballot()); - - // register request is out-of-dated - if (err == ERR_INVALID_VERSION) { - return; - } - - // we need not resend register request if child has been registered - if (err != ERR_CHILD_REGISTERED) { - _replica->_primary_states.register_child_task = tasking::enqueue( - LPC_DELAY_UPDATE_CONFIG, - tracker(), - std::bind(&replica_split_manager::parent_send_register_request, this, request), - get_gpid().thread_hash(), - std::chrono::seconds(1)); - return; - } + _replica->_primary_states.register_child_task = tasking::enqueue( + LPC_PARTITION_SPLIT, + tracker(), + std::bind(&replica_split_manager::parent_send_register_request, this, request), + get_gpid().thread_hash(), + std::chrono::seconds(1)); + return; } - if (err == ERR_OK) { - ddebug_replica("register child({}) succeed, response parent ballot = {}, local ballot = " - "{}, local status = {}", - response.child_config.pid, - response.parent_config.ballot, - get_ballot(), - enum_to_string(status())); + ddebug_replica("register child({}) succeed, response parent ballot = {}, local ballot = " + "{}, local status = {}", + request.child_config.pid, + response.parent_config.ballot, + get_ballot(), + enum_to_string(status())); - dcheck_eq_replica(_replica->_app_info.partition_count * 2, response.app.partition_count); - _stub->split_replica_exec(LPC_PARTITION_SPLIT, - response.child_config.pid, - std::bind(&replica_split_manager::child_partition_active, - std::placeholders::_1, - response.child_config)); + dcheck_ge_replica(response.parent_config.ballot, get_ballot()); + dcheck_eq_replica(_replica->_app_info.partition_count * 2, response.app.partition_count); - // TODO(heyuchen): TBD - update parent group partition_count - } + _stub->split_replica_exec(LPC_PARTITION_SPLIT, + response.child_config.pid, + std::bind(&replica_split_manager::child_partition_active, + std::placeholders::_1, + response.child_config)); - // parent register child succeed or child partition has already resgitered - // in both situation, we should reset resgiter child task and child_gpid + // update parent config + _replica->update_configuration(response.parent_config); _replica->_primary_states.register_child_task = nullptr; - _child_gpid.set_app_id(0); - if (response.parent_config.ballot >= get_ballot()) { - ddebug_replica("response ballot = {}, local ballot = {}, should update configuration", - response.parent_config.ballot, - get_ballot()); - _replica->update_configuration(response.parent_config); - } + _replica->_primary_states.sync_send_write_request = false; + + // TODO(heyuchen): TBD - update parent group partition_count + + parent_cleanup_split_context(); } // ThreadPool: THREAD_POOL_REPLICATION diff --git a/src/replica/split/test/replica_split_test.cpp b/src/replica/split/test/replica_split_test.cpp index 372334b5d1..e469d223f9 100644 --- a/src/replica/split/test/replica_split_test.cpp +++ b/src/replica/split/test/replica_split_test.cpp @@ -51,23 +51,6 @@ class replica_split_test : public replica_test_base _app_info.partition_count = OLD_PARTITION_COUNT; } - // TODO(heyuchen): refactor it - void mock_register_child_request() - { - partition_configuration &p_config = _register_req.parent_config; - p_config.pid = PARENT_GPID; - p_config.ballot = INIT_BALLOT; - p_config.last_committed_decree = DECREE; - - partition_configuration &c_config = _register_req.child_config; - c_config.pid = CHILD_GPID; - c_config.ballot = INIT_BALLOT + 1; - c_config.last_committed_decree = 0; - - _register_req.app = _app_info; - _register_req.primary_address = dsn::rpc_address("127.0.0.1", 10086); - } - void generate_child() { _child_replica = stub->generate_replica( @@ -264,23 +247,39 @@ class replica_split_test : public replica_test_base void test_register_child_on_meta() { + parent_set_split_status(split_status::SPLITTING); _parent_split_mgr->register_child_on_meta(INIT_BALLOT); _parent_replica->tracker()->wait_outstanding_tasks(); } - void test_on_register_child_rely(partition_status::type status, dsn::error_code resp_err) + void test_on_register_child_reply(partition_status::type status, dsn::error_code resp_err) { - mock_register_child_request(); - _parent_replica->_config.status = status; + stub->set_state_connected(); + stub->set_rpc_address(PRIMARY); + mock_parent_split_context(status); + _parent_replica->_primary_states.sync_send_write_request = true; + _parent_split_mgr->_partition_version = -1; + _parent_replica->_inactive_is_transient = true; + + register_child_request req; + req.app = _app_info; + req.parent_config.pid = PARENT_GPID; + req.parent_config.ballot = INIT_BALLOT; + req.parent_config.last_committed_decree = DECREE; + req.parent_config.primary = PRIMARY; + req.child_config.pid = CHILD_GPID; + req.child_config.ballot = INIT_BALLOT + 1; + req.child_config.last_committed_decree = 0; + req.primary_address = PRIMARY; register_child_response resp; resp.err = resp_err; - resp.app = _register_req.app; + resp.app = req.app; resp.app.partition_count *= 2; - resp.parent_config = _register_req.parent_config; - resp.child_config = _register_req.child_config; + resp.parent_config = req.parent_config; + resp.child_config = req.child_config; - _parent_split_mgr->on_register_child_on_meta_reply(ERR_OK, _register_req, resp); + _parent_split_mgr->on_register_child_on_meta_reply(ERR_OK, req, resp); _parent_replica->tracker()->wait_outstanding_tasks(); } @@ -340,8 +339,7 @@ class replica_split_test : public replica_test_base std::unique_ptr _parent_split_mgr; std::unique_ptr _child_split_mgr; - dsn::app_info _app_info; - register_child_request _register_req; + app_info _app_info; std::vector _private_log_files; std::vector _mutation_list; prepare_list *_mock_plist; @@ -527,38 +525,38 @@ TEST_F(replica_split_test, register_child_test) ASSERT_EQ(_parent_split_mgr->get_partition_version(), -1); } -// TODO(heyuchen): refactor register child reply -TEST_F(replica_split_test, register_child_reply_with_wrong_status) -{ - generate_child(); - mock_child_split_context(true, true); - - test_on_register_child_rely(partition_status::PS_PRIMARY, ERR_OK); - primary_context parent_primary_states = get_replica_primary_context(_parent_replica); - ASSERT_EQ(parent_primary_states.register_child_task, nullptr); -} - -TEST_F(replica_split_test, register_child_reply_with_child_registered) +// register_child_reply tests +TEST_F(replica_split_test, register_child_reply_test) { + fail::cfg("replica_init_group_check", "return()"); + fail::cfg("replica_broadcast_group_check", "return()"); generate_child(); - mock_child_split_context(true, true); - test_on_register_child_rely(partition_status::PS_INACTIVE, ERR_CHILD_REGISTERED); - - primary_context parent_primary_states = get_replica_primary_context(_parent_replica); - ASSERT_EQ(parent_primary_states.register_child_task, nullptr); - ASSERT_TRUE(is_parent_not_in_split()); -} - -TEST_F(replica_split_test, register_child_reply_succeed) -{ - generate_child(); - mock_child_split_context(true, true); - - fail::cfg("replica_stub_split_replica_exec", "return()"); - test_on_register_child_rely(partition_status::PS_INACTIVE, ERR_OK); - - ASSERT_TRUE(is_parent_not_in_split()); + // Test cases: + // - wrong partition status + // - response error = INVALID_STATE + // - response error = CHILD_REGISTERED + // - TODO(heyuchen): add response error = OK + // ({partition_status::PS_INACTIVE, ERR_OK, NEW_PARTITION_COUNT - 1}) + // after implement parent update group partition count + struct register_child_reply_test + { + partition_status::type parent_partition_status; + error_code resp_err; + int32_t expected_parent_partition_version; + } tests[] = {{partition_status::PS_PRIMARY, ERR_OK, -1}, + {partition_status::PS_INACTIVE, ERR_INVALID_STATE, -1}, + {partition_status::PS_INACTIVE, ERR_CHILD_REGISTERED, -1}}; + for (auto test : tests) { + mock_child_split_context(true, true); + test_on_register_child_reply(test.parent_partition_status, test.resp_err); + ASSERT_EQ(_parent_replica->status(), partition_status::PS_PRIMARY); + if (test.parent_partition_status == partition_status::PS_INACTIVE) { + // TODO(heyuchen): check primary parent finish split + ASSERT_EQ(_parent_split_mgr->get_partition_version(), + test.expected_parent_partition_version); + } + } } } // namespace replication diff --git a/src/replica/test/mock_utils.h b/src/replica/test/mock_utils.h index e976977c2b..08d9dafab6 100644 --- a/src/replica/test/mock_utils.h +++ b/src/replica/test/mock_utils.h @@ -270,6 +270,8 @@ class mock_replica_stub : public replica_stub { _bulk_load_downloading_count.store(count); } + + void set_rpc_address(const rpc_address &address) { _primary_address = address; } }; class mock_log_file : public log_file diff --git a/src/replication.thrift b/src/replication.thrift index a5ae545cae..5a525a4cc5 100644 --- a/src/replication.thrift +++ b/src/replication.thrift @@ -890,6 +890,7 @@ struct register_child_response // - ERR_INVALID_VERSION: request is out-dated // - ERR_CHILD_REGISTERED: child has been registered // - ERR_IO_PENDING: meta is executing another remote sync task + // - ERR_INVALID_STATE: parent partition is not splitting 1:dsn.error_code err; 2:dsn.layer2.app_info app; 3:dsn.layer2.partition_configuration parent_config; From 20b3265ef3bbdbf9804e155b2550f6f844d5f596 Mon Sep 17 00:00:00 2001 From: heyuchen Date: Thu, 15 Oct 2020 14:46:01 +0800 Subject: [PATCH 2/3] remove useless code --- src/replica/split/replica_split_manager.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/replica/split/replica_split_manager.cpp b/src/replica/split/replica_split_manager.cpp index 92e29fd992..ad8e0bd574 100644 --- a/src/replica/split/replica_split_manager.cpp +++ b/src/replica/split/replica_split_manager.cpp @@ -648,11 +648,6 @@ void replica_split_manager::parent_check_sync_point_commit(decree sync_point) // // ThreadPool: THREAD_POOL_REPLICATION void replica_split_manager::register_child_on_meta(ballot b) // on primary parent { - if (status() != partition_status::PS_PRIMARY) { - dwarn_replica("failed to register child, status = {}", enum_to_string(status())); - return; - } - if (status() != partition_status::PS_PRIMARY || _split_status != split_status::SPLITTING) { derror_replica( "wrong partition status or wrong split status, partition_status={}, split_status={}", From 532e0806647ff2cb49af23f1b767a83eec0e1ae5 Mon Sep 17 00:00:00 2001 From: heyuchen Date: Tue, 20 Oct 2020 09:30:48 +0800 Subject: [PATCH 3/3] small fix --- src/meta/meta_split_service.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/meta/meta_split_service.cpp b/src/meta/meta_split_service.cpp index 3e9ea88813..c1a1515135 100644 --- a/src/meta/meta_split_service.cpp +++ b/src/meta/meta_split_service.cpp @@ -144,7 +144,7 @@ void meta_split_service::register_child_on_meta(register_child_rpc rpc) const gpid &child_gpid = request.child_config.pid; const auto &parent_config = app->partitions[parent_gpid.get_partition_index()]; if (request.parent_config.ballot != parent_config.ballot) { - derror_f("app({}) partition({}) register child({}) failed, request is out-dated, request " + derror_f("app({}) partition({}) register child({}) failed, request is outdated, request " "parent ballot = {}, local parent ballot = {}", app_name, parent_gpid,