diff --git a/include/dsn/dist/replication/replication_enums.h b/include/dsn/dist/replication/replication_enums.h index 575e5f0eb6..30ca1db2b4 100644 --- a/include/dsn/dist/replication/replication_enums.h +++ b/include/dsn/dist/replication/replication_enums.h @@ -21,6 +21,7 @@ ENUM_REG(replication::partition_status::PS_ERROR) ENUM_REG(replication::partition_status::PS_PRIMARY) ENUM_REG(replication::partition_status::PS_SECONDARY) ENUM_REG(replication::partition_status::PS_POTENTIAL_SECONDARY) +ENUM_REG(replication::partition_status::PS_PARTITION_SPLIT) ENUM_END2(replication::partition_status::type, partition_status) ENUM_BEGIN2(replication::read_semantic::type, @@ -98,4 +99,12 @@ ENUM_BEGIN2(replication::detect_action::type, detect_action, replication::detect ENUM_REG(replication::detect_action::START) ENUM_REG(replication::detect_action::STOP) ENUM_END2(replication::detect_action::type, detect_action) + +ENUM_BEGIN2(replication::split_status::type, split_status, replication::split_status::NOT_SPLIT) +ENUM_REG(replication::split_status::NOT_SPLIT) +ENUM_REG(replication::split_status::SPLITTING) +ENUM_REG(replication::split_status::PAUSING) +ENUM_REG(replication::split_status::PAUSED) +ENUM_REG(replication::split_status::CANCELING) +ENUM_END2(replication::split_status::type, split_status) } diff --git a/include/dsn/dist/replication/replication_types.h b/include/dsn/dist/replication/replication_types.h index 4c7fcb6724..38e209f16d 100644 --- a/include/dsn/dist/replication/replication_types.h +++ b/include/dsn/dist/replication/replication_types.h @@ -77,6 +77,20 @@ struct learner_status extern const std::map _learner_status_VALUES_TO_NAMES; +struct split_status +{ + enum type + { + NOT_SPLIT = 0, + SPLITTING = 1, + PAUSING = 2, + PAUSED = 3, + CANCELING = 4 + }; +}; + +extern const std::map _split_status_VALUES_TO_NAMES; + struct config_type { enum type diff --git a/src/common/replication_types.cpp b/src/common/replication_types.cpp index 664d5575d7..9585ec0799 100644 --- a/src/common/replication_types.cpp +++ b/src/common/replication_types.cpp @@ -65,6 +65,16 @@ const std::map _learner_status_VALUES_TO_NAMES( ::apache::thrift::TEnumIterator(6, _klearner_statusValues, _klearner_statusNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL)); +int _ksplit_statusValues[] = {split_status::NOT_SPLIT, + split_status::SPLITTING, + split_status::PAUSING, + split_status::PAUSED, + split_status::CANCELING}; +const char *_ksplit_statusNames[] = {"NOT_SPLIT", "SPLITTING", "PAUSING", "PAUSED", "CANCELING"}; +const std::map _split_status_VALUES_TO_NAMES( + ::apache::thrift::TEnumIterator(5, _ksplit_statusValues, _ksplit_statusNames), + ::apache::thrift::TEnumIterator(-1, NULL, NULL)); + int _kconfig_typeValues[] = {config_type::CT_INVALID, config_type::CT_ASSIGN_PRIMARY, config_type::CT_UPGRADE_TO_PRIMARY, diff --git a/src/meta/meta_data.h b/src/meta/meta_data.h index 47a005230a..75de29a2b3 100644 --- a/src/meta/meta_data.h +++ b/src/meta/meta_data.h @@ -284,6 +284,19 @@ struct restore_state restore_state() : restore_status(dsn::ERR_OK), progress(0), reason() {} }; +// app partition_split states +// when starting partition split, `splitting_count` will be equal to old_partition_count, +// will be inserted into `status`. +// if partition[0] finish split, `splitting_count` will decrease and <0, SPLITTING> will be removed +// in `status`. +struct split_state +{ + int32_t splitting_count; + // partition_index -> split_status + std::map status; + split_state() : splitting_count(0) {} +}; + class app_state; class app_state_helper { @@ -293,6 +306,7 @@ class app_state_helper std::vector contexts; dsn::message_ex *pending_response; std::vector restore_states; + split_state split_states; public: app_state_helper() : owner(nullptr), partitions_in_progress(0) diff --git a/src/meta/meta_split_service.cpp b/src/meta/meta_split_service.cpp index 6b6fe0ff55..e64b716dfb 100644 --- a/src/meta/meta_split_service.cpp +++ b/src/meta/meta_split_service.cpp @@ -71,17 +71,13 @@ void meta_split_service::start_partition_split(start_split_rpc rpc) return; } - for (const auto &partition_config : app->partitions) { - // partition already during split - if (partition_config.ballot < 0) { - response.err = ERR_BUSY; - dwarn_f("app is already during partition split, client({}) sent repeated split " - "request: app({}), new_partition_count({})", - rpc.remote_address().to_string(), - request.app_name, - request.new_partition_count); - return; - } + if (app->helpers->split_states.splitting_count > 0) { + response.err = ERR_BUSY; + auto err_msg = + fmt::format("app({}) is already executing partition split", request.app_name); + derror_f("{}", err_msg); + response.hint_msg = err_msg; + return; } } @@ -101,15 +97,18 @@ void meta_split_service::do_start_partition_split(std::shared_ptr app app->partition_count * 2); zauto_write_lock l(app_lock()); + app->helpers->split_states.splitting_count = app->partition_count; app->partition_count *= 2; app->helpers->contexts.resize(app->partition_count); app->partitions.resize(app->partition_count); for (int i = 0; i < app->partition_count; ++i) { app->helpers->contexts[i].config_owner = &app->partitions[i]; - if (i >= app->partition_count / 2) { + if (i >= app->partition_count / 2) { // child partitions app->partitions[i].ballot = invalid_ballot; app->partitions[i].pid = gpid(app->app_id, i); + } else { // parent partitions + app->helpers->split_states.status[i] = split_status::SPLITTING; } } diff --git a/src/meta/test/meta_split_service_test.cpp b/src/meta/test/meta_split_service_test.cpp index aeeed875e6..6cb479476b 100644 --- a/src/meta/test/meta_split_service_test.cpp +++ b/src/meta/test/meta_split_service_test.cpp @@ -109,34 +109,40 @@ class meta_split_service_test : public meta_test_base } const std::string NAME = "split_table"; - const uint32_t PARTITION_COUNT = 4; - const uint32_t NEW_PARTITION_COUNT = 8; - const uint32_t PARENT_BALLOT = 3; - const uint32_t PARENT_INDEX = 0; - const uint32_t CHILD_INDEX = 4; + const int32_t PARTITION_COUNT = 4; + const int32_t NEW_PARTITION_COUNT = 8; + const int32_t PARENT_BALLOT = 3; + const int32_t PARENT_INDEX = 0; + const int32_t CHILD_INDEX = 4; }; -// TODO(heyuchen): refactor start split unit tests -TEST_F(meta_split_service_test, start_split_with_not_existed_app) +// start split unit tests +TEST_F(meta_split_service_test, start_split_test) { - auto err = start_partition_split("table_not_exist", PARTITION_COUNT); - ASSERT_EQ(err, ERR_APP_NOT_EXIST); -} - -TEST_F(meta_split_service_test, start_split_with_wrong_params) -{ - auto app = find_app(NAME); - auto err = start_partition_split(NAME, PARTITION_COUNT); - ASSERT_EQ(err, ERR_INVALID_PARAMETERS); - ASSERT_EQ(app->partition_count, PARTITION_COUNT); -} - -TEST_F(meta_split_service_test, start_split_succeed) -{ - auto app = find_app(NAME); - auto err = start_partition_split(NAME, NEW_PARTITION_COUNT); - ASSERT_EQ(err, ERR_OK); - ASSERT_EQ(app->partition_count, NEW_PARTITION_COUNT); + // Test case: + // - app not existed + // - wrong partition_count + // - app already splitting + // - start split succeed + struct start_test + { + std::string app_name; + int32_t new_partition_count; + bool need_mock_splitting; + error_code expected_err; + int32_t expected_partition_count; + } tests[] = {{"table_not_exist", PARTITION_COUNT, false, ERR_APP_NOT_EXIST, PARTITION_COUNT}, + {NAME, PARTITION_COUNT, false, ERR_INVALID_PARAMETERS, PARTITION_COUNT}, + {NAME, NEW_PARTITION_COUNT, true, ERR_BUSY, PARTITION_COUNT}, + {NAME, NEW_PARTITION_COUNT, false, ERR_OK, NEW_PARTITION_COUNT}}; + + for (auto test : tests) { + auto app = find_app(NAME); + app->helpers->split_states.splitting_count = test.need_mock_splitting ? PARTITION_COUNT : 0; + ASSERT_EQ(start_partition_split(test.app_name, test.new_partition_count), + test.expected_err); + ASSERT_EQ(app->partition_count, test.expected_partition_count); + } } // TODO(heyuchen): refactor register unit tests diff --git a/src/replica/split/replica_split_manager.cpp b/src/replica/split/replica_split_manager.cpp index a1f26a9aa0..07e5d614ed 100644 --- a/src/replica/split/replica_split_manager.cpp +++ b/src/replica/split/replica_split_manager.cpp @@ -35,7 +35,8 @@ replica_split_manager::replica_split_manager(replica *r) replica_split_manager::~replica_split_manager() {} // ThreadPool: THREAD_POOL_REPLICATION -void replica_split_manager::on_add_child(const group_check_request &request) // on parent partition +void replica_split_manager::parent_start_split( + const group_check_request &request) // on parent partition { if (status() != partition_status::PS_PRIMARY && status() != partition_status::PS_SECONDARY && (status() != partition_status::PS_INACTIVE || !_replica->_inactive_is_transient)) { @@ -53,14 +54,12 @@ void replica_split_manager::on_add_child(const group_check_request &request) // return; } - gpid child_gpid = request.child_gpid; - if (_child_gpid == child_gpid) { - dwarn_replica("child replica({}) is already existed, might be partition splitting, ignore " - "this request", - child_gpid); + if (_split_status == split_status::SPLITTING) { + dwarn_replica("partition is already splitting, ignore this request"); return; } + gpid child_gpid = request.child_gpid; if (child_gpid.get_partition_index() < _replica->_app_info.partition_count) { dwarn_replica( "receive old add child request, child_gpid={}, partition_count={}, ignore this request", @@ -69,6 +68,11 @@ void replica_split_manager::on_add_child(const group_check_request &request) // return; } + // TODO(heyuchen): if partition is primary, reset split related varieties + + _partition_version.store(_replica->_app_info.partition_count - 1); + + _split_status = split_status::SPLITTING; _child_gpid = child_gpid; _child_init_ballot = get_ballot(); @@ -137,12 +141,15 @@ bool replica_split_manager::parent_check_states() // on parent partition { FAIL_POINT_INJECT_F("replica_parent_check_states", [](dsn::string_view) { return true; }); - if (_child_init_ballot != get_ballot() || _child_gpid.get_app_id() == 0 || + if (_split_status != split_status::SPLITTING || _child_init_ballot != get_ballot() || + _child_gpid.get_app_id() == 0 || (status() != partition_status::PS_PRIMARY && status() != partition_status::PS_SECONDARY && (status() != partition_status::PS_INACTIVE || !_replica->_inactive_is_transient))) { - dwarn_replica("parent wrong states: status({}), init_ballot({}) VS current_ballot({}), " + dwarn_replica("parent wrong states: status({}), split_status({}), init_ballot({}) VS " + "current_ballot({}), " "child_gpid({})", enum_to_string(status()), + enum_to_string(_split_status), _child_init_ballot, get_ballot(), _child_gpid); @@ -541,8 +548,12 @@ void replica_split_manager::parent_handle_child_catch_up( const notify_catch_up_request &request, notify_cacth_up_response &response) // on primary parent { - if (status() != partition_status::PS_PRIMARY) { - derror_replica("status is {}", enum_to_string(status())); + if (status() != partition_status::PS_PRIMARY || _split_status != split_status::SPLITTING) { + derror_replica( + "wrong partition status or wrong split status, partition_status={}, split_status={}", + enum_to_string(status()), + enum_to_string(_split_status)); + response.err = ERR_INVALID_STATE; return; } @@ -798,6 +809,7 @@ void replica_split_manager::parent_cleanup_split_context() // on parent partitio { _child_gpid.set_app_id(0); _child_init_ballot = 0; + _split_status = split_status::NOT_SPLIT; } // ThreadPool: THREAD_POOL_REPLICATION diff --git a/src/replica/split/replica_split_manager.h b/src/replica/split/replica_split_manager.h index 534a6f0e5d..200e6c7882 100644 --- a/src/replica/split/replica_split_manager.h +++ b/src/replica/split/replica_split_manager.h @@ -35,8 +35,8 @@ class replica_split_manager : replica_base void set_child_gpid(gpid pid) { _child_gpid = pid; } private: - // parent partition create child - void on_add_child(const group_check_request &request); + // parent partition start split + void parent_start_split(const group_check_request &request); // child replica initialize config and state info void child_init_replica(gpid parent_gpid, rpc_address primary_address, ballot init_ballot); @@ -119,6 +119,8 @@ class replica_split_manager : replica_base friend class replica_stub; friend class replica_split_test; + split_status::type _split_status{split_status::NOT_SPLIT}; + // _child_gpid = gpid({app_id},{pidx}+{old_partition_count}) for parent partition // _child_gpid.app_id = 0 for parent partition not in partition split and child partition gpid _child_gpid{0, 0}; diff --git a/src/replica/split/test/replica_split_test.cpp b/src/replica/split/test/replica_split_test.cpp index 45da0656bf..372334b5d1 100644 --- a/src/replica/split/test/replica_split_test.cpp +++ b/src/replica/split/test/replica_split_test.cpp @@ -93,6 +93,7 @@ class replica_split_test : public replica_test_base void mock_parent_split_context(partition_status::type status) { + parent_set_split_status(split_status::SPLITTING); _parent_split_mgr->_child_gpid = CHILD_GPID; _parent_split_mgr->_child_init_ballot = INIT_BALLOT; _parent_replica->set_partition_status(status); @@ -182,15 +183,16 @@ class replica_split_test : public replica_test_base } /// test functions - - void test_on_add_child(ballot b, gpid req_child_gpid) + void test_parent_start_split(ballot b, gpid req_child_gpid, split_status::type status) { + parent_set_split_status(status); + group_check_request req; req.config.ballot = b; req.config.status = partition_status::PS_PRIMARY; req.__set_child_gpid(req_child_gpid); - _parent_split_mgr->on_add_child(req); + _parent_split_mgr->parent_start_split(req); _parent_replica->tracker()->wait_outstanding_tasks(); } @@ -297,6 +299,12 @@ class replica_split_test : public replica_test_base } bool child_is_caught_up() { return _child_replica->_split_states.is_caught_up; } + split_status::type parent_get_split_status() { return _parent_split_mgr->_split_status; } + void parent_set_split_status(split_status::type status) + { + _parent_split_mgr->_split_status = status; + } + primary_context get_replica_primary_context(mock_replica_ptr rep) { return rep->_primary_states; @@ -305,7 +313,12 @@ class replica_split_test : public replica_test_base { return _parent_replica->_primary_states.sync_send_write_request; } - bool is_parent_not_in_split() { return (_parent_split_mgr->_child_gpid.get_app_id() == 0); } + bool is_parent_not_in_split() + { + return _parent_split_mgr->_child_gpid.get_app_id() == 0 && + _parent_split_mgr->_child_init_ballot == 0 && + _parent_split_mgr->_split_status == split_status::NOT_SPLIT; + } public: const std::string APP_NAME = "split_table"; @@ -335,29 +348,40 @@ class replica_split_test : public replica_test_base learn_state _mock_learn_state; }; -// TODO(heyuchen): refactor add child unit tests -TEST_F(replica_split_test, add_child_wrong_ballot) -{ - ballot WRONG_BALLOT = 2; - test_on_add_child(WRONG_BALLOT, CHILD_GPID); - ASSERT_EQ(stub->get_replica(CHILD_GPID), nullptr); -} - -TEST_F(replica_split_test, add_child_with_child_existed) -{ - _parent_split_mgr->set_child_gpid(CHILD_GPID); - test_on_add_child(INIT_BALLOT, CHILD_GPID); - ASSERT_EQ(stub->get_replica(CHILD_GPID), nullptr); -} - -TEST_F(replica_split_test, add_child_succeed) +// parent_start_split tests +TEST_F(replica_split_test, parent_start_split_tests) { fail::cfg("replica_stub_create_child_replica_if_not_found", "return()"); fail::cfg("replica_child_init_replica", "return()"); - test_on_add_child(INIT_BALLOT, CHILD_GPID); - ASSERT_NE(stub->get_replica(CHILD_GPID), nullptr); - stub->get_replica(CHILD_GPID)->tracker()->wait_outstanding_tasks(); + ballot WRONG_BALLOT = 2; + + // Test cases: + // - wrong ballot + // - partition has already executing splitting + // - old add child request + // - start succeed + struct start_split_test + { + ballot req_ballot; + gpid req_child_gpid; + split_status::type local_split_status; + split_status::type expected_split_status; + bool start_split_succeed; + } tests[] = { + {WRONG_BALLOT, CHILD_GPID, split_status::NOT_SPLIT, split_status::NOT_SPLIT, false}, + {INIT_BALLOT, CHILD_GPID, split_status::SPLITTING, split_status::SPLITTING, false}, + {INIT_BALLOT, PARENT_GPID, split_status::NOT_SPLIT, split_status::NOT_SPLIT, false}, + {INIT_BALLOT, CHILD_GPID, split_status::NOT_SPLIT, split_status::SPLITTING, true}}; + for (auto test : tests) { + test_parent_start_split(test.req_ballot, test.req_child_gpid, test.local_split_status); + ASSERT_EQ(parent_get_split_status(), test.expected_split_status); + if (test.start_split_succeed) { + ASSERT_EQ(_parent_split_mgr->get_partition_version(), OLD_PARTITION_COUNT - 1); + stub->get_replica(CHILD_GPID)->tracker()->wait_outstanding_tasks(); + ASSERT_EQ(stub->get_replica(CHILD_GPID)->status(), partition_status::PS_INACTIVE); + } + } } // parent_check_states tests diff --git a/src/replication.thrift b/src/replication.thrift index 0834aa839d..a5ae545cae 100644 --- a/src/replication.thrift +++ b/src/replication.thrift @@ -158,6 +158,16 @@ struct learn_notify_response 3:i64 signature; // learning signature } +// partition split status +enum split_status +{ + NOT_SPLIT, + SPLITTING, + PAUSING, + PAUSED, + CANCELING +} + struct group_check_request { 1:dsn.layer2.app_info app;