From d66b24ee03f68e4d88a53cf7a87153ba984a992b Mon Sep 17 00:00:00 2001 From: wangdan Date: Wed, 10 Nov 2021 20:25:30 +0800 Subject: [PATCH 1/8] fix: restrict the replica count while creating app --- src/meta/server_state.cpp | 95 ++++++++++++++++++- src/meta/server_state.h | 5 + src/meta/test/meta_app_operation_test.cpp | 73 ++++++++++---- .../test/meta_duplication_service_test.cpp | 2 +- src/meta/test/meta_test_base.cpp | 60 ++++++++++++ src/meta/test/meta_test_base.h | 2 + 6 files changed, 219 insertions(+), 18 deletions(-) diff --git a/src/meta/server_state.cpp b/src/meta/server_state.cpp index de15d406e6..1932b6ad25 100644 --- a/src/meta/server_state.cpp +++ b/src/meta/server_state.cpp @@ -56,6 +56,24 @@ using namespace dsn; namespace dsn { namespace replication { +DSN_DEFINE_int32("meta_server", + max_allowed_replica_count, + 3, + "max allowed replica count for arbitrary number of nodes in a cluster"); + +DSN_DEFINE_validator(max_allowed_replica_count, [](int32_t allowed_replica_count) -> bool { + return allowed_replica_count > 0 && allowed_replica_count <= 3; +}); + +DSN_DEFINE_int32("meta_server", + min_allowed_replica_count, + 1, + "min allowed replica count for arbitrary number of nodes in a cluster"); + +DSN_DEFINE_validator(min_allowed_replica_count, [](int32_t allowed_replica_count) -> bool { + return allowed_replica_count > 0 && allowed_replica_count <= FLAGS_max_allowed_replica_count; +}); + static const char *lock_state = "lock"; static const char *unlock_state = "unlock"; @@ -1072,7 +1090,8 @@ void server_state::create_app(dsn::message_ex *msg) opt.replica_count == exist_app.max_replica_count; }; - if (request.options.partition_count <= 0 || request.options.replica_count <= 0) { + if (request.options.partition_count <= 0 || + !validate_target_max_replica_count(request.options.replica_count)) { response.err = ERR_INVALID_PARAMETERS; will_create_app = false; } else { @@ -2864,5 +2883,79 @@ void server_state::clear_app_envs(const app_env_rpc &env_rpc) new_envs.c_str()); }); } + +namespace { + +bool validate_target_max_replica_count_internal(int32_t max_replica_count, + int32_t alive_node_count, + std::string *hint_message) +{ + dassert_f(FLAGS_min_allowed_replica_count <= FLAGS_max_allowed_replica_count, + "min allowed replica count({}) should be <= max allowed replica count({})", + FLAGS_min_allowed_replica_count, + FLAGS_max_allowed_replica_count); + + if (alive_node_count < 1) { + *hint_message = "all replica servers are unalive"; + return false; + } + + if (max_replica_count > FLAGS_max_allowed_replica_count) { + *hint_message = fmt::format("requested replica count({}) exceeds the max " + "allowed replica count({})", + max_replica_count, + FLAGS_max_allowed_replica_count); + return false; + } + + if (max_replica_count < FLAGS_min_allowed_replica_count) { + *hint_message = fmt::format("requested replica count({}) is less than the min " + "allowed replica count({})", + max_replica_count, + FLAGS_min_allowed_replica_count); + return false; + } + + if (max_replica_count > alive_node_count) { + *hint_message = fmt::format("there are not enough alive replica servers({}) " + "for the requested replica count({})", + alive_node_count, + max_replica_count); + return false; + } + + return true; +} + +} // anonymous namespace + +bool server_state::validate_target_max_replica_count(int32_t max_replica_count) +{ + std::string hint_message; + + zauto_read_lock l(_lock); + + auto alive_node_count = get_alive_node_count_unlocked(); + + bool is_valid = validate_target_max_replica_count_internal( + max_replica_count, alive_node_count, &hint_message); + if (!is_valid) { + dwarn_f("target max replica count is invalid: message={}", hint_message); + } + + return is_valid; +} + +int32_t server_state::get_alive_node_count_unlocked() +{ + int32_t count = 0; + for (const auto &node : _nodes) { + if (node.second.alive()) { + ++count; + } + } + return count; +} + } // namespace replication } // namespace dsn diff --git a/src/meta/server_state.h b/src/meta/server_state.h index ac97167f3e..00b27dffbb 100644 --- a/src/meta/server_state.h +++ b/src/meta/server_state.h @@ -292,6 +292,11 @@ class server_state void process_one_partition(std::shared_ptr &app); void transition_staging_state(std::shared_ptr &app); + // check whether a max replica count is valid especially for a new app + bool validate_target_max_replica_count(int32_t max_replica_count); + + int32_t get_alive_node_count_unlocked(); + private: friend class bulk_load_service; friend class bulk_load_service_test; diff --git a/src/meta/test/meta_app_operation_test.cpp b/src/meta/test/meta_app_operation_test.cpp index 14e8aba81d..bd0e9e3f45 100644 --- a/src/meta/test/meta_app_operation_test.cpp +++ b/src/meta/test/meta_app_operation_test.cpp @@ -24,17 +24,24 @@ namespace dsn { namespace replication { + +DSN_DECLARE_int32(max_allowed_replica_count); + +DSN_DECLARE_int32(min_allowed_replica_count); + class meta_app_operation_test : public meta_test_base { public: meta_app_operation_test() {} - error_code - create_app_test(int32_t partition_count, int32_t replica_count, bool success_if_exist) + error_code create_app_test(int32_t partition_count, + int32_t replica_count, + bool success_if_exist, + const std::string &app_name) { configuration_create_app_request create_request; configuration_create_app_response create_response; - create_request.app_name = APP_NAME; + create_request.app_name = app_name; create_request.options.app_type = "simple_kv"; create_request.options.partition_count = partition_count; create_request.options.replica_count = replica_count; @@ -94,10 +101,10 @@ class meta_app_operation_test : public meta_test_base app->expire_second -= 604800; } + void clear_nodes() { _ss->_nodes.clear(); } + const std::string APP_NAME = "app_operation_test"; const std::string OLD_APP_NAME = "old_app_operation"; - const int32_t PARTITION_COUNT = 4; - const int32_t REPLICA_COUNT = 3; }; TEST_F(meta_app_operation_test, create_app) @@ -114,32 +121,66 @@ TEST_F(meta_app_operation_test, create_app) // - create succeed with success_if_exist=true struct create_test { + std::string app_name; int32_t partition_count; int32_t replica_count; + int32_t unalive_node_count; + int32_t max_allowed_replica_count; + int32_t min_allowed_replica_count; bool success_if_exist; app_status::type before_status; error_code expected_err; - } tests[] = { - {PARTITION_COUNT, REPLICA_COUNT, false, app_status::AS_INVALID, ERR_OK}, - {0, REPLICA_COUNT, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {PARTITION_COUNT, 0, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {PARTITION_COUNT, REPLICA_COUNT, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {PARTITION_COUNT, REPLICA_COUNT, false, app_status::AS_CREATING, ERR_BUSY_CREATING}, - {PARTITION_COUNT, REPLICA_COUNT, false, app_status::AS_RECALLING, ERR_BUSY_CREATING}, - {PARTITION_COUNT, REPLICA_COUNT, false, app_status::AS_DROPPING, ERR_BUSY_DROPPING}, - {PARTITION_COUNT, REPLICA_COUNT, false, app_status::AS_DROPPED, ERR_OK}, - {PARTITION_COUNT, REPLICA_COUNT, true, app_status::AS_INVALID, ERR_OK}}; + } tests[] = {{APP_NAME, -1, 3, 0, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 0, 3, 0, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, -1, 0, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 0, 0, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 5, 0, 5, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 1, 3, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 1, 2, 3, 3, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 1, 0, 3, 3, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME + "_1", 4, 1, 2, 3, 1, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME, 4, 2, 3, 3, 2, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 2, 1, 1, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 2, 1, 3, 3, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 2, 0, 3, 3, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME + "_2", 4, 2, 1, 3, 2, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME, 4, 3, 3, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 3, 1, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 3, 0, 2, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME + "_3", 4, 3, 0, 3, 3, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME, 4, 3, 0, 3, 1, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME, 4, 3, 0, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 3, 0, 3, 1, false, app_status::AS_CREATING, ERR_BUSY_CREATING}, + {APP_NAME, 4, 3, 0, 3, 1, false, app_status::AS_RECALLING, ERR_BUSY_CREATING}, + {APP_NAME, 4, 3, 0, 3, 1, false, app_status::AS_DROPPING, ERR_BUSY_DROPPING}, + {APP_NAME, 4, 3, 0, 3, 1, false, app_status::AS_DROPPED, ERR_OK}, + {APP_NAME, 4, 3, 0, 3, 1, true, app_status::AS_INVALID, ERR_OK}}; + + clear_nodes(); + std::vector nodes = ensure_enough_alive_nodes(3); for (auto test : tests) { + FLAGS_max_allowed_replica_count = test.max_allowed_replica_count; + FLAGS_min_allowed_replica_count = test.min_allowed_replica_count; + for (int32_t i = 0; i < test.unalive_node_count; i++) { + _ms->set_node_state({nodes[i]}, false); + } + if (test.before_status == app_status::AS_DROPPED) { update_app_status(app_status::AS_AVAILABLE); drop_app(APP_NAME); } else if (test.before_status != app_status::AS_INVALID) { update_app_status(test.before_status); } - auto err = create_app_test(test.partition_count, test.replica_count, test.success_if_exist); + auto err = create_app_test( + test.partition_count, test.replica_count, test.success_if_exist, test.app_name); ASSERT_EQ(err, test.expected_err); + + _ms->set_node_state(nodes, true); } + + FLAGS_max_allowed_replica_count = 3; + FLAGS_min_allowed_replica_count = 1; } TEST_F(meta_app_operation_test, drop_app) diff --git a/src/meta/test/meta_duplication_service_test.cpp b/src/meta/test/meta_duplication_service_test.cpp index cb3bd7a4a8..a7e3720ae7 100644 --- a/src/meta/test/meta_duplication_service_test.cpp +++ b/src/meta/test/meta_duplication_service_test.cpp @@ -507,7 +507,7 @@ TEST_F(meta_duplication_service_test, remove_dup) TEST_F(meta_duplication_service_test, duplication_sync) { - std::vector server_nodes = generate_node_list(3); + std::vector server_nodes = ensure_enough_alive_nodes(3); rpc_address node = server_nodes[0]; std::string test_app = "test_app_0"; diff --git a/src/meta/test/meta_test_base.cpp b/src/meta/test/meta_test_base.cpp index f4e627b17e..9d4fd18776 100644 --- a/src/meta/test/meta_test_base.cpp +++ b/src/meta/test/meta_test_base.cpp @@ -17,6 +17,8 @@ #include "meta_test_base.h" +#include + #include "meta/server_load_balancer.h" #include "meta/meta_server_failure_detector.h" #include "meta/meta_split_service.h" @@ -84,6 +86,62 @@ void meta_test_base::initialize_node_state() { _ss->initialize_node_state(); } void meta_test_base::wait_all() { _ms->tracker()->wait_outstanding_tasks(); } +std::vector meta_test_base::ensure_enough_alive_nodes(int min_node_count) +{ + std::vector nodes; + + if (min_node_count < 1) { + return nodes; + } + + { + zauto_read_lock l(_ss->_lock); + + for (const auto &node : _ss->_nodes) { + if (node.second.alive()) { + nodes.push_back(node.first); + } + } + + if (!nodes.empty()) { + auto node_count = static_cast(nodes.size()); + dassert_f(node_count >= min_node_count, + "there should be at least {} alive nodes, now we just have {} alive nodes", + min_node_count, + node_count); + } + } + + if (!nodes.empty()) { + dinfo_f("already exists {} alive nodes: ", nodes.size()); + for (const auto &node : nodes) { + dinfo_f(" {}", node.to_string()); + } + return nodes; + } + + nodes = generate_node_list(min_node_count); + _ms->set_node_state(nodes, true); + + while (true) { + { + zauto_read_lock l(_ss->_lock); + + if (_ss->get_alive_node_count_unlocked() >= min_node_count) { + break; + } + } + + std::this_thread::sleep_for(std::chrono::milliseconds(5)); + } + + dinfo_f("created {} alive nodes: ", nodes.size()); + for (const auto &node : nodes) { + dinfo_f(" {}", node.to_string()); + } + return nodes; +} + void meta_test_base::create_app(const std::string &name, uint32_t partition_count) { configuration_create_app_request req; @@ -96,6 +154,8 @@ void meta_test_base::create_app(const std::string &name, uint32_t partition_coun req.options.is_stateful = true; req.options.envs["value_version"] = "1"; + ensure_enough_alive_nodes(req.options.replica_count); + auto result = fake_create_app(_ss.get(), req); fake_wait_rpc(result, resp); ASSERT_EQ(resp.err, ERR_OK) << resp.err.to_string() << " " << name; diff --git a/src/meta/test/meta_test_base.h b/src/meta/test/meta_test_base.h index 1c19298820..9880ca9896 100644 --- a/src/meta/test/meta_test_base.h +++ b/src/meta/test/meta_test_base.h @@ -45,6 +45,8 @@ class meta_test_base : public testing::Test void wait_all(); + std::vector ensure_enough_alive_nodes(int min_node_count); + // create an app for test with specified name and specified partition count void create_app(const std::string &name, uint32_t partition_count); From b442b44227a94ee6b94966b693022545282fd91e Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Wed, 17 Nov 2021 16:21:44 +0800 Subject: [PATCH 2/8] fix: restrict the replica count while creating app --- src/meta/meta_service.cpp | 6 ++ src/meta/meta_service.h | 2 + src/meta/server_state.cpp | 78 ++++++++--------------- src/meta/server_state.h | 2 - src/meta/test/meta_app_operation_test.cpp | 63 +++++++++--------- src/meta/test/meta_test_base.cpp | 59 ++++++++++------- src/meta/test/meta_test_base.h | 7 ++ 7 files changed, 111 insertions(+), 106 deletions(-) diff --git a/src/meta/meta_service.cpp b/src/meta/meta_service.cpp index 33e41a033b..b848cd83da 100644 --- a/src/meta/meta_service.cpp +++ b/src/meta/meta_service.cpp @@ -1186,5 +1186,11 @@ void meta_service::on_query_backup_status(query_backup_status_rpc rpc) _backup_handler->query_backup_status(std::move(rpc)); } +size_t meta_service::get_alive_node_count() const +{ + zauto_lock l(_failure_detector->_lock); + return _alive_set.size(); +} + } // namespace replication } // namespace dsn diff --git a/src/meta/meta_service.h b/src/meta/meta_service.h index 0f2174d0db..df067972e0 100644 --- a/src/meta/meta_service.h +++ b/src/meta/meta_service.h @@ -134,6 +134,8 @@ class meta_service : public serverlet dsn::task_tracker *tracker() { return &_tracker; } + size_t get_alive_node_count() const; + private: void register_rpc_handlers(); void register_ctrl_commands(); diff --git a/src/meta/server_state.cpp b/src/meta/server_state.cpp index 1932b6ad25..2ba5c1aaf4 100644 --- a/src/meta/server_state.cpp +++ b/src/meta/server_state.cpp @@ -56,14 +56,7 @@ using namespace dsn; namespace dsn { namespace replication { -DSN_DEFINE_int32("meta_server", - max_allowed_replica_count, - 3, - "max allowed replica count for arbitrary number of nodes in a cluster"); - -DSN_DEFINE_validator(max_allowed_replica_count, [](int32_t allowed_replica_count) -> bool { - return allowed_replica_count > 0 && allowed_replica_count <= 3; -}); +const int32_t max_allowed_replica_count = 15; DSN_DEFINE_int32("meta_server", min_allowed_replica_count, @@ -71,7 +64,7 @@ DSN_DEFINE_int32("meta_server", "min allowed replica count for arbitrary number of nodes in a cluster"); DSN_DEFINE_validator(min_allowed_replica_count, [](int32_t allowed_replica_count) -> bool { - return allowed_replica_count > 0 && allowed_replica_count <= FLAGS_max_allowed_replica_count; + return allowed_replica_count > 0 && allowed_replica_count <= max_allowed_replica_count; }); static const char *lock_state = "lock"; @@ -1090,8 +1083,13 @@ void server_state::create_app(dsn::message_ex *msg) opt.replica_count == exist_app.max_replica_count; }; - if (request.options.partition_count <= 0 || - !validate_target_max_replica_count(request.options.replica_count)) { + auto level = _meta_svc->get_function_level(); + if (level <= meta_function_level::fl_freezed) { + dwarn_f("current meta function level is freezed since there are too few alive nodes"); + response.err = ERR_STATE_FREEZED; + will_create_app = false; + } else if (request.options.partition_count <= 0 || + !validate_target_max_replica_count(request.options.replica_count)) { response.err = ERR_INVALID_PARAMETERS; will_create_app = false; } else { @@ -2888,39 +2886,29 @@ namespace { bool validate_target_max_replica_count_internal(int32_t max_replica_count, int32_t alive_node_count, - std::string *hint_message) + std::string &hint_message) { - dassert_f(FLAGS_min_allowed_replica_count <= FLAGS_max_allowed_replica_count, - "min allowed replica count({}) should be <= max allowed replica count({})", - FLAGS_min_allowed_replica_count, - FLAGS_max_allowed_replica_count); - - if (alive_node_count < 1) { - *hint_message = "all replica servers are unalive"; - return false; - } - - if (max_replica_count > FLAGS_max_allowed_replica_count) { - *hint_message = fmt::format("requested replica count({}) exceeds the max " - "allowed replica count({})", - max_replica_count, - FLAGS_max_allowed_replica_count); + if (max_replica_count > max_allowed_replica_count) { + hint_message = fmt::format("requested replica count({}) exceeds the max " + "allowed replica count({})", + max_replica_count, + max_allowed_replica_count); return false; } if (max_replica_count < FLAGS_min_allowed_replica_count) { - *hint_message = fmt::format("requested replica count({}) is less than the min " - "allowed replica count({})", - max_replica_count, - FLAGS_min_allowed_replica_count); + hint_message = fmt::format("requested replica count({}) is less than the min " + "allowed replica count({})", + max_replica_count, + FLAGS_min_allowed_replica_count); return false; } if (max_replica_count > alive_node_count) { - *hint_message = fmt::format("there are not enough alive replica servers({}) " - "for the requested replica count({})", - alive_node_count, - max_replica_count); + hint_message = fmt::format("there are not enough alive replica servers({}) " + "for the requested replica count({})", + alive_node_count, + max_replica_count); return false; } @@ -2931,14 +2919,11 @@ bool validate_target_max_replica_count_internal(int32_t max_replica_count, bool server_state::validate_target_max_replica_count(int32_t max_replica_count) { - std::string hint_message; - - zauto_read_lock l(_lock); - - auto alive_node_count = get_alive_node_count_unlocked(); + auto alive_node_count = static_cast(_meta_svc->get_alive_node_count()); + std::string hint_message; bool is_valid = validate_target_max_replica_count_internal( - max_replica_count, alive_node_count, &hint_message); + max_replica_count, alive_node_count, hint_message); if (!is_valid) { dwarn_f("target max replica count is invalid: message={}", hint_message); } @@ -2946,16 +2931,5 @@ bool server_state::validate_target_max_replica_count(int32_t max_replica_count) return is_valid; } -int32_t server_state::get_alive_node_count_unlocked() -{ - int32_t count = 0; - for (const auto &node : _nodes) { - if (node.second.alive()) { - ++count; - } - } - return count; -} - } // namespace replication } // namespace dsn diff --git a/src/meta/server_state.h b/src/meta/server_state.h index 00b27dffbb..17a7533ee1 100644 --- a/src/meta/server_state.h +++ b/src/meta/server_state.h @@ -295,8 +295,6 @@ class server_state // check whether a max replica count is valid especially for a new app bool validate_target_max_replica_count(int32_t max_replica_count); - int32_t get_alive_node_count_unlocked(); - private: friend class bulk_load_service; friend class bulk_load_service_test; diff --git a/src/meta/test/meta_app_operation_test.cpp b/src/meta/test/meta_app_operation_test.cpp index bd0e9e3f45..c1365b8b70 100644 --- a/src/meta/test/meta_app_operation_test.cpp +++ b/src/meta/test/meta_app_operation_test.cpp @@ -25,8 +25,6 @@ namespace dsn { namespace replication { -DSN_DECLARE_int32(max_allowed_replica_count); - DSN_DECLARE_int32(min_allowed_replica_count); class meta_app_operation_test : public meta_test_base @@ -124,44 +122,50 @@ TEST_F(meta_app_operation_test, create_app) std::string app_name; int32_t partition_count; int32_t replica_count; + uint64_t min_live_node_count_for_unfreeze; int32_t unalive_node_count; - int32_t max_allowed_replica_count; int32_t min_allowed_replica_count; bool success_if_exist; app_status::type before_status; error_code expected_err; - } tests[] = {{APP_NAME, -1, 3, 0, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 0, 3, 0, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, -1, 0, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 0, 0, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 5, 0, 5, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 1, 3, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 1, 2, 3, 3, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 1, 0, 3, 3, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME + "_1", 4, 1, 2, 3, 1, false, app_status::AS_INVALID, ERR_OK}, - {APP_NAME, 4, 2, 3, 3, 2, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 2, 1, 1, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 2, 1, 3, 3, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 2, 0, 3, 3, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME + "_2", 4, 2, 1, 3, 2, false, app_status::AS_INVALID, ERR_OK}, - {APP_NAME, 4, 3, 3, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 3, 1, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 3, 0, 2, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME + "_3", 4, 3, 0, 3, 3, false, app_status::AS_INVALID, ERR_OK}, - {APP_NAME, 4, 3, 0, 3, 1, false, app_status::AS_INVALID, ERR_OK}, - {APP_NAME, 4, 3, 0, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 3, 0, 3, 1, false, app_status::AS_CREATING, ERR_BUSY_CREATING}, - {APP_NAME, 4, 3, 0, 3, 1, false, app_status::AS_RECALLING, ERR_BUSY_CREATING}, - {APP_NAME, 4, 3, 0, 3, 1, false, app_status::AS_DROPPING, ERR_BUSY_DROPPING}, - {APP_NAME, 4, 3, 0, 3, 1, false, app_status::AS_DROPPED, ERR_OK}, - {APP_NAME, 4, 3, 0, 3, 1, true, app_status::AS_INVALID, ERR_OK}}; + } tests[] = {{APP_NAME, -1, 3, 2, 0, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 0, 3, 2, 0, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, -1, 1, 0, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 0, 1, 0, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 16, 2, 0, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 15, 2, 0, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 1, 1, 3, 1, false, app_status::AS_INVALID, ERR_STATE_FREEZED}, + {APP_NAME, 4, 1, 1, 2, 3, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME + "_1", 4, 1, 1, 2, 1, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME, 4, 2, 1, 3, 2, false, app_status::AS_INVALID, ERR_STATE_FREEZED}, + {APP_NAME, 4, 2, 1, 2, 2, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 2, 1, 1, 3, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME + "_2", 4, 2, 1, 1, 2, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME, 4, 3, 2, 3, 1, false, app_status::AS_INVALID, ERR_STATE_FREEZED}, + {APP_NAME, 4, 3, 2, 2, 1, false, app_status::AS_INVALID, ERR_STATE_FREEZED}, + {APP_NAME, 4, 3, 2, 1, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 3, 2, 0, 5, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME + "_3", 4, 3, 2, 0, 3, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME, 4, 3, 2, 0, 1, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME, 4, 3, 2, 0, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 3, 2, 0, 1, false, app_status::AS_CREATING, ERR_BUSY_CREATING}, + {APP_NAME, 4, 3, 2, 0, 1, false, app_status::AS_RECALLING, ERR_BUSY_CREATING}, + {APP_NAME, 4, 3, 2, 0, 1, false, app_status::AS_DROPPING, ERR_BUSY_DROPPING}, + {APP_NAME, 4, 3, 2, 0, 1, false, app_status::AS_DROPPED, ERR_OK}, + {APP_NAME, 4, 3, 2, 0, 1, true, app_status::AS_INVALID, ERR_OK}}; clear_nodes(); std::vector nodes = ensure_enough_alive_nodes(3); + // the meta function level will become freezed once + // alive_nodes * 100 < total_nodes * node_live_percentage_threshold_for_update + // even if alive_nodes >= min_live_node_count_for_unfreeze + set_node_live_percentage_threshold_for_update(30); + for (auto test : tests) { - FLAGS_max_allowed_replica_count = test.max_allowed_replica_count; FLAGS_min_allowed_replica_count = test.min_allowed_replica_count; + set_min_live_node_count_for_unfreeze(test.min_live_node_count_for_unfreeze); + for (int32_t i = 0; i < test.unalive_node_count; i++) { _ms->set_node_state({nodes[i]}, false); } @@ -179,7 +183,6 @@ TEST_F(meta_app_operation_test, create_app) _ms->set_node_state(nodes, true); } - FLAGS_max_allowed_replica_count = 3; FLAGS_min_allowed_replica_count = 1; } diff --git a/src/meta/test/meta_test_base.cpp b/src/meta/test/meta_test_base.cpp index 9d4fd18776..87ade28f33 100644 --- a/src/meta/test/meta_test_base.cpp +++ b/src/meta/test/meta_test_base.cpp @@ -86,37 +86,52 @@ void meta_test_base::initialize_node_state() { _ss->initialize_node_state(); } void meta_test_base::wait_all() { _ms->tracker()->wait_outstanding_tasks(); } -std::vector meta_test_base::ensure_enough_alive_nodes(int min_node_count) +void meta_test_base::set_min_live_node_count_for_unfreeze(uint64_t node_count) { - std::vector nodes; + _ms->_meta_opts.min_live_node_count_for_unfreeze = node_count; +} - if (min_node_count < 1) { - return nodes; - } +void meta_test_base::set_node_live_percentage_threshold_for_update(uint64_t percentage_threshold) +{ + _ms->_node_live_percentage_threshold_for_update = percentage_threshold; +} - { - zauto_read_lock l(_ss->_lock); +std::vector meta_test_base::get_alive_nodes() const +{ + std::vector nodes; - for (const auto &node : _ss->_nodes) { - if (node.second.alive()) { - nodes.push_back(node.first); - } - } + zauto_read_lock l(_ss->_lock); - if (!nodes.empty()) { - auto node_count = static_cast(nodes.size()); - dassert_f(node_count >= min_node_count, - "there should be at least {} alive nodes, now we just have {} alive nodes", - min_node_count, - node_count); + for (const auto &node : _ss->_nodes) { + if (node.second.alive()) { + nodes.push_back(node.first); } } + return nodes; +} + +std::vector meta_test_base::ensure_enough_alive_nodes(int min_node_count) +{ + if (min_node_count < 1) { + return std::vector(); + } + + std::vector nodes(get_alive_nodes()); if (!nodes.empty()) { + auto node_count = static_cast(nodes.size()); + dassert_f(node_count >= min_node_count, + "there should be at least {} alive nodes, now we just have {} alive nodes", + min_node_count, + node_count); + dinfo_f("already exists {} alive nodes: ", nodes.size()); for (const auto &node : nodes) { dinfo_f(" {}", node.to_string()); } + + // ensure that _ms->_alive_set is identical with _ss->_nodes + _ms->set_node_state(nodes, true); return nodes; } @@ -125,9 +140,8 @@ std::vector meta_test_base::ensure_enough_alive_nodes(int min_node_ while (true) { { - zauto_read_lock l(_ss->_lock); - - if (_ss->get_alive_node_count_unlocked() >= min_node_count) { + std::vector alive_nodes(get_alive_nodes()); + if (static_cast(alive_nodes.size()) >= min_node_count) { break; } } @@ -154,7 +168,8 @@ void meta_test_base::create_app(const std::string &name, uint32_t partition_coun req.options.is_stateful = true; req.options.envs["value_version"] = "1"; - ensure_enough_alive_nodes(req.options.replica_count); + set_min_live_node_count_for_unfreeze(2); + ensure_enough_alive_nodes(3); auto result = fake_create_app(_ss.get(), req); fake_wait_rpc(result, resp); diff --git a/src/meta/test/meta_test_base.h b/src/meta/test/meta_test_base.h index 9880ca9896..c84d064ce2 100644 --- a/src/meta/test/meta_test_base.h +++ b/src/meta/test/meta_test_base.h @@ -45,6 +45,10 @@ class meta_test_base : public testing::Test void wait_all(); + void set_min_live_node_count_for_unfreeze(uint64_t node_count); + + void set_node_live_percentage_threshold_for_update(uint64_t percentage_threshold); + std::vector ensure_enough_alive_nodes(int min_node_count); // create an app for test with specified name and specified partition count @@ -72,6 +76,9 @@ class meta_test_base : public testing::Test std::shared_ptr _ss; std::unique_ptr _ms; std::string _app_root; + +private: + std::vector get_alive_nodes() const; }; } // namespace replication From 71664b86832394ec3c2f0258f056f3636b6aa2af Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Mon, 22 Nov 2021 19:02:42 +0800 Subject: [PATCH 3/8] fix: restrict the replica count while creating app --- src/meta/test/meta_app_operation_test.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/meta/test/meta_app_operation_test.cpp b/src/meta/test/meta_app_operation_test.cpp index c1365b8b70..25795984bc 100644 --- a/src/meta/test/meta_app_operation_test.cpp +++ b/src/meta/test/meta_app_operation_test.cpp @@ -162,6 +162,8 @@ TEST_F(meta_app_operation_test, create_app) // even if alive_nodes >= min_live_node_count_for_unfreeze set_node_live_percentage_threshold_for_update(30); + auto reserved_min_allowed_replica_count = FLAGS_min_allowed_replica_count; + for (auto test : tests) { FLAGS_min_allowed_replica_count = test.min_allowed_replica_count; set_min_live_node_count_for_unfreeze(test.min_live_node_count_for_unfreeze); @@ -183,7 +185,7 @@ TEST_F(meta_app_operation_test, create_app) _ms->set_node_state(nodes, true); } - FLAGS_min_allowed_replica_count = 1; + FLAGS_min_allowed_replica_count = reserved_min_allowed_replica_count; } TEST_F(meta_app_operation_test, drop_app) From e4113efa721a4f95a4f1eb5a3ec1fbd0b3463b77 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Fri, 26 Nov 2021 01:21:10 +0800 Subject: [PATCH 4/8] fix: restrict the replica count while creating app --- src/meta/server_state.cpp | 4 +- src/meta/test/meta_app_operation_test.cpp | 111 +++++++++++++++------- 2 files changed, 81 insertions(+), 34 deletions(-) diff --git a/src/meta/server_state.cpp b/src/meta/server_state.cpp index 2ba5c1aaf4..b6178264a7 100644 --- a/src/meta/server_state.cpp +++ b/src/meta/server_state.cpp @@ -1085,7 +1085,7 @@ void server_state::create_app(dsn::message_ex *msg) auto level = _meta_svc->get_function_level(); if (level <= meta_function_level::fl_freezed) { - dwarn_f("current meta function level is freezed since there are too few alive nodes"); + derror_f("current meta function level is freezed since there are too few alive nodes"); response.err = ERR_STATE_FREEZED; will_create_app = false; } else if (request.options.partition_count <= 0 || @@ -2925,7 +2925,7 @@ bool server_state::validate_target_max_replica_count(int32_t max_replica_count) bool is_valid = validate_target_max_replica_count_internal( max_replica_count, alive_node_count, hint_message); if (!is_valid) { - dwarn_f("target max replica count is invalid: message={}", hint_message); + derror_f("target max replica count is invalid: message={}", hint_message); } return is_valid; diff --git a/src/meta/test/meta_app_operation_test.cpp b/src/meta/test/meta_app_operation_test.cpp index 25795984bc..d3e0d2fa06 100644 --- a/src/meta/test/meta_app_operation_test.cpp +++ b/src/meta/test/meta_app_operation_test.cpp @@ -16,6 +16,7 @@ // under the License. #include +#include #include #include "meta_service_test_app.h" @@ -107,10 +108,38 @@ class meta_app_operation_test : public meta_test_base TEST_F(meta_app_operation_test, create_app) { - // Test cases: + // Test cases: (assert min_allowed_replica_count <= max_allowed_replica_count) + // - wrong partition_count (< 0) + // - wrong partition_count (= 0) + // - wrong replica_count (< 0) + // - wrong replica_count (= 0) + // - wrong replica_count (> max_allowed_replica_count > alive_node_count) + // - wrong replica_count (> alive_node_count > max_allowed_replica_count) + // - wrong replica_count (> alive_node_count = max_allowed_replica_count) + // - wrong replica_count (= max_allowed_replica_count, and > alive_node_count) + // - wrong replica_count (< max_allowed_replica_count, and > alive_node_count) + // - wrong replica_count (= alive_node_count, and > max_allowed_replica_count) + // - wrong replica_count (< alive_node_count, and > max_allowed_replica_count) + // - valid replica_count (= max_allowed_replica_count, and = alive_node_count) + // - valid replica_count (= max_allowed_replica_count, and < alive_node_count) + // - valid replica_count (< max_allowed_replica_count, and = alive_node_count) + // - valid replica_count (< max_allowed_replica_count < alive_node_count) + // - valid replica_count (< alive_node_count < max_allowed_replica_count) + // - valid replica_count (< alive_node_count = max_allowed_replica_count) + // - wrong replica_count (< min_allowed_replica_count < alive_node_count) + // - wrong replica_count (< alive_node_count < min_allowed_replica_count) + // - wrong replica_count (< min_allowed_replica_count = alive_node_count) + // - wrong replica_count (< min_allowed_replica_count, and > alive_node_count) + // - wrong replica_count (< min_allowed_replica_count, and = alive_node_count) + // - wrong replica_count (= min_allowed_replica_count, and > alive_node_count) + // - valid replica_count (= min_allowed_replica_count, and < alive_node_count) + // - cluster freezed (alive_node_count = 0) + // - cluster freezed (alive_node_count = 1 < min_live_node_count_for_unfreeze) + // - cluster freezed (alive_node_count = 2 < min_live_node_count_for_unfreeze) + // - cluster not freezed (alive_node_count = min_live_node_count_for_unfreeze) + // - create succeed with single-replica + // - create succeed with double-replica // - create app succeed - // - wrong partition_count - // - wrong replica_count // - create failed with table existed // - wrong app_status creating // - wrong app_status recalling @@ -123,44 +152,58 @@ TEST_F(meta_app_operation_test, create_app) int32_t partition_count; int32_t replica_count; uint64_t min_live_node_count_for_unfreeze; - int32_t unalive_node_count; + int alive_node_count; int32_t min_allowed_replica_count; bool success_if_exist; app_status::type before_status; error_code expected_err; - } tests[] = {{APP_NAME, -1, 3, 2, 0, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 0, 3, 2, 0, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, -1, 1, 0, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 0, 1, 0, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 16, 2, 0, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 15, 2, 0, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 1, 1, 3, 1, false, app_status::AS_INVALID, ERR_STATE_FREEZED}, - {APP_NAME, 4, 1, 1, 2, 3, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME + "_1", 4, 1, 1, 2, 1, false, app_status::AS_INVALID, ERR_OK}, - {APP_NAME, 4, 2, 1, 3, 2, false, app_status::AS_INVALID, ERR_STATE_FREEZED}, - {APP_NAME, 4, 2, 1, 2, 2, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 2, 1, 1, 3, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME + "_2", 4, 2, 1, 1, 2, false, app_status::AS_INVALID, ERR_OK}, - {APP_NAME, 4, 3, 2, 3, 1, false, app_status::AS_INVALID, ERR_STATE_FREEZED}, - {APP_NAME, 4, 3, 2, 2, 1, false, app_status::AS_INVALID, ERR_STATE_FREEZED}, - {APP_NAME, 4, 3, 2, 1, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 3, 2, 0, 5, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME + "_3", 4, 3, 2, 0, 3, false, app_status::AS_INVALID, ERR_OK}, - {APP_NAME, 4, 3, 2, 0, 1, false, app_status::AS_INVALID, ERR_OK}, - {APP_NAME, 4, 3, 2, 0, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 3, 2, 0, 1, false, app_status::AS_CREATING, ERR_BUSY_CREATING}, - {APP_NAME, 4, 3, 2, 0, 1, false, app_status::AS_RECALLING, ERR_BUSY_CREATING}, - {APP_NAME, 4, 3, 2, 0, 1, false, app_status::AS_DROPPING, ERR_BUSY_DROPPING}, - {APP_NAME, 4, 3, 2, 0, 1, false, app_status::AS_DROPPED, ERR_OK}, - {APP_NAME, 4, 3, 2, 0, 1, true, app_status::AS_INVALID, ERR_OK}}; + } tests[] = {{APP_NAME, -1, 3, 2, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 0, 3, 2, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, -1, 1, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 0, 1, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 16, 2, 14, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 17, 2, 16, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 16, 2, 15, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 15, 2, 14, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 14, 2, 13, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 16, 2, 16, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 16, 2, 17, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME + "_1", 4, 15, 2, 15, 1, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME + "_2", 4, 15, 2, 16, 1, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME + "_3", 4, 14, 2, 14, 1, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME + "_4", 4, 14, 2, 16, 1, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME + "_5", 4, 13, 2, 14, 1, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME + "_6", 4, 14, 2, 15, 1, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME, 4, 3, 2, 5, 4, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 3, 2, 4, 5, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 3, 2, 4, 4, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 3, 2, 2, 4, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 3, 2, 3, 4, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 4, 2, 3, 4, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME + "_7", 4, 3, 2, 4, 3, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME, 4, 1, 1, 0, 1, false, app_status::AS_INVALID, ERR_STATE_FREEZED}, + {APP_NAME, 4, 2, 2, 1, 1, false, app_status::AS_INVALID, ERR_STATE_FREEZED}, + {APP_NAME, 4, 3, 3, 2, 1, false, app_status::AS_INVALID, ERR_STATE_FREEZED}, + {APP_NAME + "_8", 4, 3, 3, 3, 1, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME + "_9", 4, 1, 1, 1, 1, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME + "_10", 4, 2, 1, 2, 2, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME, 4, 3, 2, 3, 3, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME, 4, 3, 2, 3, 3, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 3, 2, 3, 3, false, app_status::AS_CREATING, ERR_BUSY_CREATING}, + {APP_NAME, 4, 3, 2, 3, 3, false, app_status::AS_RECALLING, ERR_BUSY_CREATING}, + {APP_NAME, 4, 3, 2, 3, 3, false, app_status::AS_DROPPING, ERR_BUSY_DROPPING}, + {APP_NAME, 4, 3, 2, 3, 3, false, app_status::AS_DROPPED, ERR_OK}, + {APP_NAME, 4, 3, 2, 3, 3, true, app_status::AS_INVALID, ERR_OK}}; clear_nodes(); - std::vector nodes = ensure_enough_alive_nodes(3); + + const int total_node_count = 20; + std::vector nodes = ensure_enough_alive_nodes(total_node_count); // the meta function level will become freezed once // alive_nodes * 100 < total_nodes * node_live_percentage_threshold_for_update // even if alive_nodes >= min_live_node_count_for_unfreeze - set_node_live_percentage_threshold_for_update(30); + set_node_live_percentage_threshold_for_update(0); auto reserved_min_allowed_replica_count = FLAGS_min_allowed_replica_count; @@ -168,7 +211,11 @@ TEST_F(meta_app_operation_test, create_app) FLAGS_min_allowed_replica_count = test.min_allowed_replica_count; set_min_live_node_count_for_unfreeze(test.min_live_node_count_for_unfreeze); - for (int32_t i = 0; i < test.unalive_node_count; i++) { + dassert_f(total_node_count >= test.alive_node_count, + "total_node_count({}) should be >= alive_node_count({})", + total_node_count, + test.alive_node_count); + for (int i = 0; i < total_node_count - test.alive_node_count; i++) { _ms->set_node_state({nodes[i]}, false); } From bc8283d0ae0c62c80c567178a7b496c6f98add71 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Sat, 27 Nov 2021 01:48:49 +0800 Subject: [PATCH 5/8] feat: restrict the replica count while creating app --- src/meta/server_state.cpp | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/src/meta/server_state.cpp b/src/meta/server_state.cpp index b6178264a7..8f1118b77d 100644 --- a/src/meta/server_state.cpp +++ b/src/meta/server_state.cpp @@ -56,7 +56,7 @@ using namespace dsn; namespace dsn { namespace replication { -const int32_t max_allowed_replica_count = 15; +const int32_t kMaxAllowedReplicaCount = 15; DSN_DEFINE_int32("meta_server", min_allowed_replica_count, @@ -64,7 +64,7 @@ DSN_DEFINE_int32("meta_server", "min allowed replica count for arbitrary number of nodes in a cluster"); DSN_DEFINE_validator(min_allowed_replica_count, [](int32_t allowed_replica_count) -> bool { - return allowed_replica_count > 0 && allowed_replica_count <= max_allowed_replica_count; + return allowed_replica_count > 0 && allowed_replica_count <= kMaxAllowedReplicaCount; }); static const char *lock_state = "lock"; @@ -2888,19 +2888,13 @@ bool validate_target_max_replica_count_internal(int32_t max_replica_count, int32_t alive_node_count, std::string &hint_message) { - if (max_replica_count > max_allowed_replica_count) { - hint_message = fmt::format("requested replica count({}) exceeds the max " - "allowed replica count({})", + if (max_replica_count > kMaxAllowedReplicaCount || + max_replica_count < FLAGS_min_allowed_replica_count) { + hint_message = fmt::format("requested replica count({}) must be " + "within the range of [min={}, max={}]", max_replica_count, - max_allowed_replica_count); - return false; - } - - if (max_replica_count < FLAGS_min_allowed_replica_count) { - hint_message = fmt::format("requested replica count({}) is less than the min " - "allowed replica count({})", - max_replica_count, - FLAGS_min_allowed_replica_count); + FLAGS_min_allowed_replica_count, + kMaxAllowedReplicaCount); return false; } From 3f0f42344ea35920569dce27f7c7cc2b883eee98 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Tue, 14 Dec 2021 17:43:05 +0800 Subject: [PATCH 6/8] feat: restrict the replica count while creating app --- src/meta/server_state.cpp | 32 +++++++-- src/meta/test/meta_app_operation_test.cpp | 88 ++++++++++++++++++----- 2 files changed, 99 insertions(+), 21 deletions(-) diff --git a/src/meta/server_state.cpp b/src/meta/server_state.cpp index 8f1118b77d..7e138b026d 100644 --- a/src/meta/server_state.cpp +++ b/src/meta/server_state.cpp @@ -56,15 +56,37 @@ using namespace dsn; namespace dsn { namespace replication { -const int32_t kMaxAllowedReplicaCount = 15; +DSN_DEFINE_int32("meta_server", + max_allowed_replica_count, + 5, + "max replica count allowed for any app of a cluster"); + +DSN_TAG_VARIABLE(max_allowed_replica_count, FT_MUTABLE); + +DSN_DEFINE_validator(max_allowed_replica_count, [](int32_t allowed_replica_count) -> bool { + return allowed_replica_count > 0; +}); DSN_DEFINE_int32("meta_server", min_allowed_replica_count, 1, - "min allowed replica count for arbitrary number of nodes in a cluster"); + "min replica count allowed for any app of a cluster"); + +DSN_TAG_VARIABLE(min_allowed_replica_count, FT_MUTABLE); DSN_DEFINE_validator(min_allowed_replica_count, [](int32_t allowed_replica_count) -> bool { - return allowed_replica_count > 0 && allowed_replica_count <= kMaxAllowedReplicaCount; + return allowed_replica_count > 0; +}); + +DSN_DEFINE_group_validator(min_max_allowed_replica_count, [](std::string &message) -> bool { + if (FLAGS_min_allowed_replica_count > FLAGS_max_allowed_replica_count) { + message = fmt::format( + "min_max_allowed_replica_count({}) should be <= FLAGS_max_allowed_replica_count({})", + FLAGS_min_allowed_replica_count, + FLAGS_max_allowed_replica_count); + return false; + } + return true; }); static const char *lock_state = "lock"; @@ -2888,13 +2910,13 @@ bool validate_target_max_replica_count_internal(int32_t max_replica_count, int32_t alive_node_count, std::string &hint_message) { - if (max_replica_count > kMaxAllowedReplicaCount || + if (max_replica_count > FLAGS_max_allowed_replica_count || max_replica_count < FLAGS_min_allowed_replica_count) { hint_message = fmt::format("requested replica count({}) must be " "within the range of [min={}, max={}]", max_replica_count, FLAGS_min_allowed_replica_count, - kMaxAllowedReplicaCount); + FLAGS_max_allowed_replica_count); return false; } diff --git a/src/meta/test/meta_app_operation_test.cpp b/src/meta/test/meta_app_operation_test.cpp index d3e0d2fa06..7623958065 100644 --- a/src/meta/test/meta_app_operation_test.cpp +++ b/src/meta/test/meta_app_operation_test.cpp @@ -28,6 +28,8 @@ namespace replication { DSN_DECLARE_int32(min_allowed_replica_count); +DSN_DECLARE_int32(max_allowed_replica_count); + class meta_app_operation_test : public meta_test_base { public: @@ -161,19 +163,19 @@ TEST_F(meta_app_operation_test, create_app) {APP_NAME, 0, 3, 2, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, {APP_NAME, 4, -1, 1, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, {APP_NAME, 4, 0, 1, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 16, 2, 14, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 17, 2, 16, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 16, 2, 15, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 15, 2, 14, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 14, 2, 13, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 16, 2, 16, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME, 4, 16, 2, 17, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, - {APP_NAME + "_1", 4, 15, 2, 15, 1, false, app_status::AS_INVALID, ERR_OK}, - {APP_NAME + "_2", 4, 15, 2, 16, 1, false, app_status::AS_INVALID, ERR_OK}, - {APP_NAME + "_3", 4, 14, 2, 14, 1, false, app_status::AS_INVALID, ERR_OK}, - {APP_NAME + "_4", 4, 14, 2, 16, 1, false, app_status::AS_INVALID, ERR_OK}, - {APP_NAME + "_5", 4, 13, 2, 14, 1, false, app_status::AS_INVALID, ERR_OK}, - {APP_NAME + "_6", 4, 14, 2, 15, 1, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME, 4, 6, 2, 4, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 7, 2, 6, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 6, 2, 5, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 5, 2, 4, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 4, 2, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 6, 2, 6, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME, 4, 6, 2, 7, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, + {APP_NAME + "_1", 4, 5, 2, 5, 1, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME + "_2", 4, 5, 2, 6, 1, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME + "_3", 4, 4, 2, 4, 1, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME + "_4", 4, 4, 2, 6, 1, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME + "_5", 4, 3, 2, 4, 1, false, app_status::AS_INVALID, ERR_OK}, + {APP_NAME + "_6", 4, 4, 2, 5, 1, false, app_status::AS_INVALID, ERR_OK}, {APP_NAME, 4, 3, 2, 5, 4, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, {APP_NAME, 4, 3, 2, 4, 5, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, {APP_NAME, 4, 3, 2, 4, 4, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS}, @@ -197,7 +199,8 @@ TEST_F(meta_app_operation_test, create_app) clear_nodes(); - const int total_node_count = 20; + // keep the number of all nodes greater than that of alive nodes + const int total_node_count = 10; std::vector nodes = ensure_enough_alive_nodes(total_node_count); // the meta function level will become freezed once @@ -205,10 +208,21 @@ TEST_F(meta_app_operation_test, create_app) // even if alive_nodes >= min_live_node_count_for_unfreeze set_node_live_percentage_threshold_for_update(0); + // save original FLAGS_max_allowed_replica_count + auto reserved_max_allowed_replica_count = FLAGS_max_allowed_replica_count; + + // keep FLAGS_max_allowed_replica_count fixed in the tests + auto res = update_flag("max_allowed_replica_count", "5"); + ASSERT_TRUE(res.is_ok()); + + // save original FLAGS_min_allowed_replica_count auto reserved_min_allowed_replica_count = FLAGS_min_allowed_replica_count; for (auto test : tests) { - FLAGS_min_allowed_replica_count = test.min_allowed_replica_count; + res = update_flag("min_allowed_replica_count", + std::to_string(test.min_allowed_replica_count)); + ASSERT_TRUE(res.is_ok()); + set_min_live_node_count_for_unfreeze(test.min_live_node_count_for_unfreeze); dassert_f(total_node_count >= test.alive_node_count, @@ -232,7 +246,49 @@ TEST_F(meta_app_operation_test, create_app) _ms->set_node_state(nodes, true); } - FLAGS_min_allowed_replica_count = reserved_min_allowed_replica_count; + // set FLAGS_min_allowed_replica_count successfully + res = update_flag("min_allowed_replica_count", "2"); + ASSERT_TRUE(res.is_ok()); + ASSERT_EQ(FLAGS_min_allowed_replica_count, 2); + + // set FLAGS_max_allowed_replica_count successfully + res = update_flag("max_allowed_replica_count", "6"); + ASSERT_TRUE(res.is_ok()); + ASSERT_EQ(FLAGS_max_allowed_replica_count, 6); + + // failed to set FLAGS_min_allowed_replica_count due to individual validation + res = update_flag("min_allowed_replica_count", "0"); + ASSERT_EQ(res.code(), ERR_INVALID_PARAMETERS); + ASSERT_EQ(FLAGS_min_allowed_replica_count, 2); + std::cout << res.description() << std::endl; + + // failed to set FLAGS_max_allowed_replica_count due to individual validation + res = update_flag("max_allowed_replica_count", "0"); + ASSERT_EQ(res.code(), ERR_INVALID_PARAMETERS); + ASSERT_EQ(FLAGS_max_allowed_replica_count, 6); + std::cout << res.description() << std::endl; + + // failed to set FLAGS_min_allowed_replica_count due to grouped validation + res = update_flag("min_allowed_replica_count", "7"); + ASSERT_EQ(res.code(), ERR_INVALID_PARAMETERS); + ASSERT_EQ(FLAGS_min_allowed_replica_count, 2); + std::cout << res.description() << std::endl; + + // failed to set FLAGS_max_allowed_replica_count due to grouped validation + res = update_flag("max_allowed_replica_count", "1"); + ASSERT_EQ(res.code(), ERR_INVALID_PARAMETERS); + ASSERT_EQ(FLAGS_max_allowed_replica_count, 6); + std::cout << res.description() << std::endl; + + // recover original FLAGS_min_allowed_replica_count + res = update_flag("min_allowed_replica_count", + std::to_string(reserved_min_allowed_replica_count)); + ASSERT_TRUE(res.is_ok()); + + // recover original FLAGS_max_allowed_replica_count + res = update_flag("max_allowed_replica_count", + std::to_string(reserved_max_allowed_replica_count)); + ASSERT_TRUE(res.is_ok()); } TEST_F(meta_app_operation_test, drop_app) From f682163121954a71bbadb525cc9670be6e51747e Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Tue, 14 Dec 2021 18:18:04 +0800 Subject: [PATCH 7/8] feat: restrict the replica count while creating app --- src/meta/server_state.cpp | 8 ++++---- src/meta/test/meta_app_operation_test.cpp | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/meta/server_state.cpp b/src/meta/server_state.cpp index 7e138b026d..795c89da34 100644 --- a/src/meta/server_state.cpp +++ b/src/meta/server_state.cpp @@ -81,7 +81,7 @@ DSN_DEFINE_validator(min_allowed_replica_count, [](int32_t allowed_replica_count DSN_DEFINE_group_validator(min_max_allowed_replica_count, [](std::string &message) -> bool { if (FLAGS_min_allowed_replica_count > FLAGS_max_allowed_replica_count) { message = fmt::format( - "min_max_allowed_replica_count({}) should be <= FLAGS_max_allowed_replica_count({})", + "FLAGS_min_allowed_replica_count({}) should be <= FLAGS_max_allowed_replica_count({})", FLAGS_min_allowed_replica_count, FLAGS_max_allowed_replica_count); return false; @@ -2938,13 +2938,13 @@ bool server_state::validate_target_max_replica_count(int32_t max_replica_count) auto alive_node_count = static_cast(_meta_svc->get_alive_node_count()); std::string hint_message; - bool is_valid = validate_target_max_replica_count_internal( + bool valid = validate_target_max_replica_count_internal( max_replica_count, alive_node_count, hint_message); - if (!is_valid) { + if (!valid) { derror_f("target max replica count is invalid: message={}", hint_message); } - return is_valid; + return valid; } } // namespace replication diff --git a/src/meta/test/meta_app_operation_test.cpp b/src/meta/test/meta_app_operation_test.cpp index 7623958065..764e5d4598 100644 --- a/src/meta/test/meta_app_operation_test.cpp +++ b/src/meta/test/meta_app_operation_test.cpp @@ -284,11 +284,13 @@ TEST_F(meta_app_operation_test, create_app) res = update_flag("min_allowed_replica_count", std::to_string(reserved_min_allowed_replica_count)); ASSERT_TRUE(res.is_ok()); + ASSERT_EQ(FLAGS_min_allowed_replica_count, reserved_min_allowed_replica_count); // recover original FLAGS_max_allowed_replica_count res = update_flag("max_allowed_replica_count", std::to_string(reserved_max_allowed_replica_count)); ASSERT_TRUE(res.is_ok()); + ASSERT_EQ(FLAGS_max_allowed_replica_count, reserved_max_allowed_replica_count); } TEST_F(meta_app_operation_test, drop_app) From b35411dc824b0053fef153550aba5faa367588c5 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Tue, 14 Dec 2021 20:33:23 +0800 Subject: [PATCH 8/8] feat: restrict the replica count while creating app --- src/meta/server_state.cpp | 12 ++++-------- src/meta/test/meta_app_operation_test.cpp | 1 - 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/meta/server_state.cpp b/src/meta/server_state.cpp index 795c89da34..4b6ca4a59d 100644 --- a/src/meta/server_state.cpp +++ b/src/meta/server_state.cpp @@ -60,9 +60,7 @@ DSN_DEFINE_int32("meta_server", max_allowed_replica_count, 5, "max replica count allowed for any app of a cluster"); - DSN_TAG_VARIABLE(max_allowed_replica_count, FT_MUTABLE); - DSN_DEFINE_validator(max_allowed_replica_count, [](int32_t allowed_replica_count) -> bool { return allowed_replica_count > 0; }); @@ -71,19 +69,17 @@ DSN_DEFINE_int32("meta_server", min_allowed_replica_count, 1, "min replica count allowed for any app of a cluster"); - DSN_TAG_VARIABLE(min_allowed_replica_count, FT_MUTABLE); - DSN_DEFINE_validator(min_allowed_replica_count, [](int32_t allowed_replica_count) -> bool { return allowed_replica_count > 0; }); DSN_DEFINE_group_validator(min_max_allowed_replica_count, [](std::string &message) -> bool { if (FLAGS_min_allowed_replica_count > FLAGS_max_allowed_replica_count) { - message = fmt::format( - "FLAGS_min_allowed_replica_count({}) should be <= FLAGS_max_allowed_replica_count({})", - FLAGS_min_allowed_replica_count, - FLAGS_max_allowed_replica_count); + message = fmt::format("meta_server.min_allowed_replica_count({}) should be <= " + "meta_server.max_allowed_replica_count({})", + FLAGS_min_allowed_replica_count, + FLAGS_max_allowed_replica_count); return false; } return true; diff --git a/src/meta/test/meta_app_operation_test.cpp b/src/meta/test/meta_app_operation_test.cpp index 764e5d4598..2058bc6f30 100644 --- a/src/meta/test/meta_app_operation_test.cpp +++ b/src/meta/test/meta_app_operation_test.cpp @@ -27,7 +27,6 @@ namespace dsn { namespace replication { DSN_DECLARE_int32(min_allowed_replica_count); - DSN_DECLARE_int32(max_allowed_replica_count); class meta_app_operation_test : public meta_test_base