diff --git a/idl/bulk_load.thrift b/idl/bulk_load.thrift index 90fcf5b690..b999a3291f 100644 --- a/idl/bulk_load.thrift +++ b/idl/bulk_load.thrift @@ -98,6 +98,7 @@ struct bulk_load_request 7:bulk_load_status meta_bulk_load_status; 8:bool query_bulk_load_metadata; 9:string remote_root_path; + 10:optional dsn.host_port hp_primary; } struct bulk_load_response @@ -119,18 +120,20 @@ struct bulk_load_response 8:optional bool is_group_ingestion_finished; 9:optional bool is_group_bulk_load_context_cleaned_up; 10:optional bool is_group_bulk_load_paused; + 11:optional map hp_group_bulk_load_state; } // primary -> secondary struct group_bulk_load_request { - 1:string app_name; - 2:dsn.rpc_address target_address; - 3:metadata.replica_configuration config; - 4:string provider_name; - 5:string cluster_name; - 6:bulk_load_status meta_bulk_load_status; - 7:string remote_root_path; + 1:string app_name; + 2:dsn.rpc_address target; + 3:metadata.replica_configuration config; + 4:string provider_name; + 5:string cluster_name; + 6:bulk_load_status meta_bulk_load_status; + 7:string remote_root_path; + 8:optional dsn.host_port hp_target; } struct group_bulk_load_response @@ -218,6 +221,7 @@ struct query_bulk_load_response 6:list> bulk_load_states; 7:optional string hint_msg; 8:optional bool is_bulk_loading; + 9:optional list> hp_bulk_load_states; } struct clear_bulk_load_state_request diff --git a/idl/dsn.layer2.thrift b/idl/dsn.layer2.thrift index 086040c9cb..1975a165ff 100644 --- a/idl/dsn.layer2.thrift +++ b/idl/dsn.layer2.thrift @@ -41,6 +41,9 @@ struct partition_configuration 6:list last_drops; 7:i64 last_committed_decree; 8:i32 partition_flags; + 9:optional dsn.host_port hp_primary; + 10:optional list hp_secondaries; + 11:optional list hp_last_drops; } struct query_cfg_request diff --git a/idl/duplication.thrift b/idl/duplication.thrift index 90fcdfd096..5bf2566960 100644 --- a/idl/duplication.thrift +++ b/idl/duplication.thrift @@ -150,6 +150,7 @@ struct duplication_sync_request 
1:dsn.rpc_address node; 2:map> confirm_list; + 3:dsn.host_port hp_node; } struct duplication_sync_response diff --git a/idl/meta_admin.thrift b/idl/meta_admin.thrift index f24a1834ba..dc852f9e4a 100644 --- a/idl/meta_admin.thrift +++ b/idl/meta_admin.thrift @@ -76,6 +76,7 @@ struct configuration_update_request // the `meta_split_status` will be set // only used when on_config_sync 6:optional metadata.split_status meta_split_status; + 7:optional dsn.host_port hp_node; } // meta server (config mgr) => primary | secondary (downgrade) (w/ new config) @@ -103,6 +104,7 @@ struct configuration_query_by_node_request 1:dsn.rpc_address node; 2:optional list stored_replicas; 3:optional replica_server_info info; + 4:optional dsn.host_port hp_node; } struct configuration_query_by_node_response @@ -117,6 +119,7 @@ struct configuration_recovery_request 1:list recovery_set; 2:bool skip_bad_nodes; 3:bool skip_lost_partitions; + 4:optional list hp_recovery_set; } struct configuration_recovery_response @@ -205,6 +208,7 @@ struct configuration_list_apps_response struct query_app_info_request { 1:dsn.rpc_address meta_server; + 2:optional dsn.host_port hp_meta_server; } struct query_app_info_response @@ -280,6 +284,7 @@ struct node_info { 1:node_status status = node_status.NS_INVALID; 2:dsn.rpc_address address; + 3:optional dsn.host_port hp_address; } struct configuration_list_nodes_request @@ -349,6 +354,8 @@ struct configuration_proposal_action // depricated now // new fields of this struct should start with 5 // 4:i64 period_ts; + 5:optional dsn.host_port hp_target; + 6:optional dsn.host_port hp_node; } struct configuration_balancer_request @@ -381,6 +388,7 @@ struct ddd_node_info 5:i64 ballot; // collected && ballot == -1 means replica not exist on this node 6:i64 last_committed_decree; 7:i64 last_prepared_decree; + 8:optional dsn.host_port hp_node; } struct ddd_partition_info diff --git a/idl/metadata.thrift b/idl/metadata.thrift index 5a7d3e4b3e..e0667411a3 100644 --- 
a/idl/metadata.thrift +++ b/idl/metadata.thrift @@ -98,6 +98,7 @@ struct replica_configuration // 2. false - secondary copy mutation in this prepare message asynchronously // NOTICE: it should always be false when update_local_configuration 7:optional bool split_sync_to_child = false; + 8:optional dsn.host_port hp_primary; } struct replica_info diff --git a/idl/partition_split.thrift b/idl/partition_split.thrift index a5724ea773..a59f6e40b7 100644 --- a/idl/partition_split.thrift +++ b/idl/partition_split.thrift @@ -99,6 +99,7 @@ struct notify_catch_up_request 2:dsn.gpid child_gpid; 3:i64 child_ballot; 4:dsn.rpc_address child_address; + 5:optional dsn.host_port hp_child_address; } struct notify_cacth_up_response @@ -112,10 +113,11 @@ struct notify_cacth_up_response // primary parent -> child replicas to update partition count struct update_child_group_partition_count_request { - 1:dsn.rpc_address target_address; + 1:dsn.rpc_address target; 2:i32 new_partition_count; 3:dsn.gpid child_pid; 4:i64 ballot; + 5:optional dsn.host_port hp_target; } struct update_child_group_partition_count_response @@ -129,10 +131,11 @@ struct update_child_group_partition_count_response // primary parent -> meta server, register child on meta_server struct register_child_request { - 1:dsn.layer2.app_info app; - 2:dsn.layer2.partition_configuration parent_config; - 3:dsn.layer2.partition_configuration child_config; - 4:dsn.rpc_address primary_address; + 1:dsn.layer2.app_info app; + 2:dsn.layer2.partition_configuration parent_config; + 3:dsn.layer2.partition_configuration child_config; + 4:dsn.rpc_address primary_address; + 5:optional dsn.host_port hp_primary; } struct register_child_response diff --git a/idl/replica_admin.thrift b/idl/replica_admin.thrift index ef6bf6f028..d72bf04a8a 100644 --- a/idl/replica_admin.thrift +++ b/idl/replica_admin.thrift @@ -70,6 +70,7 @@ struct query_disk_info_request { 1:dsn.rpc_address node; 2:string app_name; + 3:optional dsn.host_port hp_node; } // This 
response is from replica_server to client. diff --git a/run.sh b/run.sh index 54a8502f75..e97c73df3b 100755 --- a/run.sh +++ b/run.sh @@ -27,7 +27,8 @@ export REPORT_DIR="$ROOT/test_report" export THIRDPARTY_ROOT=$ROOT/thirdparty export LD_LIBRARY_PATH=$JAVA_HOME/jre/lib/amd64/server:${BUILD_LATEST_DIR}/output/lib:${THIRDPARTY_ROOT}/output/lib:${LD_LIBRARY_PATH} # Disable AddressSanitizerOneDefinitionRuleViolation, see https://github.com/google/sanitizers/issues/1017 for details. -export ASAN_OPTIONS=detect_odr_violation=0 +# Add parameters in order to be able to generate coredump file when run ASAN tests +export ASAN_OPTIONS=detect_odr_violation=0:abort_on_error=1:disable_coredump=0:unmap_shadow_on_exit=1 # See https://github.com/gperftools/gperftools/wiki/gperftools'-stacktrace-capturing-methods-and-their-issues. # Now we choose libgcc, because of https://github.com/apache/incubator-pegasus/issues/1685. export TCMALLOC_STACKTRACE_METHOD=libgcc # Can be generic_fp, generic_fp_unsafe, libunwind or libgcc @@ -241,6 +242,7 @@ function run_build() if [ ! -z "${SANITIZER}" ]; then CMAKE_OPTIONS="${CMAKE_OPTIONS} -DSANITIZER=${SANITIZER}" + echo "ASAN_OPTIONS=$ASAN_OPTIONS" fi MAKE_OPTIONS="-j$JOB_NUM" @@ -484,7 +486,9 @@ function run_test() # Update options if needed, this should be done before starting onebox to make new options take effect. 
if [ "${module}" == "recovery_test" ]; then master_count=1 - opts="meta_state_service_type=meta_state_service_simple;distributed_lock_service_type=distributed_lock_service_simple" + # all test case in recovery_test just run one meta_server, so we should change it + fqdn=`hostname -f` + opts="server_list=$fqdn:34601;meta_state_service_type=meta_state_service_simple;distributed_lock_service_type=distributed_lock_service_simple" fi if [ "${module}" == "backup_restore_test" ]; then opts="cold_backup_disabled=false;cold_backup_checkpoint_reserve_minutes=0;cold_backup_root=onebox" @@ -509,6 +513,20 @@ function run_test() # Run server test. pushd ${BUILD_LATEST_DIR}/bin/${module} + local function_tests=( + backup_restore_test + recovery_test + restore_test + base_api_test + throttle_test + bulk_load_test + detect_hotspot_test + partition_split_test + ) + # function_tests need client used meta_server_list to connect + if [[ "${function_tests[@]}" =~ "${module}" ]]; then + sed -i "s/@LOCAL_HOSTNAME@/${LOCAL_HOSTNAME}/g" ./config.ini + fi REPORT_DIR=${REPORT_DIR} TEST_BIN=${module} TEST_OPTS=${test_opts} ./run.sh if [ $? 
!= 0 ]; then echo "run test \"$module\" in `pwd` failed" diff --git a/scripts/recompile_thrift.sh b/scripts/recompile_thrift.sh index 444a503483..52e5397be2 100755 --- a/scripts/recompile_thrift.sh +++ b/scripts/recompile_thrift.sh @@ -30,7 +30,7 @@ rm -rf $TMP_DIR mkdir -p $TMP_DIR $THIRDPARTY_ROOT/output/bin/thrift --gen cpp:moveable_types -out $TMP_DIR ../idl/rrdb.thrift -sed 's/#include "dsn_types.h"/#include "runtime\/rpc\/rpc_address.h"\n#include "runtime\/task\/task_code.h"\n#include "utils\/blob.h"/' $TMP_DIR/rrdb_types.h > ../src/include/rrdb/rrdb_types.h +sed 's/#include "dsn_types.h"/#include "runtime\/rpc\/rpc_address.h"\n#include "runtime\/rpc\/rpc_host_port.h"\n#include "runtime\/task\/task_code.h"\n#include "utils\/blob.h"/' $TMP_DIR/rrdb_types.h > ../src/include/rrdb/rrdb_types.h sed 's/#include "rrdb_types.h"/#include /' $TMP_DIR/rrdb_types.cpp > ../src/base/rrdb_types.cpp rm -rf $TMP_DIR diff --git a/src/client/partition_resolver.cpp b/src/client/partition_resolver.cpp index 1b8baa7a18..2b99e3031b 100644 --- a/src/client/partition_resolver.cpp +++ b/src/client/partition_resolver.cpp @@ -31,6 +31,7 @@ #include "partition_resolver_manager.h" #include "runtime/api_layer1.h" #include "runtime/api_task.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/task/task_spec.h" #include "utils/fmt_logging.h" #include "utils/threadpool_code.h" @@ -38,11 +39,14 @@ namespace dsn { namespace replication { /*static*/ -partition_resolver_ptr partition_resolver::get_resolver(const char *cluster_name, - const std::vector &meta_list, - const char *app_name) +partition_resolver_ptr +partition_resolver::get_resolver(const char *cluster_name, + const std::vector &meta_list, + const char *app_name, + const std::shared_ptr &dns_resolver) { - return partition_resolver_manager::instance().find_or_create(cluster_name, meta_list, app_name); + return partition_resolver_manager::instance().find_or_create( + cluster_name, meta_list, app_name, dns_resolver); } 
DEFINE_TASK_CODE(LPC_RPC_DELAY_CALL, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT) @@ -107,7 +111,7 @@ void partition_resolver::call_task(const rpc_response_task_ptr &t) t->replace_callback(std::move(new_callback)); resolve(hdr.client.partition_hash, - [t](resolve_result &&result) mutable { + [t, this](resolve_result &&result) mutable { if (result.err != ERR_OK) { t->enqueue(result.err, nullptr); return; @@ -124,7 +128,7 @@ void partition_resolver::call_task(const rpc_response_task_ptr &t) } hdr.gpid = result.pid; } - dsn_rpc_call(result.address, t.get()); + dsn_rpc_call(this->_dns_resolver->resolve_address(result.hp), t.get()); }, hdr.client.timeout_ms); } diff --git a/src/client/partition_resolver.h b/src/client/partition_resolver.h index b870b2c7a2..b110cec944 100644 --- a/src/client/partition_resolver.h +++ b/src/client/partition_resolver.h @@ -29,12 +29,13 @@ #include #include #include +#include #include #include #include #include "common/gpid.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -44,6 +45,7 @@ #include "utils/error_code.h" namespace dsn { +class dns_resolver; class task_tracker; namespace replication { @@ -53,8 +55,9 @@ class partition_resolver : public ref_counter public: static dsn::ref_ptr get_resolver(const char *cluster_name, - const std::vector &meta_list, - const char *app_name); + const std::vector &meta_list, + const char *app_name, + const std::shared_ptr &dns_resolver); template dsn::rpc_response_task_ptr call_op(dsn::task_code code, @@ -83,13 +86,15 @@ class partition_resolver : public ref_counter std::string get_app_name() const { return _app_name; } - dsn::rpc_address get_meta_server() const { return _meta_server; } + dsn::host_port get_meta_server() const { return _meta_server; } const char *log_prefix() const { return _app_name.c_str(); } protected: - partition_resolver(rpc_address 
meta_server, const char *app_name) - : _app_name(app_name), _meta_server(meta_server) + partition_resolver(host_port meta_server, + const char *app_name, + const std::shared_ptr &dns_resolver) + : _app_name(app_name), _meta_server(meta_server), _dns_resolver(dns_resolver) { } @@ -103,13 +108,13 @@ class partition_resolver : public ref_counter ///< should call resolve_async in this case error_code err; ///< IPv4 of the target to send request to - rpc_address address; + host_port hp; ///< global partition indentity dsn::gpid pid; }; /** - * resolve partition_hash into IP or group addresses to know what to connect next + * resolve partition_hash into IP or group host_port to know what to connect next * * \param partition_hash the partition hash * \param callback callback invoked on completion or timeout @@ -127,7 +132,7 @@ class partition_resolver : public ref_counter \param partition_index zero-based index of the partition. \param err error code - this is usually to trigger new round of address resolve + this is usually to trigger new round of host_port resolve */ virtual void on_access_failure(int partition_index, error_code err) = 0; @@ -144,7 +149,8 @@ class partition_resolver : public ref_counter std::string _cluster_name; std::string _app_name; - rpc_address _meta_server; + host_port _meta_server; + std::shared_ptr _dns_resolver; }; typedef ref_ptr partition_resolver_ptr; diff --git a/src/client/partition_resolver_manager.cpp b/src/client/partition_resolver_manager.cpp index 2c0ae7f012..cc82b063a8 100644 --- a/src/client/partition_resolver_manager.cpp +++ b/src/client/partition_resolver_manager.cpp @@ -29,12 +29,14 @@ #include "client/partition_resolver.h" #include "partition_resolver_manager.h" #include "partition_resolver_simple.h" -#include "runtime/rpc/group_address.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/group_host_port.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/autoref_ptr.h" #include "utils/fmt_logging.h" namespace 
dsn { +class dns_resolver; + namespace replication { template @@ -53,22 +55,25 @@ bool vector_equal(const std::vector &a, const std::vector &b) return true; } -partition_resolver_ptr partition_resolver_manager::find_or_create( - const char *cluster_name, const std::vector &meta_list, const char *app_name) +partition_resolver_ptr +partition_resolver_manager::find_or_create(const char *cluster_name, + const std::vector &meta_list, + const char *app_name, + const std::shared_ptr &dns_resolver) { dsn::zauto_lock l(_lock); std::map &app_map = _resolvers[cluster_name]; partition_resolver_ptr &ptr = app_map[app_name]; if (ptr == nullptr) { - dsn::rpc_address meta_group; + dsn::host_port meta_group; meta_group.assign_group(cluster_name); - meta_group.group_address()->add_list(meta_list); - ptr = new partition_resolver_simple(meta_group, app_name); + meta_group.group_host_port()->add_list(meta_list); + ptr = new partition_resolver_simple(meta_group, app_name, dns_resolver); return ptr; } else { - dsn::rpc_address meta_group = ptr->get_meta_server(); - const std::vector &existing_list = meta_group.group_address()->members(); + dsn::host_port meta_group = ptr->get_meta_server(); + const std::vector &existing_list = meta_group.group_host_port()->members(); if (!vector_equal(meta_list, existing_list)) { LOG_ERROR("meta list not match for cluster({})", cluster_name); return nullptr; diff --git a/src/client/partition_resolver_manager.h b/src/client/partition_resolver_manager.h index 97e895cec7..82332371fe 100644 --- a/src/client/partition_resolver_manager.h +++ b/src/client/partition_resolver_manager.h @@ -27,6 +27,7 @@ #pragma once #include +#include #include #include @@ -35,7 +36,8 @@ #include "utils/zlocks.h" namespace dsn { -class rpc_address; +class dns_resolver; +class host_port; namespace replication { @@ -43,8 +45,9 @@ class partition_resolver_manager : public dsn::utils::singleton &meta_list, - const char *app_name); + const std::vector &meta_list, + const char 
*app_name, + const std::shared_ptr &dns_resolver); private: dsn::zlock _lock; diff --git a/src/client/partition_resolver_simple.cpp b/src/client/partition_resolver_simple.cpp index a0fd30224d..1454d200d1 100644 --- a/src/client/partition_resolver_simple.cpp +++ b/src/client/partition_resolver_simple.cpp @@ -35,6 +35,7 @@ #include "dsn.layer2_types.h" #include "partition_resolver_simple.h" #include "runtime/api_layer1.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -48,8 +49,9 @@ namespace dsn { namespace replication { -partition_resolver_simple::partition_resolver_simple(rpc_address meta_server, const char *app_name) - : partition_resolver(meta_server, app_name), +partition_resolver_simple::partition_resolver_simple( + host_port meta_server, const char *app_name, const std::shared_ptr &dns_resolver) + : partition_resolver(meta_server, app_name, dns_resolver), _app_id(-1), _app_partition_count(-1), _app_is_stateful(true) @@ -63,12 +65,12 @@ void partition_resolver_simple::resolve(uint64_t partition_hash, int idx = -1; if (_app_partition_count != -1) { idx = get_partition_index(_app_partition_count, partition_hash); - rpc_address target; - auto err = get_address(idx, target); + host_port target; + auto err = get_host_port(idx, target); if (dsn_unlikely(err == ERR_CHILD_NOT_READY)) { // child partition is not ready, its requests should be sent to parent partition idx -= _app_partition_count / 2; - err = get_address(idx, target); + err = get_host_port(idx, target); } if (dsn_likely(err == ERR_OK)) { callback(resolve_result{ERR_OK, target, {_app_id, idx}}); @@ -128,7 +130,7 @@ void partition_resolver_simple::clear_all_pending_requests() pc.second->query_config_task->cancel(true); for (auto &rc : pc.second->requests) { - end_request(std::move(rc), ERR_TIMEOUT, rpc_address()); + end_request(std::move(rc), ERR_TIMEOUT, host_port()); } delete pc.second; } @@ 
-137,12 +139,12 @@ void partition_resolver_simple::clear_all_pending_requests() void partition_resolver_simple::on_timeout(request_context_ptr &&rc) const { - end_request(std::move(rc), ERR_TIMEOUT, rpc_address(), true); + end_request(std::move(rc), ERR_TIMEOUT, host_port(), true); } void partition_resolver_simple::end_request(request_context_ptr &&request, error_code err, - rpc_address addr, + host_port hp, bool called_by_timer) const { zauto_lock l(request->lock); @@ -153,7 +155,7 @@ void partition_resolver_simple::end_request(request_context_ptr &&request, if (!called_by_timer && request->timeout_timer != nullptr) request->timeout_timer->cancel(false); - request->callback(resolve_result{err, addr, {_app_id, request->partition_index}}); + request->callback(resolve_result{err, hp, {_app_id, request->partition_index}}); request->completed = true; } @@ -164,13 +166,13 @@ void partition_resolver_simple::call(request_context_ptr &&request, bool from_me { int pindex = request->partition_index; if (-1 != pindex) { - // fill target address if possible - rpc_address addr; - auto err = get_address(pindex, addr); + // fill target host_port if possible + host_port hp; + auto err = get_host_port(pindex, hp); - // target address known + // target host_port known if (err == ERR_OK) { - end_request(std::move(request), ERR_OK, addr); + end_request(std::move(request), ERR_OK, hp); return; } } @@ -180,7 +182,7 @@ void partition_resolver_simple::call(request_context_ptr &&request, bool from_me // timeout will happen very soon, no way to get the rpc call done if (nts + 100 >= request->timeout_ts_us) // within 100 us { - end_request(std::move(request), ERR_TIMEOUT, rpc_address()); + end_request(std::move(request), ERR_TIMEOUT, host_port()); return; } @@ -260,7 +262,7 @@ task_ptr partition_resolver_simple::query_config(int partition_index, int timeou marshall(msg, req); return rpc::call( - _meta_server, + _dns_resolver->resolve_address(_meta_server), msg, &_tracker, [this, 
partition_index](error_code err, dsn::message_ex *req, dsn::message_ex *resp) { @@ -303,9 +305,10 @@ void partition_resolver_simple::query_config_reply(error_code err, for (auto it = resp.partitions.begin(); it != resp.partitions.end(); ++it) { auto &new_config = *it; - LOG_DEBUG_PREFIX("query config reply, gpid = {}, ballot = {}, primary = {}", + LOG_DEBUG_PREFIX("query config reply, gpid = {}, ballot = {}, primary = {}({})", new_config.pid, new_config.ballot, + new_config.hp_primary, new_config.primary); auto it2 = _config_cache.find(new_config.pid.get_partition_index()); @@ -393,16 +396,16 @@ void partition_resolver_simple::handle_pending_requests(std::dequepartition_index, addr); + host_port hp; + err = get_host_port(req->partition_index, hp); if (err == ERR_OK) { - end_request(std::move(req), err, addr); + end_request(std::move(req), err, hp); } else { call(std::move(req), true); } } else if (err == ERR_HANDLER_NOT_FOUND || err == ERR_APP_NOT_EXIST || err == ERR_OPERATION_DISABLED) { - end_request(std::move(req), err, rpc_address()); + end_request(std::move(req), err, host_port()); } else { call(std::move(req), true); } @@ -411,20 +414,20 @@ void partition_resolver_simple::handle_pending_requests(std::dequesecond->config); - if (addr.is_invalid()) { + hp = get_host_port(it->second->config); + if (hp.is_invalid()) { return ERR_IO_PENDING; } else { return ERR_OK; diff --git a/src/client/partition_resolver_simple.h b/src/client/partition_resolver_simple.h index 2ab1fb70c2..ac105f7672 100644 --- a/src/client/partition_resolver_simple.h +++ b/src/client/partition_resolver_simple.h @@ -34,7 +34,7 @@ #include "client/partition_resolver.h" #include "common/serialization_helper/dsn.layer2_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "runtime/task/task_tracker.h" #include "utils/autoref_ptr.h" @@ -42,6 +42,7 @@ #include "utils/zlocks.h" namespace dsn { +class dns_resolver; class 
message_ex; namespace replication { @@ -49,7 +50,9 @@ namespace replication { class partition_resolver_simple : public partition_resolver { public: - partition_resolver_simple(rpc_address meta_server, const char *app_name); + partition_resolver_simple(host_port meta_server, + const char *app_name, + const std::shared_ptr &dns_resolver); virtual ~partition_resolver_simple(); @@ -109,8 +112,8 @@ class partition_resolver_simple : public partition_resolver private: // local routines - rpc_address get_address(const partition_configuration &config) const; - error_code get_address(int partition_index, /*out*/ rpc_address &addr); + host_port get_host_port(const partition_configuration &config) const; + error_code get_host_port(int partition_index, /*out*/ host_port &hp); void handle_pending_requests(std::deque &reqs, error_code err); void clear_all_pending_requests(); @@ -120,7 +123,7 @@ class partition_resolver_simple : public partition_resolver // request_context_ptr rc); void end_request(request_context_ptr &&request, error_code err, - rpc_address addr, + host_port addr, bool called_by_timer = false) const; void on_timeout(request_context_ptr &&rc) const; diff --git a/src/client/replication_ddl_client.cpp b/src/client/replication_ddl_client.cpp index a98dcbf8e7..95de5b0557 100644 --- a/src/client/replication_ddl_client.cpp +++ b/src/client/replication_ddl_client.cpp @@ -48,7 +48,8 @@ #include "fmt/format.h" #include "meta/meta_rpc_types.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/group_address.h" +#include "runtime/rpc/group_host_port.h" +#include "runtime/rpc/rpc_address.h" #include "utils/error_code.h" #include "utils/fmt_logging.h" #include "utils/output_utils.h" @@ -81,11 +82,12 @@ namespace replication { using tp_output_format = ::dsn::utils::table_printer::output_format; -replication_ddl_client::replication_ddl_client(const std::vector &meta_servers) +replication_ddl_client::replication_ddl_client(const std::vector &meta_servers) + : 
_dns_resolver(new dns_resolver()) { _meta_server.assign_group("meta-servers"); for (const auto &m : meta_servers) { - if (!_meta_server.group_address()->add(m)) { + if (!_meta_server.group_host_port()->add(m)) { LOG_WARNING("duplicate adress {}", m); } } @@ -93,6 +95,38 @@ replication_ddl_client::replication_ddl_client(const std::vector(); + + auto resp_task = request_meta(RPC_CM_CLUSTER_INFO, req); + resp_task->wait(); + if (resp_task->error() != dsn::ERR_OK) { + LOG_ERROR("get cluster_info failed!"); + return; + } + + configuration_cluster_info_response resp; + ::dsn::unmarshall(resp_task->get_response(), resp); + if (resp.err != dsn::ERR_OK) { + LOG_ERROR("get cluster_info failed!"); + return; + } + + for (int i = 0; i < resp.keys.size(); i++) { + if (resp.keys[i] == "primary_meta_server") { + dsn::host_port hp; + hp.from_string(resp.values[i]); + if (_meta_server.group_host_port()->contains(hp)) { + _meta_server.group_host_port()->set_leader(hp); + } else { + LOG_ERROR("meta_servers not contains {}", hp); + } + break; + } + } +} + dsn::error_code replication_ddl_client::wait_app_ready(const std::string &app_name, int partition_count, int max_replica_count) @@ -129,7 +163,8 @@ dsn::error_code replication_ddl_client::wait_app_ready(const std::string &app_na int ready_count = 0; for (int i = 0; i < partition_count; i++) { const partition_configuration &pc = query_resp.partitions[i]; - if (!pc.primary.is_invalid() && (pc.secondaries.size() + 1 >= max_replica_count)) { + if (!pc.hp_primary.is_invalid() && + (pc.hp_secondaries.size() + 1 >= max_replica_count)) { ready_count++; } } @@ -418,11 +453,11 @@ dsn::error_code replication_ddl_client::list_apps(const dsn::app_status::type st for (int i = 0; i < partitions.size(); i++) { const dsn::partition_configuration &p = partitions[i]; int replica_count = 0; - if (!p.primary.is_invalid()) { + if (!p.hp_primary.is_invalid()) { replica_count++; } - replica_count += p.secondaries.size(); - if (!p.primary.is_invalid()) { + 
replica_count += p.hp_secondaries.size(); + if (!p.hp_primary.is_invalid()) { if (replica_count >= p.max_replica_count) fully_healthy++; else if (replica_count < 2) @@ -470,7 +505,7 @@ dsn::error_code replication_ddl_client::list_apps(const dsn::app_status::type st dsn::error_code replication_ddl_client::list_nodes( const dsn::replication::node_status::type status, - std::map &nodes) + std::map &nodes) { auto req = std::make_shared(); req->status = status; @@ -486,8 +521,10 @@ dsn::error_code replication_ddl_client::list_nodes( return resp.err; } - for (const dsn::replication::node_info &n : resp.infos) { - nodes[n.address] = n.status; + for (const auto &n : resp.infos) { + host_port hp; + GET_HOST_PORT(n, address, hp); + nodes[hp] = n.status; } return dsn::ERR_OK; @@ -520,13 +557,13 @@ dsn::error_code replication_ddl_client::list_nodes(const dsn::replication::node_ const std::string &file_name, bool resolve_ip) { - std::map nodes; + std::map nodes; auto r = list_nodes(status, nodes); if (r != dsn::ERR_OK) { return r; } - std::map tmp_map; + std::map tmp_map; int alive_node_count = 0; for (auto &kv : nodes) { if (kv.second == dsn::replication::node_status::NS_ALIVE) @@ -535,7 +572,7 @@ dsn::error_code replication_ddl_client::list_nodes(const dsn::replication::node_ status_str = status_str.substr(status_str.find("NS_") + 3); tmp_map.emplace( kv.first, - list_nodes_helper(host_name_resolve(resolve_ip, kv.first.to_std_string()), status_str)); + list_nodes_helper(host_name_resolve(resolve_ip, kv.first.to_string()), status_str)); } if (detailed) { @@ -556,14 +593,14 @@ dsn::error_code replication_ddl_client::list_nodes(const dsn::replication::node_ for (int i = 0; i < partitions.size(); i++) { const dsn::partition_configuration &p = partitions[i]; - if (!p.primary.is_invalid()) { - auto find = tmp_map.find(p.primary); + if (!p.hp_primary.is_invalid()) { + auto find = tmp_map.find(p.hp_primary); if (find != tmp_map.end()) { find->second.primary_count++; } } - for (int j 
= 0; j < p.secondaries.size(); j++) { - auto find = tmp_map.find(p.secondaries[j]); + for (int j = 0; j < p.hp_secondaries.size(); j++) { + auto find = tmp_map.find(p.hp_secondaries[j]); if (find != tmp_map.end()) { find->second.secondary_count++; } @@ -742,7 +779,7 @@ dsn::error_code replication_ddl_client::list_app(const std::string &app_name, tp_details.add_column("replica_count"); tp_details.add_column("primary"); tp_details.add_column("secondaries"); - std::map> node_stat; + std::map> node_stat; int total_prim_count = 0; int total_sec_count = 0; @@ -751,14 +788,14 @@ dsn::error_code replication_ddl_client::list_app(const std::string &app_name, int read_unhealthy = 0; for (const auto &p : partitions) { int replica_count = 0; - if (!p.primary.is_invalid()) { + if (!p.hp_primary.is_invalid()) { replica_count++; - node_stat[p.primary].first++; + node_stat[p.hp_primary].first++; total_prim_count++; } - replica_count += p.secondaries.size(); - total_sec_count += p.secondaries.size(); - if (!p.primary.is_invalid()) { + replica_count += p.hp_secondaries.size(); + total_sec_count += p.hp_secondaries.size(); + if (!p.hp_primary.is_invalid()) { if (replica_count >= p.max_replica_count) fully_healthy++; else if (replica_count < 2) @@ -772,17 +809,15 @@ dsn::error_code replication_ddl_client::list_app(const std::string &app_name, std::stringstream oss; oss << replica_count << "/" << p.max_replica_count; tp_details.append_data(oss.str()); - tp_details.append_data( - (p.primary.is_invalid() ? "-" : host_name_resolve(resolve_ip, - p.primary.to_std_string()))); + tp_details.append_data((p.hp_primary.is_invalid() ? 
"-" : p.hp_primary.to_string())); oss.str(""); oss << "["; // TODO (yingchun) join - for (int j = 0; j < p.secondaries.size(); j++) { + for (int j = 0; j < p.hp_secondaries.size(); j++) { if (j != 0) oss << ","; - oss << host_name_resolve(resolve_ip, p.secondaries[j].to_std_string()); - node_stat[p.secondaries[j]].second++; + oss << host_name_resolve(resolve_ip, p.hp_secondaries[j].to_string()); + node_stat[p.hp_secondaries[j]].second++; } oss << "]"; tp_details.append_data(oss.str()); @@ -796,7 +831,7 @@ dsn::error_code replication_ddl_client::list_app(const std::string &app_name, tp_nodes.add_column("secondary"); tp_nodes.add_column("total"); for (auto &kv : node_stat) { - tp_nodes.add_row(host_name_resolve(resolve_ip, kv.first.to_std_string())); + tp_nodes.add_row(host_name_resolve(resolve_ip, kv.first.to_string())); tp_nodes.append_data(kv.second.first); tp_nodes.append_data(kv.second.second); tp_nodes.append_data(kv.second.first + kv.second.second); @@ -885,7 +920,7 @@ replication_ddl_client::send_balancer_proposal(const configuration_balancer_requ return resp.err; } -dsn::error_code replication_ddl_client::do_recovery(const std::vector &replica_nodes, +dsn::error_code replication_ddl_client::do_recovery(const std::vector &replica_nodes, int wait_seconds, bool skip_bad_nodes, bool skip_lost_partitions, @@ -904,15 +939,17 @@ dsn::error_code replication_ddl_client::do_recovery(const std::vector(); req->recovery_set.clear(); - for (const dsn::rpc_address &node : replica_nodes) { - if (std::find(req->recovery_set.begin(), req->recovery_set.end(), node) != - req->recovery_set.end()) { - out << "duplicate replica node " << node << ", just ingore it" << std::endl; + req->__set_hp_recovery_set(std::vector()); + for (const auto &node : replica_nodes) { + if (std::find(req->hp_recovery_set.begin(), req->hp_recovery_set.end(), node) != + req->hp_recovery_set.end()) { + out << "duplicate replica node " << node.to_string() << ", just ingore it" << std::endl; } else { - 
req->recovery_set.push_back(node); + req->hp_recovery_set.push_back(node); + req->recovery_set.push_back(_dns_resolver->resolve_address(node)); } } - if (req->recovery_set.empty()) { + if (req->hp_recovery_set.empty()) { out << "node set for recovery it empty" << std::endl; return ERR_INVALID_PARAMETERS; } @@ -924,8 +961,8 @@ dsn::error_code replication_ddl_client::do_recovery(const std::vectorrecovery_set) { - out << node << std::endl; + for (auto &node : req->hp_recovery_set) { + out << node.to_string() << std::endl; } out << "=============================" << std::endl; @@ -1055,6 +1092,7 @@ dsn::error_code replication_ddl_client::add_backup_policy(const std::string &pol error_with replication_ddl_client::backup_app( int32_t app_id, const std::string &backup_provider_type, const std::string &backup_path) { + set_meta_servers_leader(); auto req = std::make_unique(); req->app_id = app_id; req->backup_provider_type = backup_provider_type; @@ -1436,7 +1474,7 @@ void replication_ddl_client::end_meta_request(const rpc_response_task_ptr &callb return; } - rpc::call(_meta_server, + rpc::call(_dns_resolver->resolve_address(_meta_server), request, &_tracker, [this, attempt_count, callback]( @@ -1571,14 +1609,15 @@ replication_ddl_client::ddd_diagnose(gpid pid, std::vector & } void replication_ddl_client::query_disk_info( - const std::vector &targets, + const std::vector &targets, const std::string &app_name, - /*out*/ std::map> &resps) + /*out*/ std::map> &resps) { - std::map query_disk_info_rpcs; + std::map query_disk_info_rpcs; for (const auto &target : targets) { auto request = std::make_unique(); - request->node = target; + request->node = _dns_resolver->resolve_address(target); + request->__set_hp_node(target); request->app_name = app_name; query_disk_info_rpcs.emplace(target, query_disk_info_rpc(std::move(request), RPC_QUERY_DISK_INFO)); @@ -1629,14 +1668,14 @@ replication_ddl_client::clear_bulk_load(const std::string &app_name) return 
call_rpc_sync(clear_bulk_load_rpc(std::move(req), RPC_CM_CLEAR_BULK_LOAD)); } -error_code replication_ddl_client::detect_hotkey(const dsn::rpc_address &target, +error_code replication_ddl_client::detect_hotkey(const dsn::host_port &target, detect_hotkey_request &req, detect_hotkey_response &resp) { - std::map detect_hotkey_rpcs; + std::map detect_hotkey_rpcs; auto request = std::make_unique(req); detect_hotkey_rpcs.emplace(target, detect_hotkey_rpc(std::move(request), RPC_DETECT_HOTKEY)); - std::map> resps; + std::map> resps; call_rpcs_sync(detect_hotkey_rpcs, resps); resp = resps.begin()->second.get_value(); return resps.begin()->second.get_error().code(); @@ -1694,16 +1733,16 @@ replication_ddl_client::query_partition_split(const std::string &app_name) return call_rpc_sync(query_split_rpc(std::move(req), RPC_CM_QUERY_PARTITION_SPLIT)); } -error_with -replication_ddl_client::add_new_disk(const rpc_address &target_node, const std::string &disk_str) +error_with replication_ddl_client::add_new_disk(const host_port &target_node, + const std::string &disk_str) { auto req = std::make_unique(); req->disk_str = disk_str; - std::map add_new_disk_rpcs; + std::map add_new_disk_rpcs; add_new_disk_rpcs.emplace(target_node, add_new_disk_rpc(std::move(req), RPC_ADD_NEW_DISK)); - std::map> resps; + std::map> resps; call_rpcs_sync(add_new_disk_rpcs, resps); return resps.begin()->second.get_value(); } diff --git a/src/client/replication_ddl_client.h b/src/client/replication_ddl_client.h index 7a32dc8510..61ea6b8e88 100644 --- a/src/client/replication_ddl_client.h +++ b/src/client/replication_ddl_client.h @@ -43,8 +43,9 @@ #include "meta_admin_types.h" #include "partition_split_types.h" #include "replica_admin_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_holder.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" 
@@ -73,7 +74,7 @@ class start_backup_app_response; class replication_ddl_client { public: - replication_ddl_client(const std::vector &meta_servers); + replication_ddl_client(const std::vector &meta_servers); ~replication_ddl_client(); dsn::error_code create_app(const std::string &app_name, @@ -109,7 +110,7 @@ class replication_ddl_client dsn::error_code list_nodes(const dsn::replication::node_status::type status, - std::map &nodes); + std::map &nodes); dsn::error_code cluster_name(int64_t timeout_ms, std::string &cluster_name); @@ -134,7 +135,7 @@ class replication_ddl_client dsn::error_code wait_app_ready(const std::string &app_name, int partition_count, int max_replica_count); - dsn::error_code do_recovery(const std::vector &replica_nodes, + dsn::error_code do_recovery(const std::vector &replica_nodes, int wait_seconds, bool skip_bad_nodes, bool skip_lost_partitions, @@ -206,10 +207,10 @@ class replication_ddl_client dsn::error_code ddd_diagnose(gpid pid, std::vector &ddd_partitions); - void query_disk_info( - const std::vector &targets, - const std::string &app_name, - /*out*/ std::map> &resps); + void + query_disk_info(const std::vector &targets, + const std::string &app_name, + /*out*/ std::map> &resps); error_with start_bulk_load(const std::string &app_name, const std::string &cluster_name, @@ -224,7 +225,7 @@ class replication_ddl_client error_with clear_bulk_load(const std::string &app_name); - error_code detect_hotkey(const dsn::rpc_address &target, + error_code detect_hotkey(const dsn::host_port &target, detect_hotkey_request &req, detect_hotkey_response &resp); @@ -245,7 +246,7 @@ class replication_ddl_client error_with query_partition_split(const std::string &app_name); - error_with add_new_disk(const rpc_address &target_node, + error_with add_new_disk(const host_port &target_node, const std::string &disk_str); error_with @@ -264,6 +265,7 @@ class replication_ddl_client set_max_replica_count(const std::string &app_name, int32_t max_replica_count); void 
set_max_wait_app_ready_secs(uint32_t max_wait_secs) { _max_wait_secs = max_wait_secs; } + void set_meta_servers_leader(); private: bool static valid_app_char(int c); @@ -285,7 +287,7 @@ class replication_ddl_client auto task = dsn::rpc::create_rpc_response_task(msg, nullptr, empty_rpc_handler, reply_thread_hash); - rpc::call(_meta_server, + rpc::call(_dns_resolver->resolve_address(_meta_server), msg, &_tracker, [this, task]( @@ -370,7 +372,7 @@ class replication_ddl_client static constexpr int MAX_RETRY = 2; error_code err = ERR_UNKNOWN; for (int retry = 0; retry < MAX_RETRY; retry++) { - task_ptr task = rpc.call(_meta_server, + task_ptr task = rpc.call(_dns_resolver->resolve_address(_meta_server), &_tracker, [&err](error_code code) { err = code; }, reply_thread_hash); @@ -387,31 +389,32 @@ class replication_ddl_client /// Send request to multi replica server synchronously. template - void call_rpcs_sync(std::map &rpcs, - std::map> &resps, + void call_rpcs_sync(std::map &rpcs, + std::map> &resps, int reply_thread_hash = 0, bool enable_retry = true) { dsn::task_tracker tracker; error_code err = ERR_UNKNOWN; for (auto &rpc : rpcs) { - rpc.second.call( - rpc.first, &tracker, [&err, &resps, &rpcs, &rpc](error_code code) mutable { - err = code; - if (err == dsn::ERR_OK) { - resps.emplace(rpc.first, std::move(rpc.second.response())); - rpcs.erase(rpc.first); - } else { - resps.emplace( - rpc.first, - std::move(error_s::make(err, "unable to send rpc to server"))); - } - }); + rpc.second.call(_dns_resolver->resolve_address(rpc.first), + &tracker, + [&err, &resps, &rpcs, &rpc](error_code code) mutable { + err = code; + if (err == dsn::ERR_OK) { + resps.emplace(rpc.first, std::move(rpc.second.response())); + rpcs.erase(rpc.first); + } else { + resps.emplace(rpc.first, + std::move(error_s::make( + err, "unable to send rpc to server"))); + } + }); } tracker.wait_outstanding_tasks(); if (enable_retry && rpcs.size() > 0) { - std::map> retry_resps; + std::map> retry_resps; 
call_rpcs_sync(rpcs, retry_resps, reply_thread_hash, false); for (auto &resp : retry_resps) { resps.emplace(resp.first, std::move(resp.second)); @@ -420,7 +423,7 @@ class replication_ddl_client } private: - dsn::rpc_address _meta_server; + dsn::host_port _meta_server; dsn::task_tracker _tracker; uint32_t _max_wait_secs = 3600; // Wait at most 1 hour by default. @@ -439,6 +442,8 @@ class replication_ddl_client } std::deque _mock_errors; + std::unique_ptr _dns_resolver; + typedef rpc_holder detect_hotkey_rpc; typedef rpc_holder query_disk_info_rpc; typedef rpc_holder add_new_disk_rpc; diff --git a/src/client/test/ddl_client_test.cpp b/src/client/test/ddl_client_test.cpp index 1240caf9b5..7662c6bb36 100644 --- a/src/client/test/ddl_client_test.cpp +++ b/src/client/test/ddl_client_test.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "client/replication_ddl_client.h" @@ -26,7 +27,7 @@ #include "gtest/gtest.h" #include "meta_admin_types.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "utils/autoref_ptr.h" #include "utils/error_code.h" @@ -103,7 +104,7 @@ TEST(DDLClientTest, RetryMetaRequest) dsn::ERR_BUSY_CREATING}, }; - std::vector meta_list = {{"127.0.0.1", 34601}}; + std::vector meta_list = {host_port("localhost", 34601)}; auto req = std::make_shared(); for (const auto &test : tests) { fail::setup(); diff --git a/src/client_lib/pegasus_client_factory_impl.cpp b/src/client_lib/pegasus_client_factory_impl.cpp index a590e52553..f84d8ce0b5 100644 --- a/src/client_lib/pegasus_client_factory_impl.cpp +++ b/src/client_lib/pegasus_client_factory_impl.cpp @@ -63,11 +63,11 @@ pegasus_client *pegasus_client_factory_impl::get_client(const char *cluster_name const char *app_name) { if (cluster_name == nullptr || cluster_name[0] == '\0') { - LOG_ERROR("invalid parameter 'cluster_name'"); + LOG_ERROR("invalid parameter 'cluster_name' {}", 
cluster_name); return nullptr; } if (app_name == nullptr || app_name[0] == '\0') { - LOG_ERROR("invalid parameter 'app_name'"); + LOG_ERROR("invalid parameter 'app_name' {}", app_name); return nullptr; } diff --git a/src/client_lib/pegasus_client_impl.cpp b/src/client_lib/pegasus_client_impl.cpp index 177477beb4..af760e5ae3 100644 --- a/src/client_lib/pegasus_client_impl.cpp +++ b/src/client_lib/pegasus_client_impl.cpp @@ -34,7 +34,8 @@ #include "pegasus_key_schema.h" #include "pegasus_utils.h" #include "rrdb/rrdb.client.h" -#include "runtime/rpc/group_address.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/group_host_port.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" #include "runtime/task/task_code.h" @@ -61,16 +62,16 @@ std::unordered_map pegasus_client_impl::_client_error_to_strin std::unordered_map pegasus_client_impl::_server_error_to_client; pegasus_client_impl::pegasus_client_impl(const char *cluster_name, const char *app_name) - : _cluster_name(cluster_name), _app_name(app_name) + : _cluster_name(cluster_name), _app_name(app_name), _dns_resolver(new dsn::dns_resolver()) { - std::vector meta_servers; + std::vector meta_servers; dsn::replication::replica_helper::load_meta_servers( meta_servers, PEGASUS_CLUSTER_SECTION_NAME.c_str(), cluster_name); CHECK_GT(meta_servers.size(), 0); _meta_server.assign_group("meta-servers"); - _meta_server.group_address()->add_list(meta_servers); + _meta_server.group_host_port()->add_list(meta_servers); - _client = new ::dsn::apps::rrdb_client(cluster_name, meta_servers, app_name); + _client = new ::dsn::apps::rrdb_client(cluster_name, meta_servers, app_name, _dns_resolver); } pegasus_client_impl::~pegasus_client_impl() { delete _client; } @@ -1254,7 +1255,7 @@ void pegasus_client_impl::async_get_unordered_scanners( query_cfg_request req; req.app_name = _app_name; - ::dsn::rpc::call(_meta_server, + ::dsn::rpc::call(_dns_resolver->resolve_address(_meta_server), 
RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX, req, nullptr, diff --git a/src/client_lib/pegasus_client_impl.h b/src/client_lib/pegasus_client_impl.h index 52c0787c7c..20e05716d2 100644 --- a/src/client_lib/pegasus_client_impl.h +++ b/src/client_lib/pegasus_client_impl.h @@ -32,11 +32,12 @@ #include #include "rrdb/rrdb_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/blob.h" #include "utils/zlocks.h" namespace dsn { +class dns_resolver; class error_code; class message_ex; class task_tracker; @@ -364,7 +365,7 @@ class pegasus_client_impl : public pegasus_client private: std::string _cluster_name; std::string _app_name; - ::dsn::rpc_address _meta_server; + ::dsn::host_port _meta_server; ::dsn::apps::rrdb_client *_client; /// @@ -379,6 +380,8 @@ class pegasus_client_impl : public pegasus_client /// the map is initialized in init_error() which will be called on client lib initailization. /// static std::unordered_map _server_error_to_client; + + std::shared_ptr _dns_resolver; }; } // namespace client } // namespace pegasus diff --git a/src/common/consensus.thrift b/src/common/consensus.thrift index ec862b3364..48cd9b0445 100644 --- a/src/common/consensus.thrift +++ b/src/common/consensus.thrift @@ -143,6 +143,7 @@ struct learn_request // be duplicated (ie. max_gced_decree < confirmed_decree), if not, // learnee will copy the missing logs. 
7:optional i64 max_gced_decree; + 8:optional dsn.host_port hp_learner; } struct learn_response @@ -156,6 +157,7 @@ struct learn_response 7:dsn.rpc_address address; // learnee's address 8:string base_local_dir; // base dir of files on learnee 9:optional string replica_disk_tag; // the disk tag of learnee located + 10:optional dsn.host_port hp_address; // learnee's address } struct learn_notify_response @@ -180,6 +182,7 @@ struct group_check_request // Used to deliver child gpid and meta_split_status during partition split 6:optional dsn.gpid child_gpid; 7:optional metadata.split_status meta_split_status; + 8:optional dsn.host_port hp_node; } struct group_check_response @@ -195,5 +198,6 @@ struct group_check_response // if secondary pause or cancel split succeed, is_split_stopped = true 8:optional bool is_split_stopped; 9:optional metadata.disk_status disk_status = metadata.disk_status.NORMAL; + 10:optional dsn.host_port hp_node; } diff --git a/src/common/fs_manager.cpp b/src/common/fs_manager.cpp index 88b1da6603..775f9b816e 100644 --- a/src/common/fs_manager.cpp +++ b/src/common/fs_manager.cpp @@ -269,7 +269,7 @@ void fs_manager::add_replica(const gpid &pid, const std::string &pid_dir) const auto &dn = get_dir_node(pid_dir); if (dsn_unlikely(nullptr == dn)) { LOG_ERROR( - "{}: dir({}) of gpid({}) haven't registered", dsn_primary_address(), pid_dir, pid); + "{}: dir({}) of gpid({}) haven't registered", dsn_primary_host_port(), pid_dir, pid); return; } @@ -281,11 +281,11 @@ void fs_manager::add_replica(const gpid &pid, const std::string &pid_dir) } if (!emplace_success) { LOG_WARNING( - "{}: gpid({}) already in the dir_node({})", dsn_primary_address(), pid, dn->tag); + "{}: gpid({}) already in the dir_node({})", dsn_primary_host_port(), pid, dn->tag); return; } - LOG_INFO("{}: add gpid({}) to dir_node({})", dsn_primary_address(), pid, dn->tag); + LOG_INFO("{}: add gpid({}) to dir_node({})", dsn_primary_host_port(), pid, dn->tag); } dir_node 
*fs_manager::find_best_dir_for_new_replica(const gpid &pid) const @@ -319,7 +319,7 @@ dir_node *fs_manager::find_best_dir_for_new_replica(const gpid &pid) const if (selected != nullptr) { LOG_INFO( "{}: put pid({}) to dir({}), which has {} replicas of current app, {} replicas totally", - dsn_primary_address(), + dsn_primary_host_port(), pid, selected->tag, least_app_replicas_count, @@ -359,7 +359,7 @@ void fs_manager::remove_replica(const gpid &pid) pid, dn->tag); if (r != 0) { - LOG_INFO("{}: remove gpid({}) from dir({})", dsn_primary_address(), pid, dn->tag); + LOG_INFO("{}: remove gpid({}) from dir({})", dsn_primary_host_port(), pid, dn->tag); } remove_count += r; } diff --git a/src/common/json_helper.h b/src/common/json_helper.h index 291909bc84..8e282a5994 100644 --- a/src/common/json_helper.h +++ b/src/common/json_helper.h @@ -712,7 +712,10 @@ NON_MEMBER_JSON_SERIALIZATION(dsn::partition_configuration, secondaries, last_drops, last_committed_decree, - partition_flags) + partition_flags, + hp_primary, + hp_secondaries, + hp_last_drops) NON_MEMBER_JSON_SERIALIZATION(dsn::app_info, status, diff --git a/src/common/replication_common.cpp b/src/common/replication_common.cpp index 1585aefe54..6c0a3e5d3b 100644 --- a/src/common/replication_common.cpp +++ b/src/common/replication_common.cpp @@ -32,12 +32,15 @@ #include #include #include +#include +#include #include "common/gpid.h" #include "common/replica_envs.h" #include "common/replication_other_types.h" #include "dsn.layer2_types.h" #include "fmt/core.h" +#include "runtime/rpc/rpc_address.h" #include "runtime/service_app.h" #include "utils/config_api.h" #include "utils/filesystem.h" @@ -161,33 +164,22 @@ int32_t replication_options::app_mutation_2pc_min_replica_count(int32_t app_max_ } } -/*static*/ bool replica_helper::remove_node(::dsn::rpc_address node, - /*inout*/ std::vector<::dsn::rpc_address> &nodeList) -{ - auto it = std::find(nodeList.begin(), nodeList.end(), node); - if (it != nodeList.end()) { - 
nodeList.erase(it); - return true; - } else { - return false; - } -} - /*static*/ bool replica_helper::get_replica_config(const partition_configuration &partition_config, - ::dsn::rpc_address node, + ::dsn::host_port node, /*out*/ replica_configuration &replica_config) { replica_config.pid = partition_config.pid; replica_config.primary = partition_config.primary; replica_config.ballot = partition_config.ballot; replica_config.learner_signature = invalid_signature; + replica_config.__set_hp_primary(partition_config.hp_primary); - if (node == partition_config.primary) { + if (node == partition_config.hp_primary) { replica_config.status = partition_status::PS_PRIMARY; return true; - } else if (std::find(partition_config.secondaries.begin(), - partition_config.secondaries.end(), - node) != partition_config.secondaries.end()) { + } else if (std::find(partition_config.hp_secondaries.begin(), + partition_config.hp_secondaries.end(), + node) != partition_config.hp_secondaries.end()) { replica_config.status = partition_status::PS_SECONDARY; return true; } else { @@ -196,7 +188,7 @@ int32_t replication_options::app_mutation_2pc_min_replica_count(int32_t app_max_ } } -bool replica_helper::load_meta_servers(/*out*/ std::vector &servers, +bool replica_helper::load_meta_servers(/*out*/ std::vector &servers, const char *section, const char *key) { @@ -204,8 +196,9 @@ bool replica_helper::load_meta_servers(/*out*/ std::vector &se std::string server_list = dsn_config_get_value_string(section, key, "", ""); std::vector lv; ::dsn::utils::split_args(server_list.c_str(), lv, ','); + std::unordered_set server_host_ports; for (auto &s : lv) { - ::dsn::rpc_address addr; + ::dsn::host_port hp; std::vector hostname_port; uint32_t ip = 0; utils::split_args(s.c_str(), hostname_port, ':'); @@ -221,19 +214,20 @@ bool replica_helper::load_meta_servers(/*out*/ std::vector &se s, section, key); - if (0 != (ip = ::dsn::rpc_address::ipv4_from_host(hostname_port[0].c_str()))) { - addr.assign_ipv4(ip, 
static_cast(port_num)); - } else if (!addr.from_string_ipv4(s.c_str())) { - LOG_ERROR("invalid address '{}' specified in config [{}].{}", s, section, key); - return false; + hp = host_port(hostname_port[0], static_cast(port_num)); + if (server_host_ports.insert(hp).second) { + servers.push_back(hp); } - // TODO(yingchun): check there is no duplicates - servers.push_back(addr); } if (servers.empty()) { LOG_ERROR("no meta server specified in config [{}].{}", section, key); return false; } + + if (servers.size() != lv.size()) { + LOG_ERROR("server_list {} have duplicate server", server_list); + return false; + } return true; } diff --git a/src/common/replication_common.h b/src/common/replication_common.h index 3771b2c249..0072844630 100644 --- a/src/common/replication_common.h +++ b/src/common/replication_common.h @@ -32,7 +32,7 @@ #include #include "metadata_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_holder.h" #include "runtime/task/task.h" @@ -45,8 +45,8 @@ class query_app_info_response; class query_replica_info_request; class query_replica_info_response; -typedef std::unordered_map<::dsn::rpc_address, partition_status::type> node_statuses; -typedef std::unordered_map<::dsn::rpc_address, dsn::task_ptr> node_tasks; +typedef std::unordered_map<::dsn::host_port, partition_status::type> node_statuses; +typedef std::unordered_map<::dsn::host_port, dsn::task_ptr> node_tasks; typedef rpc_holder update_app_env_rpc; @@ -56,7 +56,7 @@ typedef rpc_holder quer class replication_options { public: - std::vector<::dsn::rpc_address> meta_servers; + std::vector<::dsn::host_port> meta_servers; std::string app_name; std::string app_dir; diff --git a/src/common/replication_other_types.h b/src/common/replication_other_types.h index b5f62205c3..6f80cbbd93 100644 --- a/src/common/replication_other_types.h +++ b/src/common/replication_other_types.h @@ -36,6 +36,8 @@ #include "consensus_types.h" #include 
"replica_admin_types.h" #include "common/replication_enums.h" +#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" namespace dsn { namespace replication { @@ -49,16 +51,17 @@ typedef int64_t decree; #define invalid_offset (-1LL) #define invalid_signature 0 -inline bool is_primary(const partition_configuration &pc, const rpc_address &node) +inline bool is_primary(const partition_configuration &pc, const host_port &node) { - return !node.is_invalid() && pc.primary == node; + return !node.is_invalid() && pc.hp_primary == node; } -inline bool is_secondary(const partition_configuration &pc, const rpc_address &node) +inline bool is_secondary(const partition_configuration &pc, const host_port &node) { return !node.is_invalid() && - std::find(pc.secondaries.begin(), pc.secondaries.end(), node) != pc.secondaries.end(); + std::find(pc.hp_secondaries.begin(), pc.hp_secondaries.end(), node) != + pc.hp_secondaries.end(); } -inline bool is_member(const partition_configuration &pc, const rpc_address &node) +inline bool is_member(const partition_configuration &pc, const host_port &node) { return is_primary(pc, node) || is_secondary(pc, node); } @@ -66,26 +69,37 @@ inline bool is_partition_config_equal(const partition_configuration &pc1, const partition_configuration &pc2) { // secondaries no need to be same order - for (const rpc_address &addr : pc1.secondaries) + for (const host_port &addr : pc1.hp_secondaries) if (!is_secondary(pc2, addr)) return false; // last_drops is not considered into equality check return pc1.ballot == pc2.ballot && pc1.pid == pc2.pid && pc1.max_replica_count == pc2.max_replica_count && pc1.primary == pc2.primary && - pc1.secondaries.size() == pc2.secondaries.size() && + pc1.hp_primary == pc2.hp_primary && pc1.secondaries.size() == pc2.secondaries.size() && + pc1.hp_secondaries.size() == pc2.hp_secondaries.size() && pc1.last_committed_decree == pc2.last_committed_decree; } class replica_helper { public: - static bool 
remove_node(::dsn::rpc_address node, - /*inout*/ std::vector<::dsn::rpc_address> &nodeList); + template + static bool remove_node(const T node, + /*inout*/ std::vector &nodes) + { + auto it = std::find(nodes.begin(), nodes.end(), node); + if (it != nodes.end()) { + nodes.erase(it); + return true; + } else { + return false; + } + } static bool get_replica_config(const partition_configuration &partition_config, - ::dsn::rpc_address node, + ::dsn::host_port node, /*out*/ replica_configuration &replica_config); // true if meta_list's value of config is valid, otherwise return false - static bool load_meta_servers(/*out*/ std::vector &servers, + static bool load_meta_servers(/*out*/ std::vector &servers, const char *section = "meta_server", const char *key = "server_list"); }; diff --git a/src/failure_detector/failure_detector.cpp b/src/failure_detector/failure_detector.cpp index af264d21f1..e6993f22b5 100644 --- a/src/failure_detector/failure_detector.cpp +++ b/src/failure_detector/failure_detector.cpp @@ -37,6 +37,8 @@ #include "failure_detector/fd.code.definition.h" #include "fd_types.h" #include "runtime/api_layer1.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_address.h" #include "runtime/serverlet.h" #include "runtime/task/async_calls.h" #include "runtime/task/task_spec.h" @@ -53,7 +55,8 @@ METRIC_DEFINE_counter(server, namespace dsn { namespace fd { -failure_detector::failure_detector() : METRIC_VAR_INIT_server(beacon_failed_count) +failure_detector::failure_detector(const std::shared_ptr &resolver) + : METRIC_VAR_INIT_server(beacon_failed_count), _dns_resolver(resolver) { dsn::threadpool_code pool = task_spec::get(LPC_BEACON_CHECK.code())->pool_code; task_spec::get(RPC_FD_FAILURE_DETECTOR_PING.code())->pool_code = pool; @@ -119,7 +122,7 @@ void failure_detector::stop() _workers.clear(); } -void failure_detector::register_master(::dsn::rpc_address target) +void failure_detector::register_master(::dsn::host_port target) { bool setup_timer = 
false; @@ -153,8 +156,8 @@ void failure_detector::register_master(::dsn::rpc_address target) } } -bool failure_detector::switch_master(::dsn::rpc_address from, - ::dsn::rpc_address to, +bool failure_detector::switch_master(::dsn::host_port from, + ::dsn::host_port to, uint32_t delay_milliseconds) { /* the caller of switch master shoud lock necessarily to protect _masters */ @@ -192,7 +195,7 @@ bool failure_detector::switch_master(::dsn::rpc_address from, bool failure_detector::is_time_greater_than(uint64_t ts, uint64_t base) { return ts > base; } -void failure_detector::report(::dsn::rpc_address node, bool is_master, bool is_connected) +void failure_detector::report(::dsn::host_port node, bool is_master, bool is_connected) { LOG_INFO( "{} {}connected: {}", is_master ? "master" : "worker", is_connected ? "" : "dis", node); @@ -216,7 +219,7 @@ void failure_detector::check_all_records() return; } - std::vector expire; + std::vector expire; { zauto_lock l(_lock); @@ -302,28 +305,28 @@ void failure_detector::check_all_records() } } -void failure_detector::add_allow_list(::dsn::rpc_address node) +void failure_detector::add_allow_list(::dsn::host_port node) { zauto_lock l(_lock); _allow_list.insert(node); } -bool failure_detector::remove_from_allow_list(::dsn::rpc_address node) +bool failure_detector::remove_from_allow_list(::dsn::host_port node) { zauto_lock l(_lock); return _allow_list.erase(node) > 0; } -void failure_detector::set_allow_list(const std::vector &replica_addrs) +void failure_detector::set_allow_list(const std::vector &replica_hps) { CHECK(!_is_started, "FD is already started, the allow list should really not be modified"); - std::vector nodes; - for (auto &addr : replica_addrs) { - rpc_address node; - if (!node.from_string_ipv4(addr.c_str())) { + std::vector nodes; + for (auto &hp : replica_hps) { + host_port node; + if (!node.from_string(hp.c_str())) { LOG_WARNING("replica_white_list has invalid ip {}, the allow list won't be modified", - addr); + hp); 
return; } nodes.push_back(node); @@ -351,33 +354,38 @@ std::string failure_detector::get_allow_list(const std::vector &arg void failure_detector::on_ping_internal(const beacon_msg &beacon, /*out*/ beacon_ack &ack) { + host_port hp_from_addr, hp_to_addr; + GET_HOST_PORT(beacon, from_addr, hp_from_addr); + GET_HOST_PORT(beacon, to_addr, hp_to_addr); + ack.time = beacon.time; ack.this_node = beacon.to_addr; + ack.__set_hp_this_node(hp_to_addr); ack.primary_node = dsn_primary_address(); + ack.__set_hp_primary_node(dsn_primary_host_port()); ack.is_master = true; ack.allowed = true; zauto_lock l(_lock); uint64_t now = dsn_now_ms(); - auto node = beacon.from_addr; - worker_map::iterator itr = _workers.find(node); + worker_map::iterator itr = _workers.find(hp_from_addr); if (itr == _workers.end()) { // if is a new worker, check allow list first if need - if (_use_allow_list && _allow_list.find(node) == _allow_list.end()) { - LOG_WARNING("new worker[{}] is rejected", node); + if (_use_allow_list && _allow_list.find(hp_from_addr) == _allow_list.end()) { + LOG_WARNING("new worker[{}] is rejected", hp_from_addr); ack.allowed = false; return; } // create new entry for node - worker_record record(node, now); + worker_record record(hp_from_addr, now); record.is_alive = true; - _workers.insert(std::make_pair(node, record)); + _workers.insert(std::make_pair(hp_from_addr, record)); - report(node, false, true); - on_worker_connected(node); + report(hp_from_addr, false, true); + on_worker_connected(hp_from_addr); } else if (is_time_greater_than(now, itr->second.last_beacon_recv_time)) { // update last_beacon_recv_time itr->second.last_beacon_recv_time = now; @@ -389,8 +397,8 @@ void failure_detector::on_ping_internal(const beacon_msg &beacon, /*out*/ beacon if (itr->second.is_alive == false) { itr->second.is_alive = true; - report(node, false, true); - on_worker_connected(node); + report(hp_from_addr, false, true); + on_worker_connected(hp_from_addr); } } else { LOG_INFO("now[{}] <= 
last_recv_time[{}]", now, itr->second.last_beacon_recv_time); @@ -414,33 +422,40 @@ bool failure_detector::end_ping_internal(::dsn::error_code err, const beacon_ack /* * the caller of the end_ping_internal should lock necessarily!!! */ + host_port hp_this_node, hp_primary_node; + GET_HOST_PORT(ack, this_node, hp_this_node); + GET_HOST_PORT(ack, primary_node, hp_primary_node); + uint64_t beacon_send_time = ack.time; - auto node = ack.this_node; if (err != ERR_OK) { LOG_WARNING("ping master({}) failed, timeout_ms = {}, err = {}", - node, + hp_this_node, _beacon_timeout_milliseconds, err); METRIC_VAR_INCREMENT(beacon_failed_count); } - master_map::iterator itr = _masters.find(node); + master_map::iterator itr = _masters.find(hp_this_node); if (itr == _masters.end()) { LOG_WARNING("received beacon ack without corresponding master, ignore it, " - "remote_master[{}], local_worker[{}]", - node, + "remote_master[{}({})], local_worker[{}({})]", + hp_this_node, + ack.this_node, + dsn_primary_host_port(), dsn_primary_address()); return false; } master_record &record = itr->second; if (!ack.allowed) { - LOG_WARNING( - "worker rejected, stop sending beacon message, remote_master[{}], local_worker[{}]", - node, - dsn_primary_address()); + LOG_WARNING("worker rejected, stop sending beacon message, remote_master[{}({})], " + "local_worker[{}({})]", + hp_this_node, + ack.this_node, + dsn_primary_host_port(), + dsn_primary_address()); record.rejected = true; record.send_beacon_timer->cancel(true); return false; @@ -461,8 +476,11 @@ bool failure_detector::end_ping_internal(::dsn::error_code err, const beacon_ack // if ack is not from master meta, worker should not update its last send time if (!ack.is_master) { - LOG_WARNING( - "node[{}] is not master, ack.primary_node[{}] is real master", node, ack.primary_node); + LOG_WARNING("node[{}({})] is not master, ack.primary_node[{}({})] is real master", + hp_this_node, + ack.this_node, + hp_primary_node, + ack.primary_node); return true; } 
@@ -479,15 +497,15 @@ bool failure_detector::end_ping_internal(::dsn::error_code err, const beacon_ack if (!record.is_alive && is_time_greater_than(now, record.last_send_time_for_beacon_with_ack) && now - record.last_send_time_for_beacon_with_ack <= _lease_milliseconds) { // report master connected - report(node, true, true); + report(hp_this_node, true, true); itr->second.is_alive = true; - on_master_connected(node); + on_master_connected(hp_this_node); } return true; } -bool failure_detector::unregister_master(::dsn::rpc_address node) +bool failure_detector::unregister_master(::dsn::host_port node) { zauto_lock l(_lock); auto it = _masters.find(node); @@ -503,7 +521,7 @@ bool failure_detector::unregister_master(::dsn::rpc_address node) } } -bool failure_detector::is_master_connected(::dsn::rpc_address node) const +bool failure_detector::is_master_connected(::dsn::host_port node) const { zauto_lock l(_lock); auto it = _masters.find(node); @@ -513,7 +531,7 @@ bool failure_detector::is_master_connected(::dsn::rpc_address node) const return false; } -void failure_detector::register_worker(::dsn::rpc_address target, bool is_connected) +void failure_detector::register_worker(::dsn::host_port target, bool is_connected) { /* * callers should use the fd::_lock necessarily @@ -529,7 +547,7 @@ void failure_detector::register_worker(::dsn::rpc_address target, bool is_connec } } -bool failure_detector::unregister_worker(::dsn::rpc_address node) +bool failure_detector::unregister_worker(::dsn::host_port node) { /* * callers should use the fd::_lock necessarily @@ -555,7 +573,7 @@ void failure_detector::clear_workers() _workers.clear(); } -bool failure_detector::is_worker_connected(::dsn::rpc_address node) const +bool failure_detector::is_worker_connected(::dsn::host_port node) const { zauto_lock l(_lock); auto it = _workers.find(node); @@ -565,18 +583,25 @@ bool failure_detector::is_worker_connected(::dsn::rpc_address node) const return false; } -void 
failure_detector::send_beacon(::dsn::rpc_address target, uint64_t time) +void failure_detector::send_beacon(::dsn::host_port target, uint64_t time) { + auto addr = _dns_resolver->resolve_address(target); beacon_msg beacon; beacon.time = time; beacon.from_addr = dsn_primary_address(); - beacon.to_addr = target; + beacon.__set_hp_from_addr(dsn_primary_host_port()); + beacon.to_addr = addr; + beacon.__set_hp_to_addr(target); beacon.__set_start_time(static_cast(dsn::utils::process_start_millis())); - LOG_INFO( - "send ping message, from[{}], to[{}], time[{}]", beacon.from_addr, beacon.to_addr, time); + LOG_INFO("send ping message, from[{}({})], to[{}({})], time[{}]", + beacon.hp_from_addr, + beacon.from_addr, + beacon.hp_to_addr, + beacon.to_addr, + time); - ::dsn::rpc::call(target, + ::dsn::rpc::call(addr, RPC_FD_FAILURE_DETECTOR_PING, beacon, &_tracker, @@ -586,6 +611,8 @@ void failure_detector::send_beacon(::dsn::rpc_address target, uint64_t time) ack.time = beacon.time; ack.this_node = beacon.to_addr; ack.primary_node.set_invalid(); + ack.__set_hp_this_node(beacon.hp_to_addr); + ack.__set_hp_primary_node(host_port()); ack.is_master = false; ack.allowed = true; end_ping(err, ack, nullptr); diff --git a/src/failure_detector/failure_detector.h b/src/failure_detector/failure_detector.h index b1c896360f..2c8d98723a 100644 --- a/src/failure_detector/failure_detector.h +++ b/src/failure_detector/failure_detector.h @@ -35,7 +35,7 @@ #include "failure_detector/fd.client.h" #include "failure_detector/fd.server.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "runtime/task/task_code.h" #include "runtime/task/task_tracker.h" @@ -46,6 +46,7 @@ namespace dsn { class command_deregister; +class dns_resolver; template class rpc_replier; @@ -63,12 +64,12 @@ class failure_detector_callback virtual ~failure_detector_callback() {} // worker side - virtual void on_master_disconnected(const 
std::vector<::dsn::rpc_address> &nodes) = 0; - virtual void on_master_connected(::dsn::rpc_address node) = 0; + virtual void on_master_disconnected(const std::vector<::dsn::host_port> &nodes) = 0; + virtual void on_master_connected(::dsn::host_port node) = 0; // master side - virtual void on_worker_disconnected(const std::vector<::dsn::rpc_address> &nodes) = 0; - virtual void on_worker_connected(::dsn::rpc_address node) = 0; + virtual void on_worker_disconnected(const std::vector<::dsn::host_port> &nodes) = 0; + virtual void on_worker_connected(::dsn::host_port node) = 0; }; // The interface for a perfect failure detector. @@ -99,7 +100,7 @@ class failure_detector : public failure_detector_service, public failure_detector_callback { public: - failure_detector(); + failure_detector(const std::shared_ptr &resolver); virtual ~failure_detector(); virtual void on_ping(const beacon_msg &beacon, ::dsn::rpc_replier &reply); @@ -121,32 +122,32 @@ class failure_detector : public failure_detector_service, uint32_t get_lease_ms() const { return _lease_milliseconds; } uint32_t get_grace_ms() const { return _grace_milliseconds; } - void register_master(::dsn::rpc_address target); + void register_master(::dsn::host_port target); - bool switch_master(::dsn::rpc_address from, ::dsn::rpc_address to, uint32_t delay_milliseconds); + bool switch_master(::dsn::host_port from, ::dsn::host_port to, uint32_t delay_milliseconds); - bool unregister_master(::dsn::rpc_address node); + bool unregister_master(::dsn::host_port node); - virtual bool is_master_connected(::dsn::rpc_address node) const; + virtual bool is_master_connected(::dsn::host_port node) const; // ATTENTION: be very careful to set is_connected to false as // workers are always considered *connected* initially which is ok even when workers think // master is disconnected // Considering workers *disconnected* initially is *dangerous* coz it may violate the invariance // when workers think they are online - void 
register_worker(::dsn::rpc_address node, bool is_connected = true); + void register_worker(::dsn::host_port node, bool is_connected = true); - bool unregister_worker(::dsn::rpc_address node); + bool unregister_worker(::dsn::host_port node); void clear_workers(); - virtual bool is_worker_connected(::dsn::rpc_address node) const; + virtual bool is_worker_connected(::dsn::host_port node) const; - void add_allow_list(::dsn::rpc_address node); + void add_allow_list(::dsn::host_port node); - bool remove_from_allow_list(::dsn::rpc_address node); + bool remove_from_allow_list(::dsn::host_port node); - void set_allow_list(const std::vector &replica_addrs); + void set_allow_list(const std::vector &replica_hps); std::string get_allow_list(const std::vector &args) const; @@ -154,6 +155,8 @@ class failure_detector : public failure_detector_service, int master_count() const { return static_cast(_masters.size()); } + std::shared_ptr get_dns_resolver() const { return _dns_resolver; } + protected: void on_ping_internal(const beacon_msg &beacon, /*out*/ beacon_ack &ack); @@ -162,7 +165,7 @@ class failure_detector : public failure_detector_service, bool is_time_greater_than(uint64_t ts, uint64_t base); - void report(::dsn::rpc_address node, bool is_master, bool is_connected); + void report(::dsn::host_port node, bool is_master, bool is_connected); private: void check_all_records(); @@ -171,7 +174,7 @@ class failure_detector : public failure_detector_service, class master_record { public: - ::dsn::rpc_address node; + ::dsn::host_port node; uint64_t last_send_time_for_beacon_with_ack; bool is_alive; bool rejected; @@ -179,7 +182,7 @@ class failure_detector : public failure_detector_service, // masters are always considered *disconnected* initially which is ok even when master // thinks workers are connected - master_record(::dsn::rpc_address n, uint64_t last_send_time_for_beacon_with_ack_) + master_record(::dsn::host_port n, uint64_t last_send_time_for_beacon_with_ack_) { node = n; 
last_send_time_for_beacon_with_ack = last_send_time_for_beacon_with_ack_; @@ -191,13 +194,13 @@ class failure_detector : public failure_detector_service, class worker_record { public: - ::dsn::rpc_address node; + ::dsn::host_port node; uint64_t last_beacon_recv_time; bool is_alive; // workers are always considered *connected* initially which is ok even when workers think // master is disconnected - worker_record(::dsn::rpc_address node, uint64_t last_beacon_recv_time) + worker_record(::dsn::host_port node, uint64_t last_beacon_recv_time) { this->node = node; this->last_beacon_recv_time = last_beacon_recv_time; @@ -206,11 +209,11 @@ class failure_detector : public failure_detector_service, }; private: - typedef std::unordered_map<::dsn::rpc_address, master_record> master_map; - typedef std::unordered_map<::dsn::rpc_address, worker_record> worker_map; + typedef std::unordered_map<::dsn::host_port, master_record> master_map; + typedef std::unordered_map<::dsn::host_port, worker_record> worker_map; // allow list are set on machine name (port can vary) - typedef std::unordered_set<::dsn::rpc_address> allow_list; + typedef std::unordered_set<::dsn::host_port> allow_list; master_map _masters; worker_map _workers; @@ -234,8 +237,10 @@ class failure_detector : public failure_detector_service, mutable zlock _lock; dsn::task_tracker _tracker; + std::shared_ptr _dns_resolver; + // subClass can rewrite these method. 
- virtual void send_beacon(::dsn::rpc_address node, uint64_t time); + virtual void send_beacon(::dsn::host_port node, uint64_t time); }; } } // end namespace diff --git a/src/failure_detector/failure_detector_multimaster.cpp b/src/failure_detector/failure_detector_multimaster.cpp index 66b5d190e4..88c506ebfa 100644 --- a/src/failure_detector/failure_detector_multimaster.cpp +++ b/src/failure_detector/failure_detector_multimaster.cpp @@ -29,92 +29,102 @@ #include "failure_detector/failure_detector_multimaster.h" #include "fd_types.h" -#include "runtime/rpc/group_address.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/error_code.h" #include "utils/rand.h" namespace dsn { +class dns_resolver; + namespace dist { slave_failure_detector_with_multimaster::slave_failure_detector_with_multimaster( - std::vector<::dsn::rpc_address> &meta_servers, + const std::shared_ptr &resolver, + std::vector<::dsn::host_port> &meta_servers, std::function &&master_disconnected_callback, std::function &&master_connected_callback) + : dsn::fd::failure_detector(resolver) { _meta_servers.assign_group("meta-servers"); for (const auto &s : meta_servers) { - if (!_meta_servers.group_address()->add(s)) { + if (!_meta_servers.group_host_port()->add(s)) { LOG_WARNING("duplicate adress {}", s); } } - _meta_servers.group_address()->set_leader( + _meta_servers.group_host_port()->set_leader( meta_servers[rand::next_u32(0, (uint32_t)meta_servers.size() - 1)]); // ATTENTION: here we disable dsn_group_set_update_leader_automatically to avoid // failure detecting logic is affected by rpc failure or rpc forwarding. 
- _meta_servers.group_address()->set_update_leader_automatically(false); + _meta_servers.group_host_port()->set_update_leader_automatically(false); _master_disconnected_callback = std::move(master_disconnected_callback); _master_connected_callback = std::move(master_connected_callback); } -void slave_failure_detector_with_multimaster::set_leader_for_test(rpc_address meta) +void slave_failure_detector_with_multimaster::set_leader_for_test(host_port meta) { - _meta_servers.group_address()->set_leader(meta); + _meta_servers.group_host_port()->set_leader(meta); } void slave_failure_detector_with_multimaster::end_ping(::dsn::error_code err, const fd::beacon_ack &ack, void *) { - LOG_INFO("end ping result, error[{}], time[{}], ack.this_node[{}], ack.primary_node[{}], " - "ack.is_master[{}], ack.allowed[{}]", - err, - ack.time, - ack.this_node, - ack.primary_node, - ack.is_master ? "true" : "false", - ack.allowed ? "true" : "false"); + host_port hp_this_node, hp_primary_node; + GET_HOST_PORT(ack, this_node, hp_this_node); + GET_HOST_PORT(ack, primary_node, hp_primary_node); + + LOG_INFO( + "end ping result, error[{}], time[{}], ack.this_node[{}({})], ack.primary_node[{}({})], " + "ack.is_master[{}], ack.allowed[{}]", + err, + ack.time, + hp_this_node, + ack.this_node, + hp_primary_node, + ack.primary_node, + ack.is_master ? "true" : "false", + ack.allowed ? 
"true" : "false"); zauto_lock l(failure_detector::_lock); if (!failure_detector::end_ping_internal(err, ack)) return; - CHECK_EQ(ack.this_node, _meta_servers.group_address()->leader()); + CHECK_EQ(hp_this_node, _meta_servers.group_host_port()->leader()); if (ERR_OK != err) { - rpc_address next = _meta_servers.group_address()->next(ack.this_node); - if (next != ack.this_node) { - _meta_servers.group_address()->set_leader(next); + host_port next = _meta_servers.group_host_port()->next(hp_this_node); + if (next != hp_this_node) { + _meta_servers.group_host_port()->set_leader(next); // do not start next send_beacon() immediately to avoid send rpc too frequently - switch_master(ack.this_node, next, 1000); + switch_master(hp_this_node, next, 1000); } } else { if (ack.is_master) { // do nothing - } else if (ack.primary_node.is_invalid()) { - rpc_address next = _meta_servers.group_address()->next(ack.this_node); - if (next != ack.this_node) { - _meta_servers.group_address()->set_leader(next); + } else if (hp_primary_node.is_invalid()) { + host_port next = _meta_servers.group_host_port()->next(hp_this_node); + if (next != hp_this_node) { + _meta_servers.group_host_port()->set_leader(next); // do not start next send_beacon() immediately to avoid send rpc too frequently - switch_master(ack.this_node, next, 1000); + switch_master(hp_this_node, next, 1000); } } else { - _meta_servers.group_address()->set_leader(ack.primary_node); + _meta_servers.group_host_port()->set_leader(hp_primary_node); // start next send_beacon() immediately because the leader is possibly right. 
- switch_master(ack.this_node, ack.primary_node, 0); + switch_master(hp_this_node, hp_primary_node, 0); } } } // client side void slave_failure_detector_with_multimaster::on_master_disconnected( - const std::vector<::dsn::rpc_address> &nodes) + const std::vector<::dsn::host_port> &nodes) { bool primary_disconnected = false; - rpc_address leader = _meta_servers.group_address()->leader(); + host_port leader = _meta_servers.group_host_port()->leader(); for (auto it = nodes.begin(); it != nodes.end(); ++it) { if (leader == *it) primary_disconnected = true; @@ -125,13 +135,13 @@ void slave_failure_detector_with_multimaster::on_master_disconnected( } } -void slave_failure_detector_with_multimaster::on_master_connected(::dsn::rpc_address node) +void slave_failure_detector_with_multimaster::on_master_connected(::dsn::host_port node) { /* * well, this is called in on_ping_internal, which is called by rep::end_ping. * So this function is called in the lock context of fd::_lock */ - bool is_primary = (_meta_servers.group_address()->leader() == node); + bool is_primary = (_meta_servers.group_host_port()->leader() == node); if (is_primary) { _master_connected_callback(); } diff --git a/src/failure_detector/failure_detector_multimaster.h b/src/failure_detector/failure_detector_multimaster.h index bfaedfcadf..7f475bbed9 100644 --- a/src/failure_detector/failure_detector_multimaster.h +++ b/src/failure_detector/failure_detector_multimaster.h @@ -27,16 +27,19 @@ #pragma once #include +#include #include #include "failure_detector/failure_detector.h" -#include "runtime/rpc/group_address.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/group_host_port.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/fmt_logging.h" #include "utils/zlocks.h" namespace dsn { +class dns_resolver; class error_code; + namespace fd { class beacon_ack; } // namespace fd @@ -46,7 +49,8 @@ namespace dist { class slave_failure_detector_with_multimaster : public dsn::fd::failure_detector 
{ public: - slave_failure_detector_with_multimaster(std::vector<::dsn::rpc_address> &meta_servers, + slave_failure_detector_with_multimaster(const std::shared_ptr &resolver, + std::vector<::dsn::host_port> &meta_servers, std::function &&master_disconnected_callback, std::function &&master_connected_callback); virtual ~slave_failure_detector_with_multimaster() {} @@ -54,35 +58,35 @@ class slave_failure_detector_with_multimaster : public dsn::fd::failure_detector void end_ping(::dsn::error_code err, const fd::beacon_ack &ack, void *context) override; // client side - void on_master_disconnected(const std::vector<::dsn::rpc_address> &nodes) override; - void on_master_connected(::dsn::rpc_address node) override; + void on_master_disconnected(const std::vector<::dsn::host_port> &nodes) override; + void on_master_connected(::dsn::host_port node) override; // server side - void on_worker_disconnected(const std::vector<::dsn::rpc_address> &nodes) override + void on_worker_disconnected(const std::vector<::dsn::host_port> &nodes) override { CHECK(false, "invalid execution flow"); } - void on_worker_connected(::dsn::rpc_address node) override + void on_worker_connected(::dsn::host_port node) override { CHECK(false, "invalid execution flow"); } - ::dsn::rpc_address current_server_contact() const; - ::dsn::rpc_address get_servers() const { return _meta_servers; } + ::dsn::host_port current_server_contact() const; + host_port get_servers() const { return _meta_servers; } - void set_leader_for_test(dsn::rpc_address meta); + void set_leader_for_test(dsn::host_port meta); private: - dsn::rpc_address _meta_servers; + host_port _meta_servers; std::function _master_disconnected_callback; std::function _master_connected_callback; }; //------------------ inline implementation -------------------------------- -inline ::dsn::rpc_address slave_failure_detector_with_multimaster::current_server_contact() const +inline ::dsn::host_port 
slave_failure_detector_with_multimaster::current_server_contact() const { zauto_lock l(failure_detector::_lock); - return _meta_servers.group_address()->leader(); + return _meta_servers.group_host_port()->leader(); } } } // end namespace diff --git a/src/failure_detector/fd.thrift b/src/failure_detector/fd.thrift index a85a38a67b..12054106ea 100644 --- a/src/failure_detector/fd.thrift +++ b/src/failure_detector/fd.thrift @@ -30,23 +30,28 @@ namespace cpp dsn.fd struct beacon_msg { - 1: i64 time; - 2: dsn.rpc_address from_addr; - 3: dsn.rpc_address to_addr; - 4: optional i64 start_time; + 1: i64 time; + 2: dsn.rpc_address from_addr; + 3: dsn.rpc_address to_addr; + 4: optional i64 start_time; + 5: optional dsn.host_port hp_from_addr; + 6: optional dsn.host_port hp_to_addr; } struct beacon_ack { - 1: i64 time; - 2: dsn.rpc_address this_node; - 3: dsn.rpc_address primary_node; - 4: bool is_master; - 5: bool allowed; + 1: i64 time; + 2: dsn.rpc_address this_node; + 3: dsn.rpc_address primary_node; + 4: bool is_master; + 5: bool allowed; + 6: optional dsn.host_port hp_this_node; + 7: optional dsn.host_port hp_primary_node; } struct config_master_message { - 1: dsn.rpc_address master; - 2: bool is_register; + 1: dsn.rpc_address master; + 2: bool is_register; + 3: optional dsn.host_port hp_master; } diff --git a/src/failure_detector/test/failure_detector.cpp b/src/failure_detector/test/failure_detector.cpp index 42b3cbb783..0aebe0a878 100644 --- a/src/failure_detector/test/failure_detector.cpp +++ b/src/failure_detector/test/failure_detector.cpp @@ -48,9 +48,11 @@ #include "meta/meta_server_failure_detector.h" #include "replica/replica_stub.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/group_address.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/group_host_port.h" #include "runtime/rpc/network.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/serverlet.h" 
#include "runtime/service_app.h" @@ -83,11 +85,11 @@ class worker_fd_test : public ::dsn::dist::slave_failure_detector_with_multimast private: volatile bool _send_ping_switch; /* this function only triggerd once*/ - std::function _connected_cb; - std::function &)> _disconnected_cb; + std::function _connected_cb; + std::function &)> _disconnected_cb; protected: - virtual void send_beacon(::dsn::rpc_address node, uint64_t time) override + virtual void send_beacon(::dsn::host_port node, uint64_t time) override { if (_send_ping_switch) failure_detector::send_beacon(node, time); @@ -96,29 +98,31 @@ class worker_fd_test : public ::dsn::dist::slave_failure_detector_with_multimast } } - virtual void on_master_disconnected(const std::vector &nodes) override + virtual void on_master_disconnected(const std::vector &nodes) override { if (_disconnected_cb) _disconnected_cb(nodes); } - virtual void on_master_connected(rpc_address node) override + virtual void on_master_connected(host_port node) override { if (_connected_cb) _connected_cb(node); } public: - worker_fd_test(replication::replica_stub *stub, std::vector &meta_servers) - : slave_failure_detector_with_multimaster(meta_servers, + worker_fd_test(replication::replica_stub *stub, std::vector &meta_servers) + : slave_failure_detector_with_multimaster(stub == nullptr ? 
std::make_shared() + : stub->get_dns_resolver(), + meta_servers, [=]() { stub->on_meta_server_disconnected(); }, [=]() { stub->on_meta_server_connected(); }) { _send_ping_switch = false; } void toggle_send_ping(bool toggle) { _send_ping_switch = toggle; } - void when_connected(const std::function &func) { _connected_cb = func; } - void when_disconnected(const std::function &nodes)> &func) + void when_connected(const std::function &func) { _connected_cb = func; } + void when_disconnected(const std::function &nodes)> &func) { _disconnected_cb = func; } @@ -132,8 +136,8 @@ class worker_fd_test : public ::dsn::dist::slave_failure_detector_with_multimast class master_fd_test : public replication::meta_server_failure_detector { private: - std::function _connected_cb; - std::function &)> _disconnected_cb; + std::function _connected_cb; + std::function &)> _disconnected_cb; volatile bool _response_ping_switch; public: @@ -149,27 +153,28 @@ class master_fd_test : public replication::meta_server_failure_detector } } - virtual void on_worker_disconnected(const std::vector &worker_list) override + virtual void on_worker_disconnected(const std::vector &worker_list) override { if (_disconnected_cb) _disconnected_cb(worker_list); } - virtual void on_worker_connected(rpc_address node) override + virtual void on_worker_connected(host_port node) override { if (_connected_cb) _connected_cb(node); } - master_fd_test() : meta_server_failure_detector(rpc_address(), false) + master_fd_test(const std::shared_ptr<::dsn::dns_resolver> &resolver) + : meta_server_failure_detector(resolver, host_port(), false) { _response_ping_switch = true; } void toggle_response_ping(bool toggle) { _response_ping_switch = toggle; } - void when_connected(const std::function &func) { _connected_cb = func; } - void when_disconnected(const std::function &nodes)> &func) + void when_connected(const std::function &func) { _connected_cb = func; } + void when_disconnected(const std::function &nodes)> &func) { 
_disconnected_cb = func; } - void test_register_worker(rpc_address node) + void test_register_worker(host_port node) { zauto_lock l(failure_detector::_lock); register_worker(node); @@ -188,9 +193,9 @@ class test_worker : public service_app, public serverlet error_code start(const std::vector &args) override { - std::vector master_group; + std::vector master_group; for (int i = 0; i < 3; ++i) - master_group.push_back(rpc_address("localhost", MPORT_START + i)); + master_group.push_back(host_port("localhost", MPORT_START + i)); _worker_fd = new worker_fd_test(nullptr, master_group); _worker_fd->start(1, 1, 9, 10); ++started_apps; @@ -207,10 +212,14 @@ class test_worker : public service_app, public serverlet LOG_DEBUG("master config, request: {}, type: {}", request.master, request.is_register ? "reg" : "unreg"); + + host_port hp_master; + GET_HOST_PORT(request, master, hp_master); + if (request.is_register) - _worker_fd->register_master(request.master); + _worker_fd->register_master(hp_master); else - _worker_fd->unregister_master(request.master); + _worker_fd->unregister_master(hp_master); response = true; } @@ -228,8 +237,8 @@ class test_master : public service_app { FLAGS_stable_rs_min_running_seconds = 10; FLAGS_max_succssive_unstable_restart = 10; - - _master_fd = new master_fd_test(); + std::shared_ptr<::dsn::dns_resolver> dns_resolver(new ::dsn::dns_resolver()); + _master_fd = new master_fd_test(dns_resolver); _master_fd->set_options(&_opts); bool use_allow_list = false; if (args.size() >= 3 && args[1] == "whitelist") { @@ -238,7 +247,7 @@ class test_master : public service_app for (auto &port : ports) { rpc_address addr; addr.assign_ipv4(network::get_local_ipv4(), std::stoi(port)); - _master_fd->add_allow_list(addr); + _master_fd->add_allow_list(host_port(addr)); } use_allow_list = true; } @@ -307,21 +316,22 @@ bool get_worker_and_master(test_worker *&worker, std::vector &mas void master_group_set_leader(std::vector &master_group, int leader_index) { - 
rpc_address leader_addr("localhost", MPORT_START + leader_index); + host_port leader_hp("localhost", MPORT_START + leader_index); int i = 0; for (test_master *&master : master_group) { - master->fd()->set_leader_for_test(leader_addr, leader_index == i); + master->fd()->set_leader_for_test(leader_hp, leader_index == i); i++; } } void worker_set_leader(test_worker *worker, int leader_contact) { - worker->fd()->set_leader_for_test(rpc_address("localhost", MPORT_START + leader_contact)); + worker->fd()->set_leader_for_test(host_port("localhost", MPORT_START + leader_contact)); config_master_message msg; msg.master = rpc_address("localhost", MPORT_START + leader_contact); msg.is_register = true; + msg.__set_hp_master(host_port(msg.master)); error_code err; bool response; std::tie(err, response) = rpc::call_wait( @@ -331,11 +341,13 @@ void worker_set_leader(test_worker *worker, int leader_contact) void clear(test_worker *worker, std::vector masters) { - rpc_address leader = worker->fd()->get_servers().group_address()->leader(); + auto hp = worker->fd()->get_servers().group_host_port()->leader(); + rpc_address leader = worker->fd()->get_dns_resolver()->resolve_address(hp); config_master_message msg; msg.master = leader; msg.is_register = false; + msg.__set_hp_master(hp); error_code err; bool response; std::tie(err, response) = rpc::call_wait( @@ -356,18 +368,17 @@ void finish(test_worker *worker, test_master *master, int master_index) std::atomic_int wait_count; wait_count.store(2); worker->fd()->when_disconnected( - [&wait_count, master_index](const std::vector &addr_list) mutable { + [&wait_count, master_index](const std::vector &addr_list) mutable { ASSERT_EQ(addr_list.size(), 1); ASSERT_EQ(addr_list[0].port(), MPORT_START + master_index); --wait_count; }); - master->fd()->when_disconnected( - [&wait_count](const std::vector &addr_list) mutable { - ASSERT_EQ(addr_list.size(), 1); - ASSERT_EQ(addr_list[0].port(), WPORT); - --wait_count; - }); + 
master->fd()->when_disconnected([&wait_count](const std::vector &addr_list) mutable { + ASSERT_EQ(addr_list.size(), 1); + ASSERT_EQ(addr_list[0].port(), WPORT); + --wait_count; + }); // we don't send any ping message now worker->fd()->toggle_send_ping(false); @@ -392,11 +403,11 @@ TEST(fd, dummy_connect_disconnect) // simply wait for two connected std::atomic_int wait_count; wait_count.store(2); - worker->fd()->when_connected([&wait_count](rpc_address leader) mutable { + worker->fd()->when_connected([&wait_count](host_port leader) mutable { ASSERT_EQ(leader.port(), MPORT_START); --wait_count; }); - leader->fd()->when_connected([&wait_count](rpc_address worker_addr) mutable { + leader->fd()->when_connected([&wait_count](host_port worker_addr) mutable { ASSERT_EQ(worker_addr.port(), WPORT); --wait_count; }); @@ -426,8 +437,8 @@ TEST(fd, master_redirect) wait_count.store(2); /* although we contact to the first master, but in the end we must connect to the right leader */ - worker->fd()->when_connected([&wait_count](rpc_address leader) mutable { --wait_count; }); - leader->fd()->when_connected([&wait_count](rpc_address worker_addr) mutable { + worker->fd()->when_connected([&wait_count](host_port leader) mutable { --wait_count; }); + leader->fd()->when_connected([&wait_count](host_port worker_addr) mutable { ASSERT_EQ(worker_addr.port(), WPORT); --wait_count; }); @@ -463,7 +474,7 @@ TEST(fd, switch_new_master_suddenly) std::atomic_int wait_count; wait_count.store(2); - auto cb = [&wait_count](rpc_address) mutable { --wait_count; }; + auto cb = [&wait_count](host_port) mutable { --wait_count; }; worker->fd()->when_connected(cb); tst_master->fd()->when_connected(cb); @@ -482,7 +493,7 @@ TEST(fd, switch_new_master_suddenly) */ tst_master->fd()->clear_workers(); wait_count.store(1); - tst_master->fd()->when_connected([&wait_count](rpc_address addr) mutable { + tst_master->fd()->when_connected([&wait_count](host_port addr) mutable { ASSERT_EQ(addr.port(), WPORT); 
--wait_count; }); @@ -518,7 +529,7 @@ TEST(fd, old_master_died) std::atomic_int wait_count; wait_count.store(2); - auto cb = [&wait_count](rpc_address) mutable { --wait_count; }; + auto cb = [&wait_count](host_port) mutable { --wait_count; }; worker->fd()->when_connected(cb); tst_master->fd()->when_connected(cb); @@ -529,7 +540,7 @@ TEST(fd, old_master_died) worker->fd()->when_connected(nullptr); tst_master->fd()->when_connected(nullptr); - worker->fd()->when_disconnected([](const std::vector &masters_list) { + worker->fd()->when_disconnected([](const std::vector &masters_list) { ASSERT_EQ(masters_list.size(), 1); LOG_DEBUG("disconnect from master: {}", masters_list[0]); }); @@ -544,7 +555,7 @@ TEST(fd, old_master_died) tst_master->fd()->clear_workers(); wait_count.store(1); - tst_master->fd()->when_connected([&wait_count](rpc_address addr) mutable { + tst_master->fd()->when_connected([&wait_count](host_port addr) mutable { EXPECT_EQ(addr.port(), WPORT); --wait_count; }); @@ -580,7 +591,7 @@ TEST(fd, worker_died_when_switch_master) std::atomic_int wait_count; wait_count.store(2); - auto cb = [&wait_count](rpc_address) mutable { --wait_count; }; + auto cb = [&wait_count](host_port) mutable { --wait_count; }; worker->fd()->when_connected(cb); tst_master->fd()->when_connected(cb); @@ -600,19 +611,19 @@ TEST(fd, worker_died_when_switch_master) wait_count.store(2); tst_master->fd()->when_disconnected( - [&wait_count](const std::vector &worker_list) mutable { + [&wait_count](const std::vector &worker_list) mutable { ASSERT_EQ(worker_list.size(), 1); ASSERT_EQ(worker_list[0].port(), WPORT); wait_count--; }); worker->fd()->when_disconnected( - [&wait_count](const std::vector &master_list) mutable { + [&wait_count](const std::vector &master_list) mutable { ASSERT_EQ(master_list.size(), 1); wait_count--; }); /* we assume the worker is alive */ - tst_master->fd()->test_register_worker(rpc_address("localhost", WPORT)); + 
tst_master->fd()->test_register_worker(host_port("localhost", WPORT)); master_group_set_leader(masters, index); /* then stop the worker*/ @@ -657,14 +668,16 @@ TEST(fd, update_stability) msg.time = dsn_now_ms(); msg.__isset.start_time = true; msg.start_time = 1000; + msg.__set_hp_from_addr(host_port("localhost", 123)); + msg.__set_hp_to_addr(host_port("localhost", MPORT_START)); // first on ping fd->on_ping(msg, r); ASSERT_EQ(1, smap->size()); - ASSERT_NE(smap->end(), smap->find(msg.from_addr)); + ASSERT_NE(smap->end(), smap->find(msg.hp_from_addr)); replication::meta_server_failure_detector::worker_stability &ws = - smap->find(msg.from_addr)->second; + smap->find(msg.hp_from_addr)->second; ASSERT_EQ(0, ws.unstable_restart_count); ASSERT_EQ(msg.start_time, ws.last_start_time_ms); ASSERT_TRUE(r.is_empty()); @@ -732,7 +745,7 @@ TEST(fd, update_stability) ASSERT_FALSE(r.is_empty()); // reset stat - fd->reset_stability_stat(msg.from_addr); + fd->reset_stability_stat(msg.hp_from_addr); ASSERT_EQ(msg.start_time, ws.last_start_time_ms); ASSERT_EQ(0, ws.unstable_restart_count); } @@ -751,7 +764,7 @@ TEST(fd, not_in_whitelist) std::atomic_int wait_count; wait_count.store(1); - auto cb = [&wait_count](rpc_address) mutable { --wait_count; }; + auto cb = [&wait_count](host_port) mutable { --wait_count; }; worker->fd()->when_connected(cb); worker->fd()->toggle_send_ping(true); diff --git a/src/geo/test/geo_test.cpp b/src/geo/test/geo_test.cpp index 69074ac886..5016e7abb0 100644 --- a/src/geo/test/geo_test.cpp +++ b/src/geo/test/geo_test.cpp @@ -40,7 +40,7 @@ #include "geo/lib/geo_client.h" #include "gtest/gtest.h" #include "pegasus/client.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/blob.h" #include "utils/error_code.h" #include "utils/flags.h" @@ -61,7 +61,7 @@ class geo_client_test : public ::testing::Test public: geo_client_test() { - std::vector meta_list; + std::vector meta_list; bool ok = 
dsn::replication::replica_helper::load_meta_servers( meta_list, PEGASUS_CLUSTER_SECTION_NAME.c_str(), "onebox"); CHECK(ok, "load_meta_servers failed"); diff --git a/src/include/rrdb/rrdb.client.h b/src/include/rrdb/rrdb.client.h index 8792c08e7f..9e62acde87 100644 --- a/src/include/rrdb/rrdb.client.h +++ b/src/include/rrdb/rrdb.client.h @@ -25,6 +25,7 @@ #include "duplication_internal_types.h" #include "rrdb.code.definition.h" #include "rrdb_types.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_holder.h" #include "runtime/task/task_tracker.h" #include "utils/optional.h" @@ -39,11 +40,12 @@ class rrdb_client public: rrdb_client() {} explicit rrdb_client(const char *cluster_name, - const std::vector &meta_list, - const char *app_name) + const std::vector &meta_list, + const char *app_name, + const std::shared_ptr &dns_resolver) { - _resolver = - dsn::replication::partition_resolver::get_resolver(cluster_name, meta_list, app_name); + _resolver = dsn::replication::partition_resolver::get_resolver( + cluster_name, meta_list, app_name, dns_resolver); } ~rrdb_client() { _tracker.cancel_outstanding_tasks(); } diff --git a/src/meta/app_balance_policy.cpp b/src/meta/app_balance_policy.cpp index 972f8ba5a1..0607715946 100644 --- a/src/meta/app_balance_policy.cpp +++ b/src/meta/app_balance_policy.cpp @@ -25,12 +25,13 @@ #include "app_balance_policy.h" #include "common/gpid.h" #include "meta/load_balance_policy.h" +#include "meta/meta_service.h" #include "metadata_types.h" #include "utils/flags.h" #include "utils/fmt_logging.h" namespace dsn { -class rpc_address; +class dns_resolver; namespace replication { DSN_DEFINE_bool(meta_server, balancer_in_turn, false, "balance the apps one-by-one/concurrently"); @@ -80,6 +81,7 @@ void app_balance_policy::balance(bool checker, const meta_view *global_view, mig { init(global_view, list); const app_mapper &apps = *_global_view->apps; + if (!execute_balance(apps, checker, _balancer_in_turn, @@ -129,7 +131,7 @@ bool 
app_balance_policy::copy_secondary(const std::shared_ptr &app, b int replicas_low = app->partition_count / _alive_nodes; std::unique_ptr operation = std::make_unique( - app, apps, nodes, address_vec, address_id, replicas_low); + app, apps, nodes, host_port_vec, host_port_id, _svc->get_dns_resolver(), replicas_low); return operation->start(_migration_result); } @@ -137,17 +139,19 @@ copy_secondary_operation::copy_secondary_operation( const std::shared_ptr app, const app_mapper &apps, node_mapper &nodes, - const std::vector &address_vec, - const std::unordered_map &address_id, + const std::vector &host_port_vec, + const std::unordered_map &host_port_id, + const std::shared_ptr &resolver, int replicas_low) - : copy_replica_operation(app, apps, nodes, address_vec, address_id), _replicas_low(replicas_low) + : copy_replica_operation(app, apps, nodes, host_port_vec, host_port_id, resolver), + _replicas_low(replicas_low) { } bool copy_secondary_operation::can_continue() { - int id_min = *_ordered_address_ids.begin(); - int id_max = *_ordered_address_ids.rbegin(); + int id_min = *_ordered_host_port_ids.begin(); + int id_max = *_ordered_host_port_ids.rbegin(); if (_partition_counts[id_max] <= _replicas_low || _partition_counts[id_max] - _partition_counts[id_min] <= 1) { LOG_INFO("{}: stop copy secondary coz it will be balanced later", _app->get_logname()); @@ -163,8 +167,8 @@ int copy_secondary_operation::get_partition_count(const node_state &ns) const bool copy_secondary_operation::can_select(gpid pid, migration_list *result) { - int id_max = *_ordered_address_ids.rbegin(); - const node_state &max_ns = _nodes.at(_address_vec[id_max]); + int id_max = *_ordered_host_port_ids.rbegin(); + const node_state &max_ns = _nodes.at(_host_port_vec[id_max]); if (max_ns.served_as(pid) == partition_status::PS_PRIMARY) { LOG_DEBUG("{}: skip gpid({}.{}) coz it is primary", _app->get_logname(), @@ -182,8 +186,8 @@ bool copy_secondary_operation::can_select(gpid pid, migration_list *result) 
return false; } - int id_min = *_ordered_address_ids.begin(); - const node_state &min_ns = _nodes.at(_address_vec[id_min]); + int id_min = *_ordered_host_port_ids.begin(); + const node_state &min_ns = _nodes.at(_host_port_vec[id_min]); if (min_ns.served_as(pid) != partition_status::PS_INACTIVE) { LOG_DEBUG("{}: skip gpid({}.{}) coz it is already a member on the target node", _app->get_logname(), diff --git a/src/meta/app_balance_policy.h b/src/meta/app_balance_policy.h index 595e4408fc..ad4e114816 100644 --- a/src/meta/app_balance_policy.h +++ b/src/meta/app_balance_policy.h @@ -27,8 +27,9 @@ #include "utils/command_manager.h" namespace dsn { +class dns_resolver; class gpid; -class rpc_address; +class host_port; namespace replication { class meta_service; @@ -59,8 +60,9 @@ class copy_secondary_operation : public copy_replica_operation copy_secondary_operation(const std::shared_ptr app, const app_mapper &apps, node_mapper &nodes, - const std::vector &address_vec, - const std::unordered_map &address_id, + const std::vector &address_vec, + const std::unordered_map &address_id, + const std::shared_ptr &resolver, int replicas_low); ~copy_secondary_operation() = default; diff --git a/src/meta/backup_engine.cpp b/src/meta/backup_engine.cpp index 88791eb5e3..e551b0d68a 100644 --- a/src/meta/backup_engine.cpp +++ b/src/meta/backup_engine.cpp @@ -36,8 +36,9 @@ #include "meta/meta_data.h" #include "meta/meta_service.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_holder.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/async_calls.h" #include "runtime/task/task.h" #include "runtime/task/task_code.h" @@ -54,8 +55,13 @@ namespace dsn { namespace replication { -backup_engine::backup_engine(backup_service *service) - : _backup_service(service), _block_service(nullptr), _backup_path(""), _is_backup_failed(false) +backup_engine::backup_engine(backup_service *service, + const 
std::shared_ptr &dns_resolver) + : _backup_service(service), + _block_service(nullptr), + _backup_path(""), + _is_backup_failed(false), + _dns_resolver(dns_resolver) { } @@ -169,7 +175,7 @@ error_code backup_engine::backup_app_meta() void backup_engine::backup_app_partition(const gpid &pid) { - dsn::rpc_address partition_primary; + dsn::host_port partition_primary; { zauto_read_lock l; _backup_service->get_state()->lock_read(l); @@ -181,7 +187,7 @@ void backup_engine::backup_app_partition(const gpid &pid) _is_backup_failed = true; return; } - partition_primary = app->partitions[pid.get_partition_index()].primary; + partition_primary = app->partitions[pid.get_partition_index()].hp_primary; } if (partition_primary.is_invalid()) { @@ -214,10 +220,11 @@ void backup_engine::backup_app_partition(const gpid &pid) pid, partition_primary); backup_rpc rpc(std::move(req), RPC_COLD_BACKUP, 10000_ms, 0, pid.thread_hash()); - rpc.call( - partition_primary, &_tracker, [this, rpc, pid, partition_primary](error_code err) mutable { - on_backup_reply(err, rpc.response(), pid, partition_primary); - }); + rpc.call(_dns_resolver->resolve_address(partition_primary), + &_tracker, + [this, rpc, pid, partition_primary](error_code err) mutable { + on_backup_reply(err, rpc.response(), pid, partition_primary); + }); zauto_lock l(_lock); _backup_status[pid.get_partition_index()] = backup_status::ALIVE; @@ -251,7 +258,7 @@ inline void backup_engine::retry_backup(const dsn::gpid pid) void backup_engine::on_backup_reply(const error_code err, const backup_response &response, const gpid pid, - const rpc_address &primary) + const host_port &primary) { { zauto_lock l(_lock); diff --git a/src/meta/backup_engine.h b/src/meta/backup_engine.h index 20033c5da0..4aeacc4eee 100644 --- a/src/meta/backup_engine.h +++ b/src/meta/backup_engine.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "backup_types.h" @@ -30,8 +31,10 @@ namespace dsn { class blob; +class dns_resolver; class gpid; 
-class rpc_address; +class host_port; + namespace dist { namespace block_service { class block_filesystem; @@ -67,7 +70,7 @@ class backup_service; class backup_engine { public: - backup_engine(backup_service *service); + backup_engine(backup_service *service, const std::shared_ptr &dns_resolver); ~backup_engine(); error_code init_backup(int32_t app_id); @@ -96,7 +99,7 @@ class backup_engine void on_backup_reply(error_code err, const backup_response &response, gpid pid, - const rpc_address &primary); + const host_port &primary); void write_backup_info(); void complete_current_backup(); void handle_replica_backup_failed(const backup_response &response, const gpid pid); @@ -119,6 +122,8 @@ class backup_engine app_backup_info _cur_backup; // partition_id -> backup_status std::map _backup_status; + + std::shared_ptr _dns_resolver; }; } // namespace replication diff --git a/src/meta/cluster_balance_policy.cpp b/src/meta/cluster_balance_policy.cpp index febfff5355..0e4051f78e 100644 --- a/src/meta/cluster_balance_policy.cpp +++ b/src/meta/cluster_balance_policy.cpp @@ -26,13 +26,14 @@ #include "dsn.layer2_types.h" #include "meta/load_balance_policy.h" +#include "meta/meta_service.h" +#include "runtime/rpc/dns_resolver.h" #include "utils/flags.h" #include "utils/fmt_logging.h" #include "utils/utils.h" namespace dsn { namespace replication { -class meta_service; DSN_DEFINE_uint32(meta_server, balance_op_count_per_round, @@ -64,7 +65,7 @@ uint32_t get_partition_count(const node_state &ns, balance_type type, int32_t ap return (uint32_t)count; } -uint32_t get_skew(const std::map &count_map) +uint32_t get_skew(const std::map &count_map) { uint32_t min = UINT_MAX, max = 0; for (const auto &kv : count_map) { @@ -78,11 +79,11 @@ uint32_t get_skew(const std::map &count_map) return max - min; } -void get_min_max_set(const std::map &node_count_map, - /*out*/ std::set &min_set, - /*out*/ std::set &max_set) +void get_min_max_set(const std::map &node_count_map, + /*out*/ std::set 
&min_set, + /*out*/ std::set &max_set) { - std::multimap count_multimap = utils::flip_map(node_count_map); + std::multimap count_multimap = utils::flip_map(node_count_map); auto range = count_multimap.equal_range(count_multimap.begin()->first); for (auto iter = range.first; iter != range.second; ++iter) { @@ -222,14 +223,14 @@ bool cluster_balance_policy::get_app_migration_info(std::shared_ptr a info.app_name = app->app_name; info.partitions.resize(app->partitions.size()); for (auto i = 0; i < app->partitions.size(); ++i) { - std::map pstatus_map; - pstatus_map[app->partitions[i].primary] = partition_status::PS_PRIMARY; - if (app->partitions[i].secondaries.size() != app->partitions[i].max_replica_count - 1) { + std::map pstatus_map; + pstatus_map[app->partitions[i].hp_primary] = partition_status::PS_PRIMARY; + if (app->partitions[i].hp_secondaries.size() != app->partitions[i].max_replica_count - 1) { // partition is unhealthy return false; } - for (const auto &addr : app->partitions[i].secondaries) { - pstatus_map[addr] = partition_status::PS_SECONDARY; + for (const auto &hp : app->partitions[i].hp_secondaries) { + pstatus_map[hp] = partition_status::PS_SECONDARY; } info.partitions[i] = pstatus_map; } @@ -237,7 +238,7 @@ bool cluster_balance_policy::get_app_migration_info(std::shared_ptr a for (const auto &it : nodes) { const node_state &ns = it.second; auto count = get_partition_count(ns, type, app->app_id); - info.replicas_count[ns.addr()] = count; + info.replicas_count[ns.host_port()] = count; } return true; @@ -247,12 +248,12 @@ void cluster_balance_policy::get_node_migration_info(const node_state &ns, const app_mapper &apps, /*out*/ node_migration_info &info) { - info.address = ns.addr(); + info.hp = ns.host_port(); for (const auto &iter : apps) { std::shared_ptr app = iter.second; for (const auto &context : app->helpers->contexts) { std::string disk_tag; - if (!context.get_disk_tag(ns.addr(), disk_tag)) { + if (!context.get_disk_tag(ns.host_port(), disk_tag)) 
{ continue; } auto pid = context.config_owner->pid; @@ -290,8 +291,8 @@ bool cluster_balance_policy::get_next_move(const cluster_migration_info &cluster * a move that improves the app skew and the cluster skew, if possible. If * not, attempt to pick a move that improves the app skew. **/ - std::set cluster_min_count_nodes; - std::set cluster_max_count_nodes; + std::set cluster_min_count_nodes; + std::set cluster_max_count_nodes; get_min_max_set(cluster_info.replicas_count, cluster_min_count_nodes, cluster_max_count_nodes); bool found = false; @@ -303,8 +304,8 @@ bool cluster_balance_policy::get_next_move(const cluster_migration_info &cluster continue; } auto app_map = it->second.replicas_count; - std::set app_min_count_nodes; - std::set app_max_count_nodes; + std::set app_min_count_nodes; + std::set app_max_count_nodes; get_min_max_set(app_map, app_min_count_nodes, app_max_count_nodes); /** @@ -312,9 +313,9 @@ bool cluster_balance_policy::get_next_move(const cluster_migration_info &cluster * with the replica servers most loaded overall, and likewise for least loaded. * These are our ideal candidates for moving from and to, respectively. **/ - std::set app_cluster_min_set = + std::set app_cluster_min_set = utils::get_intersection(app_min_count_nodes, cluster_min_count_nodes); - std::set app_cluster_max_set = + std::set app_cluster_max_set = utils::get_intersection(app_max_count_nodes, cluster_max_count_nodes); /** @@ -323,7 +324,7 @@ bool cluster_balance_policy::get_next_move(const cluster_migration_info &cluster * replicas of the app. Moving a replica in that case might keep the * cluster skew the same or make it worse while keeping the app balanced. 
**/ - std::multimap app_count_multimap = utils::flip_map(app_map); + std::multimap app_count_multimap = utils::flip_map(app_map); if (app_count_multimap.rbegin()->first <= app_count_multimap.begin()->first + 1 && (app_cluster_min_set.empty() || app_cluster_max_set.empty())) { LOG_INFO("do not move replicas of a balanced app({}) if the least (most) loaded " @@ -356,8 +357,8 @@ auto select_random(const S &s, size_t n) } bool cluster_balance_policy::pick_up_move(const cluster_migration_info &cluster_info, - const std::set &max_nodes, - const std::set &min_nodes, + const std::set &max_nodes, + const std::set &min_nodes, const int32_t app_id, const partition_set &selected_pid, /*out*/ move_info &move_info) @@ -373,19 +374,19 @@ bool cluster_balance_policy::pick_up_move(const cluster_migration_info &cluster_ max_load_disk.node, max_load_disk.disk_tag, max_load_disk.partitions.size()); - for (const auto &node_addr : min_nodes) { + for (const auto &node_hp : min_nodes) { gpid picked_pid; if (pick_up_partition( - cluster_info, node_addr, max_load_disk.partitions, selected_pid, picked_pid)) { + cluster_info, node_hp, max_load_disk.partitions, selected_pid, picked_pid)) { move_info.pid = picked_pid; move_info.source_node = max_load_disk.node; move_info.source_disk_tag = max_load_disk.disk_tag; - move_info.target_node = node_addr; + move_info.target_node = node_hp; move_info.type = cluster_info.type; LOG_INFO("partition[{}] will migrate from {} to {}", picked_pid, max_load_disk.node, - node_addr); + node_hp); return true; } } @@ -398,22 +399,22 @@ bool cluster_balance_policy::pick_up_move(const cluster_migration_info &cluster_ void cluster_balance_policy::get_max_load_disk_set( const cluster_migration_info &cluster_info, - const std::set &max_nodes, + const std::set &max_nodes, const int32_t app_id, /*out*/ std::set &max_load_disk_set) { // key: partition count (app_disk_info.partitions.size()) // value: app_disk_info structure std::multimap app_disk_info_multimap; - for 
(const auto &node_addr : max_nodes) { + for (const auto &node_hp : max_nodes) { // key: disk_tag - // value: partition set for app(app id=app_id) in node(addr=node_addr) + // value: partition set for app(app id=app_id) in node(hp=node_hp) std::map disk_partitions = - get_disk_partitions_map(cluster_info, node_addr, app_id); + get_disk_partitions_map(cluster_info, node_hp, app_id); for (const auto &kv : disk_partitions) { app_disk_info info; info.app_id = app_id; - info.node = node_addr; + info.node = node_hp; info.disk_tag = kv.first; info.partitions = kv.second; app_disk_info_multimap.insert( @@ -427,11 +428,11 @@ void cluster_balance_policy::get_max_load_disk_set( } std::map cluster_balance_policy::get_disk_partitions_map( - const cluster_migration_info &cluster_info, const rpc_address &addr, const int32_t app_id) + const cluster_migration_info &cluster_info, const host_port &hp, const int32_t app_id) { std::map disk_partitions; auto app_iter = cluster_info.apps_info.find(app_id); - auto node_iter = cluster_info.nodes_info.find(addr); + auto node_iter = cluster_info.nodes_info.find(hp); if (app_iter == cluster_info.apps_info.end() || node_iter == cluster_info.nodes_info.end()) { return disk_partitions; } @@ -447,7 +448,7 @@ std::map cluster_balance_policy::get_disk_partitions continue; } auto status_map = app_partition[pid.get_partition_index()]; - auto iter = status_map.find(addr); + auto iter = status_map.find(hp); if (iter != status_map.end() && iter->second == status) { disk_partitions[disk_tag].insert(pid); } @@ -457,7 +458,7 @@ std::map cluster_balance_policy::get_disk_partitions } bool cluster_balance_policy::pick_up_partition(const cluster_migration_info &cluster_info, - const rpc_address &min_node_addr, + const host_port &min_node_hp, const partition_set &max_load_partitions, const partition_set &selected_pid, /*out*/ gpid &picked_pid) @@ -476,7 +477,7 @@ bool cluster_balance_policy::pick_up_partition(const cluster_migration_info &clu // partition has 
already been primary or secondary on min_node app_migration_info info = iter->second; - if (info.get_partition_status(pid.get_partition_index(), min_node_addr) != + if (info.get_partition_status(pid.get_partition_index(), min_node_hp) != partition_status::PS_INACTIVE) { continue; } @@ -494,7 +495,7 @@ bool cluster_balance_policy::apply_move(const move_info &move, /*out*/ cluster_migration_info &cluster_info) { int32_t app_id = move.pid.get_app_id(); - rpc_address source = move.source_node, target = move.target_node; + host_port source = move.source_node, target = move.target_node; if (cluster_info.apps_skew.find(app_id) == cluster_info.apps_skew.end() || cluster_info.replicas_count.find(source) == cluster_info.replicas_count.end() || cluster_info.replicas_count.find(target) == cluster_info.replicas_count.end() || @@ -512,10 +513,10 @@ bool cluster_balance_policy::apply_move(const move_info &move, app_info.replicas_count[target]++; auto &pmap = app_info.partitions[move.pid.get_partition_index()]; - rpc_address primary_addr; + host_port primary_hp; for (const auto &kv : pmap) { if (kv.second == partition_status::PS_PRIMARY) { - primary_addr = kv.first; + primary_hp = kv.first; } } auto status = cluster_info.type == balance_type::COPY_SECONDARY ? 
partition_status::PS_SECONDARY @@ -544,10 +545,15 @@ bool cluster_balance_policy::apply_move(const move_info &move, // add into migration list and selected_pid partition_configuration pc; pc.pid = move.pid; - pc.primary = primary_addr; - list[move.pid] = generate_balancer_request(*_global_view->apps, pc, move.type, source, target); + pc.hp_primary = primary_hp; + const auto &source_addr = _svc->get_dns_resolver()->resolve_address(source); + const auto &target_addr = _svc->get_dns_resolver()->resolve_address(target); + list[move.pid] = generate_balancer_request( + *_global_view->apps, pc, move.type, source_addr, target_addr, source, target); _migration_result->emplace( - move.pid, generate_balancer_request(*_global_view->apps, pc, move.type, source, target)); + move.pid, + generate_balancer_request( + *_global_view->apps, pc, move.type, source_addr, target_addr, source, target)); selected_pids.insert(move.pid); cluster_info.apps_skew[app_id] = get_skew(app_info.replicas_count); diff --git a/src/meta/cluster_balance_policy.h b/src/meta/cluster_balance_policy.h index 474935d897..196bf5a9b0 100644 --- a/src/meta/cluster_balance_policy.h +++ b/src/meta/cluster_balance_policy.h @@ -32,17 +32,17 @@ #include "load_balance_policy.h" #include "meta/meta_data.h" #include "metadata_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" namespace dsn { namespace replication { class meta_service; uint32_t get_partition_count(const node_state &ns, balance_type type, int32_t app_id); -uint32_t get_skew(const std::map &count_map); -void get_min_max_set(const std::map &node_count_map, - /*out*/ std::set &min_set, - /*out*/ std::set &max_set); +uint32_t get_skew(const std::map &count_map); +void get_min_max_set(const std::map &node_count_map, + /*out*/ std::set &min_set, + /*out*/ std::set &max_set); class cluster_balance_policy : public load_balance_policy { @@ -79,19 +79,19 @@ class cluster_balance_policy : public load_balance_policy const 
partition_set &selected_pid, /*out*/ move_info &next_move); bool pick_up_move(const cluster_migration_info &cluster_info, - const std::set &max_nodes, - const std::set &min_nodes, + const std::set &max_nodes, + const std::set &min_nodes, const int32_t app_id, const partition_set &selected_pid, /*out*/ move_info &move_info); void get_max_load_disk_set(const cluster_migration_info &cluster_info, - const std::set &max_nodes, + const std::set &max_nodes, const int32_t app_id, /*out*/ std::set &max_load_disk_set); std::map get_disk_partitions_map( - const cluster_migration_info &cluster_info, const rpc_address &addr, const int32_t app_id); + const cluster_migration_info &cluster_info, const host_port &node, const int32_t app_id); bool pick_up_partition(const cluster_migration_info &cluster_info, - const rpc_address &min_node_addr, + const host_port &min_node_hp, const partition_set &max_load_partitions, const partition_set &selected_pid, /*out*/ gpid &picked_pid); @@ -104,8 +104,8 @@ class cluster_balance_policy : public load_balance_policy { int32_t app_id; std::string app_name; - std::vector> partitions; - std::map replicas_count; + std::vector> partitions; + std::map replicas_count; bool operator<(const app_migration_info &another) const { if (app_id < another.app_id) @@ -116,10 +116,10 @@ class cluster_balance_policy : public load_balance_policy { return app_id == another.app_id; } - partition_status::type get_partition_status(int32_t pidx, rpc_address addr) + partition_status::type get_partition_status(int32_t pidx, host_port node) { for (const auto &kv : partitions[pidx]) { - if (kv.first == addr) { + if (kv.first == node) { return kv.second; } } @@ -129,18 +129,12 @@ class cluster_balance_policy : public load_balance_policy struct node_migration_info { - rpc_address address; + host_port hp; // key-disk tag, value-partition set std::map partitions; partition_set future_partitions; - bool operator<(const node_migration_info &another) const - { - return address < 
another.address; - } - bool operator==(const node_migration_info &another) const - { - return address == another.address; - } + bool operator<(const node_migration_info &another) const { return hp < another.hp; } + bool operator==(const node_migration_info &another) const { return hp == another.hp; } }; struct cluster_migration_info @@ -148,14 +142,14 @@ class cluster_balance_policy : public load_balance_policy balance_type type; std::map apps_skew; std::map apps_info; - std::map nodes_info; - std::map replicas_count; + std::map nodes_info; + std::map replicas_count; }; struct app_disk_info { int32_t app_id; - rpc_address node; + host_port node; std::string disk_tag; partition_set partitions; bool operator==(const app_disk_info &another) const @@ -174,9 +168,9 @@ class cluster_balance_policy : public load_balance_policy struct move_info { gpid pid; - rpc_address source_node; + host_port source_node; std::string source_disk_tag; - rpc_address target_node; + host_port target_node; balance_type type; }; diff --git a/src/meta/duplication/duplication_info.cpp b/src/meta/duplication/duplication_info.cpp index 7f9a295361..1735269157 100644 --- a/src/meta/duplication/duplication_info.cpp +++ b/src/meta/duplication/duplication_info.cpp @@ -212,7 +212,7 @@ duplication_info_s_ptr duplication_info::decode_from_blob(dupid_t dup_id, if (!json::json_forwarder::decode(json, info)) { return nullptr; } - std::vector meta_list; + std::vector meta_list; if (!dsn::replication::replica_helper::load_meta_servers( meta_list, duplication_constants::kClustersSectionName.c_str(), info.remote.c_str())) { return nullptr; diff --git a/src/meta/duplication/duplication_info.h b/src/meta/duplication/duplication_info.h index 279a57ce68..1c190ef9d9 100644 --- a/src/meta/duplication/duplication_info.h +++ b/src/meta/duplication/duplication_info.h @@ -31,7 +31,7 @@ #include "common/json_helper.h" #include "common/replication_other_types.h" #include "duplication_types.h" -#include 
"runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/blob.h" #include "utils/error_code.h" #include "utils/fmt_logging.h" @@ -58,7 +58,7 @@ class duplication_info int32_t partition_count, uint64_t create_now_ms, std::string follower_cluster_name, - std::vector &&follower_cluster_metas, + std::vector &&follower_cluster_metas, std::string meta_store_path) : id(dupid), app_id(appid), @@ -247,7 +247,7 @@ class duplication_info const int32_t partition_count{0}; const std::string follower_cluster_name; - const std::vector follower_cluster_metas; + const std::vector follower_cluster_metas; const std::string store_path; // store path on meta service = get_duplication_path(app, dupid) const uint64_t create_timestamp_ms{0}; // the time when this dup is created. const std::string prefix_for_log; diff --git a/src/meta/duplication/meta_duplication_service.cpp b/src/meta/duplication/meta_duplication_service.cpp index 094dda6985..42cab1c672 100644 --- a/src/meta/duplication/meta_duplication_service.cpp +++ b/src/meta/duplication/meta_duplication_service.cpp @@ -34,8 +34,10 @@ #include "meta_duplication_service.h" #include "metadata_types.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/group_address.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/group_host_port.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -176,7 +178,7 @@ void meta_duplication_service::add_duplication(duplication_add_rpc rpc) return; } - std::vector meta_list; + std::vector meta_list; if (!dsn::replication::replica_helper::load_meta_servers( meta_list, duplication_constants::kClustersSectionName.c_str(), @@ -273,7 +275,7 @@ void meta_duplication_service::duplication_sync(duplication_sync_rpc rpc) auto &response = rpc.response(); response.err = ERR_OK; - node_state *ns = get_node_state(_state->_nodes, 
request.node, false); + node_state *ns = get_node_state(_state->_nodes, host_port(request.node), false); if (ns == nullptr) { LOG_WARNING("node({}) is not found in meta server", request.node); response.err = ERR_OBJECT_NOT_FOUND; @@ -358,14 +360,14 @@ void meta_duplication_service::create_follower_app_for_duplication( request.options.envs.emplace(duplication_constants::kDuplicationEnvMasterMetasKey, _meta_svc->get_meta_list_string()); - rpc_address meta_servers; + host_port meta_servers; meta_servers.assign_group(dup->follower_cluster_name.c_str()); - meta_servers.group_address()->add_list(dup->follower_cluster_metas); + meta_servers.group_host_port()->add_list(dup->follower_cluster_metas); dsn::message_ex *msg = dsn::message_ex::create_request(RPC_CM_CREATE_APP); dsn::marshall(msg, request); rpc::call( - meta_servers, + _dns_resolver->resolve_address(meta_servers), msg, _meta_svc->tracker(), [=](error_code err, configuration_create_app_response &&resp) mutable { @@ -406,16 +408,16 @@ void meta_duplication_service::create_follower_app_for_duplication( void meta_duplication_service::check_follower_app_if_create_completed( const std::shared_ptr &dup) { - rpc_address meta_servers; + host_port meta_servers; meta_servers.assign_group(dup->follower_cluster_name.c_str()); - meta_servers.group_address()->add_list(dup->follower_cluster_metas); + meta_servers.group_host_port()->add_list(dup->follower_cluster_metas); query_cfg_request meta_config_request; meta_config_request.app_name = dup->app_name; dsn::message_ex *msg = dsn::message_ex::create_request(RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX); dsn::marshall(msg, meta_config_request); - rpc::call(meta_servers, + rpc::call(_dns_resolver->resolve_address(meta_servers), msg, _meta_svc->tracker(), [=](error_code err, query_cfg_response &&resp) mutable { @@ -425,8 +427,12 @@ void meta_duplication_service::check_follower_app_if_create_completed( while (count-- > 0) { partition_configuration p; p.primary = rpc_address("127.0.0.1", 
34801); - p.secondaries.emplace_back(rpc_address("127.0.0.2", 34801)); - p.secondaries.emplace_back(rpc_address("127.0.0.3", 34801)); + p.secondaries.emplace_back(rpc_address("127.0.0.1", 34802)); + p.secondaries.emplace_back(rpc_address("127.0.0.1", 34803)); + p.__set_hp_primary(host_port("localhost", 34801)); + p.__set_hp_secondaries(std::vector()); + p.hp_secondaries.emplace_back(host_port("localhost", 34802)); + p.hp_secondaries.emplace_back(host_port("localhost", 34803)); resp.partitions.emplace_back(p); } }); @@ -439,17 +445,17 @@ void meta_duplication_service::check_follower_app_if_create_completed( query_err = ERR_INCONSISTENT_STATE; } else { for (const auto &partition : resp.partitions) { - if (partition.primary.is_invalid()) { + if (partition.hp_primary.is_invalid()) { query_err = ERR_INACTIVE_STATE; break; } - if (partition.secondaries.empty()) { + if (partition.hp_secondaries.empty()) { query_err = ERR_NOT_ENOUGH_MEMBER; break; } - for (const auto &secondary : partition.secondaries) { + for (const auto &secondary : partition.hp_secondaries) { if (secondary.is_invalid()) { query_err = ERR_INACTIVE_STATE; break; @@ -522,7 +528,7 @@ void meta_duplication_service::do_update_partition_confirmed( std::shared_ptr meta_duplication_service::new_dup_from_init(const std::string &follower_cluster_name, - std::vector &&follower_cluster_metas, + std::vector &&follower_cluster_metas, std::shared_ptr &app) const { duplication_info_s_ptr dup; diff --git a/src/meta/duplication/meta_duplication_service.h b/src/meta/duplication/meta_duplication_service.h index e9aa2f4c80..7abc6b0ab5 100644 --- a/src/meta/duplication/meta_duplication_service.h +++ b/src/meta/duplication/meta_duplication_service.h @@ -31,7 +31,8 @@ #include "utils/fmt_logging.h" namespace dsn { -class rpc_address; +class dns_resolver; +class host_port; class zrwlock_nr; namespace replication { @@ -58,7 +59,10 @@ class meta_service; class meta_duplication_service { public: - 
meta_duplication_service(server_state *ss, meta_service *ms) : _state(ss), _meta_svc(ms) + meta_duplication_service(server_state *ss, + meta_service *ms, + const std::shared_ptr &resolver) + : _state(ss), _meta_svc(ms), _dns_resolver(resolver) { CHECK_NOTNULL(_state, "_state should not be null"); CHECK_NOTNULL(_meta_svc, "_meta_svc should not be null"); @@ -122,7 +126,7 @@ class meta_duplication_service // Thread-Safe std::shared_ptr new_dup_from_init(const std::string &follower_cluster_name, - std::vector &&follower_cluster_metas, + std::vector &&follower_cluster_metas, std::shared_ptr &app) const; // get lock to protect access of app table @@ -148,6 +152,8 @@ class meta_duplication_service server_state *_state; meta_service *_meta_svc; + + std::shared_ptr _dns_resolver; }; } // namespace replication diff --git a/src/meta/greedy_load_balancer.cpp b/src/meta/greedy_load_balancer.cpp index dc8ef4f9d9..0c6702435e 100644 --- a/src/meta/greedy_load_balancer.cpp +++ b/src/meta/greedy_load_balancer.cpp @@ -42,7 +42,7 @@ #include "meta/table_metrics.h" #include "meta_admin_types.h" #include "meta_data.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/command_manager.h" #include "utils/flags.h" #include "utils/fmt_logging.h" @@ -144,7 +144,7 @@ void greedy_load_balancer::score(meta_view view, double &primary_stddev, double bool greedy_load_balancer::all_replica_infos_collected(const node_state &ns) { - dsn::rpc_address n = ns.addr(); + const auto &n = ns.host_port(); return ns.for_each_partition([this, n](const dsn::gpid &pid) { config_context &cc = *get_config_context(*(t_global_view->apps), pid); if (cc.find_from_serving(n) == cc.serving.end()) { diff --git a/src/meta/greedy_load_balancer.h b/src/meta/greedy_load_balancer.h index d11801b2bd..45710963a2 100644 --- a/src/meta/greedy_load_balancer.h +++ b/src/meta/greedy_load_balancer.h @@ -33,9 +33,11 @@ #include "meta/meta_data.h" #include "meta_admin_types.h" #include 
"server_load_balancer.h" +#include "utils/fmt_utils.h" namespace dsn { class command_deregister; +class host_port; class rpc_address; namespace replication { @@ -83,15 +85,22 @@ class greedy_load_balancer : public server_load_balancer bool all_replica_infos_collected(const node_state &ns); }; -inline configuration_proposal_action -new_proposal_action(const rpc_address &target, const rpc_address &node, config_type::type type) +inline configuration_proposal_action new_proposal_action(const rpc_address &target, + const rpc_address &node, + const host_port &hp_target, + const host_port &hp_node, + config_type::type type) { configuration_proposal_action act; act.__set_target(target); act.__set_node(node); + act.__set_hp_target(hp_target); + act.__set_hp_node(hp_node); act.__set_type(type); return act; } } // namespace replication } // namespace dsn + +USER_DEFINED_STRUCTURE_FORMATTER(::dsn::replication::configuration_proposal_action); diff --git a/src/meta/load_balance_policy.cpp b/src/meta/load_balance_policy.cpp index 59d30dd2e4..c3d2bc9d7f 100644 --- a/src/meta/load_balance_policy.cpp +++ b/src/meta/load_balance_policy.cpp @@ -28,7 +28,9 @@ #include "dsn.layer2_types.h" #include "meta/greedy_load_balancer.h" #include "meta/meta_data.h" +#include "meta/meta_service.h" #include "meta_admin_types.h" +#include "runtime/rpc/dns_resolver.h" #include "utils/command_manager.h" #include "utils/fail_point.h" #include "utils/flags.h" @@ -38,10 +40,12 @@ #include "utils/strings.h" namespace dsn { +class rpc_address; + namespace replication { DSN_DECLARE_uint64(min_live_node_count_for_unfreeze); -void dump_disk_load(app_id id, const rpc_address &node, bool only_primary, const disk_load &load) +void dump_disk_load(app_id id, const host_port &node, bool only_primary, const disk_load &load) { std::ostringstream load_string; load_string << std::endl << "<<<<<<<<<<" << std::endl; @@ -62,7 +66,7 @@ void dump_disk_load(app_id id, const rpc_address &node, bool only_primary, const bool 
calc_disk_load(node_mapper &nodes, const app_mapper &apps, app_id id, - const rpc_address &node, + const host_port &node, bool only_primary, /*out*/ disk_load &load) { @@ -95,13 +99,12 @@ bool calc_disk_load(node_mapper &nodes, } } -std::unordered_map -get_node_loads(const std::shared_ptr &app, - const app_mapper &apps, - node_mapper &nodes, - bool only_primary) +std::unordered_map get_node_loads(const std::shared_ptr &app, + const app_mapper &apps, + node_mapper &nodes, + bool only_primary) { - std::unordered_map node_loads; + std::unordered_map node_loads; for (auto iter = nodes.begin(); iter != nodes.end(); ++iter) { if (!calc_disk_load( nodes, apps, app->app_id, iter->first, only_primary, node_loads[iter->first])) { @@ -115,7 +118,7 @@ get_node_loads(const std::shared_ptr &app, return node_loads; } -const std::string &get_disk_tag(const app_mapper &apps, const rpc_address &node, const gpid &pid) +const std::string &get_disk_tag(const app_mapper &apps, const host_port &node, const gpid &pid) { const config_context &cc = *get_config_context(apps, pid); auto iter = cc.find_from_serving(node); @@ -128,7 +131,9 @@ generate_balancer_request(const app_mapper &apps, const partition_configuration &pc, const balance_type &type, const rpc_address &from, - const rpc_address &to) + const rpc_address &to, + const host_port &hp_from, + const host_port &hp_to) { FAIL_POINT_INJECT_F("generate_balancer_request", [](absl::string_view name) { return nullptr; }); @@ -141,38 +146,40 @@ generate_balancer_request(const app_mapper &apps, case balance_type::MOVE_PRIMARY: ans = "move_primary"; result.balance_type = balancer_request_type::move_primary; + result.action_list.emplace_back(new_proposal_action( + from, from, hp_from, hp_from, config_type::CT_DOWNGRADE_TO_SECONDARY)); result.action_list.emplace_back( - new_proposal_action(from, from, config_type::CT_DOWNGRADE_TO_SECONDARY)); - result.action_list.emplace_back( - new_proposal_action(to, to, config_type::CT_UPGRADE_TO_PRIMARY)); + 
new_proposal_action(to, to, hp_to, hp_to, config_type::CT_UPGRADE_TO_PRIMARY)); break; case balance_type::COPY_PRIMARY: ans = "copy_primary"; result.balance_type = balancer_request_type::copy_primary; result.action_list.emplace_back( - new_proposal_action(from, to, config_type::CT_ADD_SECONDARY_FOR_LB)); + new_proposal_action(from, to, hp_from, hp_to, config_type::CT_ADD_SECONDARY_FOR_LB)); + result.action_list.emplace_back(new_proposal_action( + from, from, hp_from, hp_from, config_type::CT_DOWNGRADE_TO_SECONDARY)); result.action_list.emplace_back( - new_proposal_action(from, from, config_type::CT_DOWNGRADE_TO_SECONDARY)); + new_proposal_action(to, to, hp_to, hp_to, config_type::CT_UPGRADE_TO_PRIMARY)); result.action_list.emplace_back( - new_proposal_action(to, to, config_type::CT_UPGRADE_TO_PRIMARY)); - result.action_list.emplace_back(new_proposal_action(to, from, config_type::CT_REMOVE)); + new_proposal_action(to, from, hp_to, hp_from, config_type::CT_REMOVE)); break; case balance_type::COPY_SECONDARY: ans = "copy_secondary"; result.balance_type = balancer_request_type::copy_secondary; + result.action_list.emplace_back(new_proposal_action( + pc.primary, to, pc.hp_primary, hp_to, config_type::CT_ADD_SECONDARY_FOR_LB)); result.action_list.emplace_back( - new_proposal_action(pc.primary, to, config_type::CT_ADD_SECONDARY_FOR_LB)); - result.action_list.emplace_back( - new_proposal_action(pc.primary, from, config_type::CT_REMOVE)); + new_proposal_action(pc.primary, from, pc.hp_primary, hp_from, config_type::CT_REMOVE)); break; default: CHECK(false, ""); } - LOG_INFO("generate balancer: {} {} from {} of disk_tag({}) to {}", + LOG_INFO("generate balancer: {} {} from {}({}) of disk_tag({}) to {}", pc.pid, ans, + hp_from, from, - get_disk_tag(apps, from, pc.pid), + get_disk_tag(apps, hp_from, pc.pid), to); return std::make_shared(std::move(result)); } @@ -211,7 +218,7 @@ bool load_balance_policy::primary_balance(const std::shared_ptr &app, "too few alive nodes will lead 
to freeze"); LOG_INFO("primary balancer for app({}:{})", app->app_name, app->app_id); - auto graph = ford_fulkerson::builder(app, *_global_view->nodes, address_id).build(); + auto graph = ford_fulkerson::builder(app, *_global_view->nodes, host_port_id).build(); if (nullptr == graph) { LOG_DEBUG("the primaries are balanced for app({}:{})", app->app_name, app->app_id); return true; @@ -239,8 +246,15 @@ bool load_balance_policy::copy_primary(const std::shared_ptr &app, const app_mapper &apps = *_global_view->apps; int replicas_low = app->partition_count / _alive_nodes; - std::unique_ptr operation = std::make_unique( - app, apps, nodes, address_vec, address_id, still_have_less_than_average, replicas_low); + std::unique_ptr operation = + std::make_unique(app, + apps, + nodes, + host_port_vec, + host_port_id, + still_have_less_than_average, + replicas_low, + _svc->get_dns_resolver()); return operation->start(_migration_result); } @@ -257,17 +271,17 @@ bool load_balance_policy::move_primary(std::unique_ptr path) int current = path->_prev.back(); if (!calc_disk_load( - nodes, apps, path->_app->app_id, address_vec[current], true, *current_load)) { + nodes, apps, path->_app->app_id, host_port_vec[current], true, *current_load)) { LOG_WARNING("stop move primary as some replica infos aren't collected, node({}), app({})", - address_vec[current], + host_port_vec[current].to_string(), path->_app->get_logname()); return false; } int plan_moving = path->_flow.back(); while (path->_prev[current] != 0) { - rpc_address from = address_vec[path->_prev[current]]; - rpc_address to = address_vec[current]; + host_port from = host_port_vec[path->_prev[current]]; + host_port to = host_port_vec[current]; if (!calc_disk_load(nodes, apps, path->_app->app_id, from, true, *prev_load)) { LOG_WARNING( "stop move primary as some replica infos aren't collected, node({}), app({})", @@ -285,8 +299,8 @@ bool load_balance_policy::move_primary(std::unique_ptr path) } void 
load_balance_policy::start_moving_primary(const std::shared_ptr &app, - const rpc_address &from, - const rpc_address &to, + const host_port &from, + const host_port &to, int plan_moving, disk_load *prev_load, disk_load *current_load) @@ -307,8 +321,13 @@ void load_balance_policy::start_moving_primary(const std::shared_ptr const partition_configuration &pc = app->partitions[selected.get_partition_index()]; auto balancer_result = _migration_result->emplace( selected, - generate_balancer_request( - *_global_view->apps, pc, balance_type::MOVE_PRIMARY, from, to)); + generate_balancer_request(*_global_view->apps, + pc, + balance_type::MOVE_PRIMARY, + _svc->get_dns_resolver()->resolve_address(from), + _svc->get_dns_resolver()->resolve_address(to), + from, + to)); CHECK(balancer_result.second, "gpid({}) already inserted as an action", selected); --(*prev_load)[get_disk_tag(*_global_view->apps, from, selected)]; @@ -317,7 +336,7 @@ void load_balance_policy::start_moving_primary(const std::shared_ptr } std::list load_balance_policy::calc_potential_moving( - const std::shared_ptr &app, const rpc_address &from, const rpc_address &to) + const std::shared_ptr &app, const host_port &from, const host_port &to) { std::list potential_moving; const node_state &ns = _global_view->nodes->find(from)->second; @@ -334,8 +353,8 @@ std::list load_balance_policy::calc_potential_moving( dsn::gpid load_balance_policy::select_moving(std::list &potential_moving, disk_load *prev_load, disk_load *current_load, - rpc_address from, - rpc_address to) + host_port from, + host_port to) { std::list::iterator selected = potential_moving.end(); int max = std::numeric_limits::min(); @@ -470,27 +489,28 @@ void load_balance_policy::number_nodes(const node_mapper &nodes) { int current_id = 1; - address_id.clear(); - address_vec.resize(_alive_nodes + 2); + host_port_id.clear(); + host_port_vec.resize(_alive_nodes + 2); for (auto iter = nodes.begin(); iter != nodes.end(); ++iter) { - 
CHECK(!iter->first.is_invalid() && !iter->second.addr().is_invalid(), "invalid address"); + CHECK(!iter->first.is_invalid() && !iter->second.host_port().is_invalid(), + "invalid address"); CHECK(iter->second.alive(), "dead node"); - address_id[iter->first] = current_id; - address_vec[current_id] = iter->first; + host_port_id[iter->first] = current_id; + host_port_vec[current_id] = iter->first; ++current_id; } } ford_fulkerson::ford_fulkerson(const std::shared_ptr &app, const node_mapper &nodes, - const std::unordered_map &address_id, + const std::unordered_map &host_port_id, uint32_t higher_count, uint32_t lower_count, int replicas_low) : _app(app), _nodes(nodes), - _address_id(address_id), + _host_port_id(host_port_id), _higher_count(higher_count), _lower_count(lower_count), _replicas_low(replicas_low) @@ -525,7 +545,7 @@ void ford_fulkerson::make_graph() _graph_nodes = _nodes.size() + 2; _network.resize(_graph_nodes, std::vector(_graph_nodes, 0)); for (const auto &node : _nodes) { - int node_id = _address_id.at(node.first); + int node_id = _host_port_id.at(node.first); add_edge(node_id, node.second); update_decree(node_id, node.second); } @@ -546,9 +566,9 @@ void ford_fulkerson::update_decree(int node_id, const node_state &ns) { ns.for_each_primary(_app->app_id, [&, this](const gpid &pid) { const partition_configuration &pc = _app->partitions[pid.get_partition_index()]; - for (const auto &secondary : pc.secondaries) { - auto i = _address_id.find(secondary); - CHECK(i != _address_id.end(), "invalid secondary address, address = {}", secondary); + for (const auto &secondary : pc.hp_secondaries) { + auto i = _host_port_id.find(secondary); + CHECK(i != _host_port_id.end(), "invalid secondary address, address = {}", secondary); _network[node_id][i->second]++; } return true; @@ -618,15 +638,21 @@ copy_replica_operation::copy_replica_operation( const std::shared_ptr app, const app_mapper &apps, node_mapper &nodes, - const std::vector &address_vec, - const 
std::unordered_map &address_id) - : _app(app), _apps(apps), _nodes(nodes), _address_vec(address_vec), _address_id(address_id) + const std::vector &host_port_vec, + const std::unordered_map &host_port_id, + const std::shared_ptr &resolver) + : _app(app), + _apps(apps), + _nodes(nodes), + _host_port_vec(host_port_vec), + _host_port_id(host_port_id), + _dns_resolver(resolver) { } bool copy_replica_operation::start(migration_list *result) { - init_ordered_address_ids(); + init_ordered_host_port_ids(); _node_loads = get_node_loads(_app, _apps, _nodes, only_copy_primary()); if (_node_loads.size() != _nodes.size()) { return false; @@ -640,9 +666,9 @@ bool copy_replica_operation::start(migration_list *result) gpid selected_pid = select_partition(result); if (selected_pid.get_app_id() != -1) { copy_once(selected_pid, result); - update_ordered_address_ids(); + update_ordered_host_port_ids(); } else { - _ordered_address_ids.erase(--_ordered_address_ids.end()); + _ordered_host_port_ids.erase(--_ordered_host_port_ids.end()); } } return true; @@ -650,8 +676,8 @@ bool copy_replica_operation::start(migration_list *result) const partition_set *copy_replica_operation::get_all_partitions() { - int id_max = *_ordered_address_ids.rbegin(); - const node_state &ns = _nodes.find(_address_vec[id_max])->second; + int id_max = *_ordered_host_port_ids.rbegin(); + const node_state &ns = _nodes.find(_host_port_vec[id_max])->second; const partition_set *partitions = ns.partitions(_app->app_id, only_copy_primary()); return partitions; } @@ -659,8 +685,8 @@ const partition_set *copy_replica_operation::get_all_partitions() gpid copy_replica_operation::select_max_load_gpid(const partition_set *partitions, migration_list *result) { - int id_max = *_ordered_address_ids.rbegin(); - const disk_load &load_on_max = _node_loads.at(_address_vec[id_max]); + int id_max = *_ordered_host_port_ids.rbegin(); + const disk_load &load_on_max = _node_loads.at(_host_port_vec[id_max]); gpid selected_pid(-1, -1); int 
max_load = -1; @@ -669,7 +695,7 @@ gpid copy_replica_operation::select_max_load_gpid(const partition_set *partition continue; } - const std::string &disk_tag = get_disk_tag(_apps, _address_vec[id_max], pid); + const std::string &disk_tag = get_disk_tag(_apps, _host_port_vec[id_max], pid); auto load = load_on_max.at(disk_tag); if (load > max_load) { selected_pid = pid; @@ -681,33 +707,39 @@ gpid copy_replica_operation::select_max_load_gpid(const partition_set *partition void copy_replica_operation::copy_once(gpid selected_pid, migration_list *result) { - auto from = _address_vec[*_ordered_address_ids.rbegin()]; - auto to = _address_vec[*_ordered_address_ids.begin()]; + auto from = _host_port_vec[*_ordered_host_port_ids.rbegin()]; + auto to = _host_port_vec[*_ordered_host_port_ids.begin()]; auto pc = _app->partitions[selected_pid.get_partition_index()]; - auto request = generate_balancer_request(_apps, pc, get_balance_type(), from, to); + auto request = generate_balancer_request(_apps, + pc, + get_balance_type(), + _dns_resolver->resolve_address(from), + _dns_resolver->resolve_address(to), + from, + to); result->emplace(selected_pid, request); } -void copy_replica_operation::update_ordered_address_ids() +void copy_replica_operation::update_ordered_host_port_ids() { - int id_min = *_ordered_address_ids.begin(); - int id_max = *_ordered_address_ids.rbegin(); + int id_min = *_ordered_host_port_ids.begin(); + int id_max = *_ordered_host_port_ids.rbegin(); --_partition_counts[id_max]; ++_partition_counts[id_min]; - _ordered_address_ids.erase(_ordered_address_ids.begin()); - _ordered_address_ids.erase(--_ordered_address_ids.end()); + _ordered_host_port_ids.erase(_ordered_host_port_ids.begin()); + _ordered_host_port_ids.erase(--_ordered_host_port_ids.end()); - _ordered_address_ids.insert(id_max); - _ordered_address_ids.insert(id_min); + _ordered_host_port_ids.insert(id_max); + _ordered_host_port_ids.insert(id_min); } -void copy_replica_operation::init_ordered_address_ids() 
+void copy_replica_operation::init_ordered_host_port_ids() { - _partition_counts.resize(_address_vec.size(), 0); + _partition_counts.resize(_host_port_vec.size(), 0); for (const auto &iter : _nodes) { - auto id = _address_id.at(iter.first); + auto id = _host_port_id.at(iter.first); _partition_counts[id] = get_partition_count(iter.second); } @@ -718,19 +750,21 @@ void copy_replica_operation::init_ordered_address_ids() : left < right; }); for (const auto &iter : _nodes) { - auto id = _address_id.at(iter.first); + auto id = _host_port_id.at(iter.first); ordered_queue.insert(id); } - _ordered_address_ids.swap(ordered_queue); + _ordered_host_port_ids.swap(ordered_queue); } gpid copy_replica_operation::select_partition(migration_list *result) { const partition_set *partitions = get_all_partitions(); - int id_max = *_ordered_address_ids.rbegin(); - const node_state &ns = _nodes.find(_address_vec[id_max])->second; - CHECK(partitions != nullptr && !partitions->empty(), "max load({}) shouldn't empty", ns.addr()); + int id_max = *_ordered_host_port_ids.rbegin(); + const node_state &ns = _nodes.find(_host_port_vec[id_max])->second; + CHECK(partitions != nullptr && !partitions->empty(), + "max load({}) shouldn't empty", + ns.host_port()); return select_max_load_gpid(partitions, result); } @@ -739,11 +773,12 @@ copy_primary_operation::copy_primary_operation( const std::shared_ptr app, const app_mapper &apps, node_mapper &nodes, - const std::vector &address_vec, - const std::unordered_map &address_id, + const std::vector &host_port_vec, + const std::unordered_map &host_port_id, bool have_lower_than_average, - int replicas_low) - : copy_replica_operation(app, apps, nodes, address_vec, address_id) + int replicas_low, + const std::shared_ptr &resolver) + : copy_replica_operation(app, apps, nodes, host_port_vec, host_port_id, resolver) { _have_lower_than_average = have_lower_than_average; _replicas_low = replicas_low; @@ -761,14 +796,14 @@ bool copy_primary_operation::can_select(gpid 
pid, migration_list *result) bool copy_primary_operation::can_continue() { - int id_min = *_ordered_address_ids.begin(); + int id_min = *_ordered_host_port_ids.begin(); if (_have_lower_than_average && _partition_counts[id_min] >= _replicas_low) { LOG_INFO("{}: stop the copy due to primaries on all nodes will reach low later.", _app->get_logname()); return false; } - int id_max = *_ordered_address_ids.rbegin(); + int id_max = *_ordered_host_port_ids.rbegin(); if (!_have_lower_than_average && _partition_counts[id_max] - _partition_counts[id_min] <= 1) { LOG_INFO("{}: stop the copy due to the primary will be balanced later.", _app->get_logname()); diff --git a/src/meta/load_balance_policy.h b/src/meta/load_balance_policy.h index f0e06bf0b6..ee901a94dd 100644 --- a/src/meta/load_balance_policy.h +++ b/src/meta/load_balance_policy.h @@ -33,16 +33,19 @@ #include "common/gpid.h" #include "common/replication_other_types.h" #include "meta_data.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/enum_helper.h" #include "utils/zlocks.h" namespace dsn { class command_deregister; +class dns_resolver; class partition_configuration; +class rpc_address; namespace replication { class configuration_balancer_request; +class meta_service; // disk_tag->primary_count/total_count_on_this_disk typedef std::map disk_load; @@ -63,18 +66,19 @@ ENUM_END(balance_type) bool calc_disk_load(node_mapper &nodes, const app_mapper &apps, app_id id, - const rpc_address &node, + const host_port &node, bool only_primary, /*out*/ disk_load &load); -const std::string &get_disk_tag(const app_mapper &apps, const rpc_address &node, const gpid &pid); +const std::string &get_disk_tag(const app_mapper &apps, const host_port &node, const gpid &pid); std::shared_ptr generate_balancer_request(const app_mapper &apps, const partition_configuration &pc, const balance_type &type, const rpc_address &from, - const rpc_address &to); + const rpc_address &to, + const host_port 
&hp_from, + const host_port &hp_to); -class meta_service; struct flow_path; class load_balance_policy @@ -105,8 +109,8 @@ class load_balance_policy int _alive_nodes; // this is used to assign an integer id for every node // and these are generated from the above data, which are tempory too - std::unordered_map address_id; - std::vector address_vec; + std::unordered_map host_port_id; + std::vector host_port_vec; // the app set which won't be re-balanced dsn::zrwlock_nr _balancer_ignored_apps_lock; // { @@ -116,19 +120,19 @@ class load_balance_policy private: void start_moving_primary(const std::shared_ptr &app, - const rpc_address &from, - const rpc_address &to, + const host_port &from, + const host_port &to, int plan_moving, disk_load *prev_load, disk_load *current_load); std::list calc_potential_moving(const std::shared_ptr &app, - const rpc_address &from, - const rpc_address &to); + const host_port &from, + const host_port &to); dsn::gpid select_moving(std::list &potential_moving, disk_load *prev_load, disk_load *current_load, - rpc_address from, - rpc_address to); + host_port from, + host_port to); void number_nodes(const node_mapper &nodes); std::string remote_command_balancer_ignored_app_ids(const std::vector &args); @@ -160,7 +164,7 @@ class ford_fulkerson ford_fulkerson() = delete; ford_fulkerson(const std::shared_ptr &app, const node_mapper &nodes, - const std::unordered_map &address_id, + const std::unordered_map &host_port_id, uint32_t higher_count, uint32_t lower_count, int replicas_low); @@ -174,8 +178,8 @@ class ford_fulkerson public: builder(const std::shared_ptr &app, const node_mapper &nodes, - const std::unordered_map &address_id) - : _app(app), _nodes(nodes), _address_id(address_id) + const std::unordered_map &host_port_id) + : _app(app), _nodes(nodes), _host_port_id(host_port_id) { } @@ -199,13 +203,13 @@ class ford_fulkerson return nullptr; } return std::make_unique( - _app, _nodes, _address_id, higher_count, lower_count, replicas_low); + _app, 
_nodes, _host_port_id, higher_count, lower_count, replicas_low); } private: const std::shared_ptr &_app; const node_mapper &_nodes; - const std::unordered_map &_address_id; + const std::unordered_map &_host_port_id; }; private: @@ -224,7 +228,7 @@ class ford_fulkerson const std::shared_ptr &_app; const node_mapper &_nodes; - const std::unordered_map &_address_id; + const std::unordered_map &_host_port_id; uint32_t _higher_count; uint32_t _lower_count; int _replicas_low; @@ -244,35 +248,38 @@ class copy_replica_operation copy_replica_operation(const std::shared_ptr app, const app_mapper &apps, node_mapper &nodes, - const std::vector &address_vec, - const std::unordered_map &address_id); + const std::vector &host_port_vec, + const std::unordered_map &host_port_id, + const std::shared_ptr &resolver); virtual ~copy_replica_operation() = default; bool start(migration_list *result); protected: - void init_ordered_address_ids(); + void init_ordered_host_port_ids(); virtual int get_partition_count(const node_state &ns) const = 0; gpid select_partition(migration_list *result); const partition_set *get_all_partitions(); gpid select_max_load_gpid(const partition_set *partitions, migration_list *result); void copy_once(gpid selected_pid, migration_list *result); - void update_ordered_address_ids(); + void update_ordered_host_port_ids(); virtual bool only_copy_primary() = 0; virtual bool can_select(gpid pid, migration_list *result) = 0; virtual bool can_continue() = 0; virtual balance_type get_balance_type() = 0; - std::set> _ordered_address_ids; + std::set> _ordered_host_port_ids; const std::shared_ptr _app; const app_mapper &_apps; node_mapper &_nodes; - const std::vector &_address_vec; - const std::unordered_map &_address_id; - std::unordered_map _node_loads; + const std::vector &_host_port_vec; + const std::unordered_map &_host_port_id; + std::unordered_map _node_loads; std::vector _partition_counts; + std::shared_ptr _dns_resolver; + FRIEND_TEST(copy_primary_operation, 
misc); FRIEND_TEST(copy_replica_operation, get_all_partitions); }; @@ -283,10 +290,11 @@ class copy_primary_operation : public copy_replica_operation copy_primary_operation(const std::shared_ptr app, const app_mapper &apps, node_mapper &nodes, - const std::vector &address_vec, - const std::unordered_map &address_id, + const std::vector &host_port_vec, + const std::unordered_map &host_port_id, bool have_lower_than_average, - int replicas_low); + int replicas_low, + const std::shared_ptr &resolver); ~copy_primary_operation() = default; private: diff --git a/src/meta/meta_backup_service.cpp b/src/meta/meta_backup_service.cpp index 2ed5425c5f..5f73647179 100644 --- a/src/meta/meta_backup_service.cpp +++ b/src/meta/meta_backup_service.cpp @@ -37,7 +37,7 @@ #include "meta_backup_service.h" #include "meta_service.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_holder.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" @@ -134,7 +134,7 @@ void policy_context::start_backup_app_meta_unlocked(int32_t app_id) int total_partitions = iter->second; for (int32_t pidx = 0; pidx < total_partitions; ++pidx) { update_partition_progress_unlocked( - gpid(app_id, pidx), cold_backup_constant::PROGRESS_FINISHED, dsn::rpc_address()); + gpid(app_id, pidx), cold_backup_constant::PROGRESS_FINISHED, dsn::host_port()); } return; } @@ -437,7 +437,7 @@ void policy_context::write_backup_info_unlocked(const backup_info &b_info, bool policy_context::update_partition_progress_unlocked(gpid pid, int32_t progress, - const rpc_address &source) + const host_port &source) { int32_t &local_progress = _progress.partition_progress[pid]; if (local_progress == cold_backup_constant::PROGRESS_FINISHED) { @@ -488,7 +488,7 @@ void policy_context::record_partition_checkpoint_size_unlock(const gpid &pid, in void policy_context::start_backup_partition_unlocked(gpid pid) { - dsn::rpc_address 
partition_primary; + dsn::host_port partition_primary; { // check app and partition status zauto_read_lock l; @@ -500,10 +500,10 @@ void policy_context::start_backup_partition_unlocked(gpid pid) "{}: app {} is not available, skip to backup it.", _backup_sig, pid.get_app_id()); _progress.is_app_skipped[pid.get_app_id()] = true; update_partition_progress_unlocked( - pid, cold_backup_constant::PROGRESS_FINISHED, dsn::rpc_address()); + pid, cold_backup_constant::PROGRESS_FINISHED, dsn::host_port()); return; } - partition_primary = app->partitions[pid.get_partition_index()].primary; + partition_primary = app->partitions[pid.get_partition_index()].hp_primary; } if (partition_primary.is_invalid()) { LOG_WARNING("{}: partition {} doesn't have a primary now, retry to backup it later", @@ -544,7 +544,7 @@ void policy_context::start_backup_partition_unlocked(gpid pid) void policy_context::on_backup_reply(error_code err, backup_response &&response, gpid pid, - const rpc_address &primary) + const host_port &primary) { LOG_INFO( "{}: receive backup response for partition {} from server {}.", _backup_sig, pid, primary); @@ -1695,7 +1695,8 @@ void backup_service::start_backup_app(start_backup_app_rpc rpc) }); int32_t app_id = request.app_id; - std::shared_ptr engine = std::make_shared(this); + std::shared_ptr engine = + std::make_shared(this, _meta_svc->get_dns_resolver()); error_code err = engine->init_backup(app_id); if (err != ERR_OK) { response.err = err; diff --git a/src/meta/meta_backup_service.h b/src/meta/meta_backup_service.h index e382a8922c..f767ad2be3 100644 --- a/src/meta/meta_backup_service.h +++ b/src/meta/meta_backup_service.h @@ -49,7 +49,7 @@ namespace dsn { class message_ex; -class rpc_address; +class host_port; namespace dist { namespace block_service { @@ -297,7 +297,7 @@ mock_private : // mock_virtual bool - update_partition_progress_unlocked(gpid pid, int32_t progress, const rpc_address &source); + update_partition_progress_unlocked(gpid pid, int32_t 
progress, const host_port &source); mock_virtual void record_partition_checkpoint_size_unlock(const gpid& pid, int64_t size); mock_virtual void start_backup_app_meta_unlocked(int32_t app_id); @@ -326,7 +326,7 @@ mock_private : mock_virtual void on_backup_reply(dsn::error_code err, backup_response &&response, gpid pid, - const rpc_address &primary); + const host_port &primary); mock_virtual void gc_backup_info_unlocked(const backup_info &info_to_gc); mock_virtual void issue_gc_backup_info_task_unlocked(); diff --git a/src/meta/meta_bulk_load_ingestion_context.cpp b/src/meta/meta_bulk_load_ingestion_context.cpp index fbb2c775e7..4434a49190 100644 --- a/src/meta/meta_bulk_load_ingestion_context.cpp +++ b/src/meta/meta_bulk_load_ingestion_context.cpp @@ -48,9 +48,9 @@ void ingestion_context::partition_node_info::create(const partition_configuratio const config_context &cc) { pid = config.pid; - std::unordered_set current_nodes; - current_nodes.insert(config.primary); - for (const auto &secondary : config.secondaries) { + std::unordered_set current_nodes; + current_nodes.insert(config.hp_primary); + for (const auto &secondary : config.hp_secondaries) { current_nodes.insert(secondary); } for (const auto &node : current_nodes) { @@ -139,7 +139,7 @@ bool ingestion_context::try_partition_ingestion(const partition_configuration &c return true; } -bool ingestion_context::check_node_ingestion(const rpc_address &node, const std::string &disk_tag) +bool ingestion_context::check_node_ingestion(const host_port &node, const std::string &disk_tag) { if (_nodes_context.find(node) == _nodes_context.end()) { _nodes_context[node] = node_context(node, disk_tag); diff --git a/src/meta/meta_bulk_load_ingestion_context.h b/src/meta/meta_bulk_load_ingestion_context.h index 50e7872d5f..b64f43b73d 100644 --- a/src/meta/meta_bulk_load_ingestion_context.h +++ b/src/meta/meta_bulk_load_ingestion_context.h @@ -22,7 +22,7 @@ #include #include "common/gpid.h" -#include "runtime/rpc/rpc_address.h" 
+#include "runtime/rpc/rpc_host_port.h" #include "utils/flags.h" namespace dsn { @@ -46,7 +46,7 @@ class ingestion_context { gpid pid; // node address -> disk_tag - std::unordered_map node_disk; + std::unordered_map node_disk; partition_node_info() {} partition_node_info(const partition_configuration &config, const config_context &cc) @@ -58,13 +58,13 @@ class ingestion_context struct node_context { - rpc_address address; + host_port address; uint32_t node_ingesting_count; // disk tag -> ingesting partition count std::unordered_map disk_ingesting_counts; node_context() {} - node_context(const rpc_address &address, const std::string &disk_tag) + node_context(const host_port &address, const std::string &disk_tag) : address(address), node_ingesting_count(0) { init_disk(disk_tag); @@ -78,7 +78,7 @@ class ingestion_context }; bool try_partition_ingestion(const partition_configuration &config, const config_context &cc); - bool check_node_ingestion(const rpc_address &node, const std::string &disk_tag); + bool check_node_ingestion(const host_port &node, const std::string &disk_tag); void add_partition(const partition_node_info &info); void remove_partition(const gpid &pid); uint32_t get_app_ingesting_count(const uint32_t app_id) const; @@ -93,7 +93,7 @@ class ingestion_context // ingesting partitions std::unordered_map _running_partitions; // every node and every disk ingesting partition count - std::unordered_map _nodes_context; + std::unordered_map _nodes_context; }; } // namespace replication diff --git a/src/meta/meta_bulk_load_service.cpp b/src/meta/meta_bulk_load_service.cpp index 8648cf9892..e04a06a7ff 100644 --- a/src/meta/meta_bulk_load_service.cpp +++ b/src/meta/meta_bulk_load_service.cpp @@ -37,6 +37,8 @@ #include "meta/meta_state_service.h" #include "meta/server_state.h" #include "meta_admin_types.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_holder.h" #include "runtime/rpc/rpc_message.h" #include 
"runtime/rpc/serialization.h" @@ -367,7 +369,7 @@ bool bulk_load_service::check_partition_status( } pconfig = app->partitions[pid.get_partition_index()]; - if (pconfig.primary.is_invalid()) { + if (pconfig.hp_primary.is_invalid()) { LOG_WARNING("app({}) partition({}) primary is invalid, try it later", app_name, pid); tasking::enqueue(LPC_META_STATE_NORMAL, _meta_svc->tracker(), @@ -377,7 +379,7 @@ bool bulk_load_service::check_partition_status( return false; } - if (pconfig.secondaries.size() < pconfig.max_replica_count - 1) { + if (pconfig.hp_secondaries.size() < pconfig.max_replica_count - 1) { bulk_load_status::type p_status; { zauto_read_lock l(_lock); @@ -422,7 +424,8 @@ void bulk_load_service::partition_bulk_load(const std::string &app_name, const g return; } - rpc_address primary_addr = pconfig.primary; + auto primary_addr = pconfig.primary; + auto primary_hp = pconfig.hp_primary; auto req = std::make_unique(); { zauto_read_lock l(_lock); @@ -430,6 +433,7 @@ void bulk_load_service::partition_bulk_load(const std::string &app_name, const g req->pid = pid; req->app_name = app_name; req->primary_addr = primary_addr; + req->__set_hp_primary(primary_hp); req->remote_provider_name = ainfo.file_provider_type; req->cluster_name = ainfo.cluster_name; req->meta_bulk_load_status = get_partition_bulk_load_status_unlocked(pid); @@ -438,8 +442,9 @@ void bulk_load_service::partition_bulk_load(const std::string &app_name, const g req->remote_root_path = ainfo.remote_root_path; } - LOG_INFO("send bulk load request to node({}), app({}), partition({}), partition " + LOG_INFO("send bulk load request to node({}({})), app({}), partition({}), partition " "status = {}, remote provider = {}, cluster_name = {}, remote_root_path = {}", + primary_hp, primary_addr, app_name, pid, @@ -450,6 +455,17 @@ void bulk_load_service::partition_bulk_load(const std::string &app_name, const g bulk_load_rpc rpc(std::move(req), RPC_BULK_LOAD, 0_ms, 0, pid.thread_hash()); rpc.call(primary_addr, 
_meta_svc->tracker(), [this, rpc](error_code err) mutable { + // Fill hp_group_bulk_load_state if it is not set: + // an old remote server may not support host_port and only provides rpc_address. + auto &bulk_load_resp = rpc.response(); + if (!bulk_load_resp.__isset.hp_group_bulk_load_state) { + bulk_load_resp.__set_hp_group_bulk_load_state({}); + for (const auto &kv : bulk_load_resp.group_bulk_load_state) { + auto hp = host_port(kv.first); + bulk_load_resp.hp_group_bulk_load_state[hp] = kv.second; + } + } + on_partition_bulk_load_reply(err, rpc.request(), rpc.response()); }); } @@ -461,15 +477,17 @@ void bulk_load_service::on_partition_bulk_load_reply(error_code err, { const std::string &app_name = request.app_name; const gpid &pid = request.pid; - const rpc_address &primary_addr = request.primary_addr; + const auto &primary_addr = request.primary_addr; + const auto &primary_hp = request.hp_primary; if (err != ERR_OK) { - LOG_ERROR( - "app({}), partition({}) failed to receive bulk load response from node({}), error = {}", - app_name, - pid, - primary_addr, - err); + LOG_ERROR("app({}), partition({}) failed to receive bulk load response from node({}({})), " + "error = {}", + app_name, + pid, + primary_hp, + primary_addr, + err); try_rollback_to_downloading(app_name, pid); try_resend_bulk_load_request(app_name, pid); return; @@ -477,9 +495,10 @@ void bulk_load_service::on_partition_bulk_load_reply(error_code err, if (response.err == ERR_OBJECT_NOT_FOUND || response.err == ERR_INVALID_STATE) { LOG_ERROR( - "app({}), partition({}) doesn't exist or has invalid state on node({}), error = {}", + "app({}), partition({}) doesn't exist or has invalid state on node({}({})), error = {}", app_name, pid, + primary_hp, primary_addr, response.err); try_rollback_to_downloading(app_name, pid); @@ -489,8 +508,9 @@ void bulk_load_service::on_partition_bulk_load_reply(error_code err, if (response.err == ERR_BUSY) { LOG_WARNING( -
"node({}({})) has enough replicas downloading, wait for next round to send bulk load " "request for app({}), partition({})", + primary_hp, primary_addr, app_name, pid); @@ -499,13 +519,15 @@ void bulk_load_service::on_partition_bulk_load_reply(error_code err, } if (response.err != ERR_OK) { - LOG_ERROR("app({}), partition({}) from node({}) handle bulk load response failed, error = " - "{}, primary status = {}", - app_name, - pid, - primary_addr, - response.err, - dsn::enum_to_string(response.primary_bulk_load_status)); + LOG_ERROR( + "app({}), partition({}) from node({}({})) handle bulk load response failed, error = " + "{}, primary status = {}", + app_name, + pid, + primary_hp, + primary_addr, + response.err, + dsn::enum_to_string(response.primary_bulk_load_status)); handle_bulk_load_failed(pid.get_app_id(), response.err); try_resend_bulk_load_request(app_name, pid); return; @@ -538,7 +560,7 @@ void bulk_load_service::on_partition_bulk_load_reply(error_code err, bulk_load_status::type app_status = get_app_bulk_load_status(response.pid.get_app_id()); switch (app_status) { case bulk_load_status::BLS_DOWNLOADING: - handle_app_downloading(response, primary_addr); + handle_app_downloading(response, primary_hp); break; case bulk_load_status::BLS_DOWNLOADED: update_partition_info_on_remote_storage( @@ -546,15 +568,15 @@ void bulk_load_service::on_partition_bulk_load_reply(error_code err, // when app status is downloaded or ingesting, send request frequently break; case bulk_load_status::BLS_INGESTING: - handle_app_ingestion(response, primary_addr); + handle_app_ingestion(response, primary_hp); break; case bulk_load_status::BLS_SUCCEED: case bulk_load_status::BLS_FAILED: case bulk_load_status::BLS_CANCELED: - handle_bulk_load_finish(response, primary_addr); + handle_bulk_load_finish(response, primary_hp); break; case bulk_load_status::BLS_PAUSING: - handle_app_pausing(response, primary_addr); + handle_app_pausing(response, primary_hp); break; case 
bulk_load_status::BLS_PAUSED: // paused not send request to replica servers @@ -583,7 +605,7 @@ void bulk_load_service::try_resend_bulk_load_request(const std::string &app_name // ThreadPool: THREAD_POOL_META_STATE void bulk_load_service::handle_app_downloading(const bulk_load_response &response, - const rpc_address &primary_addr) + const host_port &primary_addr) { const std::string &app_name = response.app_name; const gpid &pid = response.pid; @@ -599,7 +621,7 @@ void bulk_load_service::handle_app_downloading(const bulk_load_response &respons return; } - for (const auto &kv : response.group_bulk_load_state) { + for (const auto &kv : response.hp_group_bulk_load_state) { const auto &bulk_load_states = kv.second; if (!bulk_load_states.__isset.download_progress || !bulk_load_states.__isset.download_status) { @@ -652,7 +674,7 @@ void bulk_load_service::handle_app_downloading(const bulk_load_response &respons { zauto_write_lock l(_lock); _partitions_total_download_progress[pid] = total_progress; - _partitions_bulk_load_state[pid] = response.group_bulk_load_state; + _partitions_bulk_load_state[pid] = response.hp_group_bulk_load_state; } // update partition status to `downloaded` if all replica downloaded @@ -665,7 +687,7 @@ void bulk_load_service::handle_app_downloading(const bulk_load_response &respons // ThreadPool: THREAD_POOL_META_STATE void bulk_load_service::handle_app_ingestion(const bulk_load_response &response, - const rpc_address &primary_addr) + const host_port &primary_addr) { const std::string &app_name = response.app_name; const gpid &pid = response.pid; @@ -680,7 +702,7 @@ void bulk_load_service::handle_app_ingestion(const bulk_load_response &response, return; } - for (const auto &kv : response.group_bulk_load_state) { + for (const auto &kv : response.hp_group_bulk_load_state) { const auto &bulk_load_states = kv.second; if (!bulk_load_states.__isset.ingest_status) { LOG_WARNING("receive bulk load response from node({}) app({}) partition({}), " @@ -711,7 
+733,7 @@ void bulk_load_service::handle_app_ingestion(const bulk_load_response &response, response.is_group_ingestion_finished); { zauto_write_lock l(_lock); - _partitions_bulk_load_state[pid] = response.group_bulk_load_state; + _partitions_bulk_load_state[pid] = response.hp_group_bulk_load_state; } if (response.is_group_ingestion_finished) { @@ -723,7 +745,7 @@ void bulk_load_service::handle_app_ingestion(const bulk_load_response &response, // ThreadPool: THREAD_POOL_META_STATE void bulk_load_service::handle_bulk_load_finish(const bulk_load_response &response, - const rpc_address &primary_addr) + const host_port &primary_addr) { const std::string &app_name = response.app_name; const gpid &pid = response.pid; @@ -738,7 +760,7 @@ void bulk_load_service::handle_bulk_load_finish(const bulk_load_response &respon return; } - for (const auto &kv : response.group_bulk_load_state) { + for (const auto &kv : response.hp_group_bulk_load_state) { if (!kv.second.__isset.is_cleaned_up) { LOG_WARNING("receive bulk load response from node({}) app({}), partition({}), " "primary_status({}), but node({}) is_cleaned_up is not set", @@ -776,7 +798,7 @@ void bulk_load_service::handle_bulk_load_finish(const bulk_load_response &respon { zauto_write_lock l(_lock); _partitions_cleaned_up[pid] = group_cleaned_up; - _partitions_bulk_load_state[pid] = response.group_bulk_load_state; + _partitions_bulk_load_state[pid] = response.hp_group_bulk_load_state; } if (group_cleaned_up) { @@ -804,7 +826,7 @@ void bulk_load_service::handle_bulk_load_finish(const bulk_load_response &respon // ThreadPool: THREAD_POOL_META_STATE void bulk_load_service::handle_app_pausing(const bulk_load_response &response, - const rpc_address &primary_addr) + const host_port &primary_addr) { const std::string &app_name = response.app_name; const gpid &pid = response.pid; @@ -819,7 +841,7 @@ void bulk_load_service::handle_app_pausing(const bulk_load_response &response, return; } - for (const auto &kv : 
response.group_bulk_load_state) { + for (const auto &kv : response.hp_group_bulk_load_state) { if (!kv.second.__isset.is_paused) { LOG_WARNING("receive bulk load response from node({}) app({}), partition({}), " "primary_status({}), but node({}) is_paused is not set", @@ -842,7 +864,7 @@ void bulk_load_service::handle_app_pausing(const bulk_load_response &response, is_group_paused); { zauto_write_lock l(_lock); - _partitions_bulk_load_state[pid] = response.group_bulk_load_state; + _partitions_bulk_load_state[pid] = response.hp_group_bulk_load_state; } if (is_group_paused) { @@ -1013,10 +1035,10 @@ void bulk_load_service::update_partition_info_unlock(const gpid &pid, // no need to update other field of partition_bulk_load_info return; } - pinfo.addresses.clear(); + pinfo.host_ports.clear(); const auto &state = _partitions_bulk_load_state[pid]; for (const auto &kv : state) { - pinfo.addresses.emplace_back(kv.first); + pinfo.host_ports.emplace_back(kv.first); } pinfo.ever_ingest_succeed = true; } @@ -1187,15 +1209,15 @@ bool bulk_load_service::check_ever_ingestion_succeed(const partition_configurati return false; } - std::vector current_nodes; - current_nodes.emplace_back(config.primary); - for (const auto &secondary : config.secondaries) { + std::vector current_nodes; + current_nodes.emplace_back(config.hp_primary); + for (const auto &secondary : config.hp_secondaries) { current_nodes.emplace_back(secondary); } - std::sort(pinfo.addresses.begin(), pinfo.addresses.end()); + std::sort(pinfo.host_ports.begin(), pinfo.host_ports.end()); std::sort(current_nodes.begin(), current_nodes.end()); - if (current_nodes == pinfo.addresses) { + if (current_nodes == pinfo.host_ports) { LOG_INFO("app({}) partition({}) has already executed ingestion succeed", app_name, pid); update_partition_info_on_remote_storage(app_name, pid, bulk_load_status::BLS_SUCCEED); return true; @@ -1257,7 +1279,7 @@ void bulk_load_service::partition_ingestion(const std::string &app_name, const g return; } - 
rpc_address primary_addr = pconfig.primary; + host_port primary_addr = pconfig.hp_primary; ballot meta_ballot = pconfig.ballot; tasking::enqueue(LPC_BULK_LOAD_INGESTION, _meta_svc->tracker(), @@ -1274,7 +1296,7 @@ void bulk_load_service::partition_ingestion(const std::string &app_name, const g // ThreadPool: THREAD_POOL_DEFAULT void bulk_load_service::send_ingestion_request(const std::string &app_name, const gpid &pid, - const rpc_address &primary_addr, + const host_port &primary_addr, const ballot &meta_ballot) { ingestion_request req; @@ -1309,7 +1331,7 @@ void bulk_load_service::on_partition_ingestion_reply(error_code err, const ingestion_response &&resp, const std::string &app_name, const gpid &pid, - const rpc_address &primary_addr) + const host_port &primary_addr) { if (err != ERR_OK || resp.err != ERR_OK || resp.rocksdb_error != ERR_OK) { finish_ingestion(pid); @@ -1595,9 +1617,18 @@ void bulk_load_service::on_query_bulk_load_status(query_bulk_load_rpc rpc) } response.bulk_load_states.resize(partition_count); + response.__set_hp_bulk_load_states( + std::vector>(partition_count)); for (const auto &kv : _partitions_bulk_load_state) { if (kv.first.get_app_id() == app_id) { - response.bulk_load_states[kv.first.get_partition_index()] = kv.second; + auto pidx = kv.first.get_partition_index(); + response.hp_bulk_load_states[pidx] = kv.second; + + std::map addr_pbls; + for (const auto &bls : kv.second) { + addr_pbls[_meta_svc->get_dns_resolver()->resolve_address(bls.first)] = bls.second; + } + response.bulk_load_states[pidx] = addr_pbls; } } diff --git a/src/meta/meta_bulk_load_service.h b/src/meta/meta_bulk_load_service.h index c411d87f44..98258fc9d3 100644 --- a/src/meta/meta_bulk_load_service.h +++ b/src/meta/meta_bulk_load_service.h @@ -35,7 +35,7 @@ #include "common/replication_other_types.h" #include "meta/meta_state_service_utils.h" #include "meta_bulk_load_ingestion_context.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" 
#include "runtime/task/task_tracker.h" #include "server_state.h" #include "utils/error_code.h" @@ -87,8 +87,8 @@ struct partition_bulk_load_info bulk_load_status::type status; bulk_load_metadata metadata; bool ever_ingest_succeed; - std::vector addresses; - DEFINE_JSON_SERIALIZATION(status, metadata, ever_ingest_succeed, addresses) + std::vector host_ports; + DEFINE_JSON_SERIALIZATION(status, metadata, ever_ingest_succeed, host_ports) }; // Used for remote file provider @@ -200,17 +200,15 @@ class bulk_load_service // if app is still in bulk load, resend bulk_load_request to primary after interval seconds void try_resend_bulk_load_request(const std::string &app_name, const gpid &pid); - void handle_app_downloading(const bulk_load_response &response, - const rpc_address &primary_addr); + void handle_app_downloading(const bulk_load_response &response, const host_port &primary_addr); - void handle_app_ingestion(const bulk_load_response &response, const rpc_address &primary_addr); + void handle_app_ingestion(const bulk_load_response &response, const host_port &primary_addr); // when app status is `succeed, `failed`, `canceled`, meta and replica should cleanup bulk load // states - void handle_bulk_load_finish(const bulk_load_response &response, - const rpc_address &primary_addr); + void handle_bulk_load_finish(const bulk_load_response &response, const host_port &primary_addr); - void handle_app_pausing(const bulk_load_response &response, const rpc_address &primary_addr); + void handle_app_pausing(const bulk_load_response &response, const host_port &primary_addr); // app not existed or not available during bulk load void handle_app_unavailable(int32_t app_id, const std::string &app_name); @@ -225,14 +223,14 @@ class bulk_load_service void send_ingestion_request(const std::string &app_name, const gpid &pid, - const rpc_address &primary_addr, + const host_port &primary_addr, const ballot &meta_ballot); void on_partition_ingestion_reply(error_code err, const 
ingestion_response &&resp, const std::string &app_name, const gpid &pid, - const rpc_address &primary_addr); + const host_port &primary_addr); // Called by `partition_ingestion` // - true : this partition has ever executed ingestion succeed, no need to send ingestion @@ -519,7 +517,7 @@ class bulk_load_service // partition_index -> group total download progress std::unordered_map _partitions_total_download_progress; // partition_index -> group bulk load states(node address -> state) - std::unordered_map> + std::unordered_map> _partitions_bulk_load_state; std::unordered_map _partitions_cleaned_up; diff --git a/src/meta/meta_data.cpp b/src/meta/meta_data.cpp index a44180d4a0..7d101b15d8 100644 --- a/src/meta/meta_data.cpp +++ b/src/meta/meta_data.cpp @@ -31,6 +31,7 @@ #include "common/replication_enums.h" #include "meta_data.h" #include "runtime/api_layer1.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_message.h" #include "utils/flags.h" @@ -93,27 +94,10 @@ void when_update_replicas(config_type::type t, const std::function & } } -void maintain_drops(std::vector &drops, const rpc_address &node, config_type::type t) -{ - auto action = [&drops, &node](bool is_adding) { - auto it = std::find(drops.begin(), drops.end(), node); - if (is_adding) { - if (it != drops.end()) { - drops.erase(it); - } - } else { - CHECK( - it == drops.end(), "the node({}) cannot be in drops set before this update", node); - drops.push_back(node); - if (drops.size() > 3) { - drops.erase(drops.begin()); - } - } - }; - when_update_replicas(t, action); -} - -bool construct_replica(meta_view view, const gpid &pid, int max_replica_count) +bool construct_replica(meta_view view, + const gpid &pid, + int max_replica_count, + const std::shared_ptr &resolver) { partition_configuration &pc = *get_config(*view.apps, pid); config_context &cc = *get_config_context(*view.apps, pid); @@ -133,7 +117,8 @@ bool construct_replica(meta_view view, const gpid 
&pid, int max_replica_count) invalid_ballot, "the ballot of server must not be invalid_ballot, node = {}", server.node); - pc.primary = server.node; + pc.primary = resolver->resolve_address(server.node); + pc.__set_hp_primary(server.node); pc.ballot = server.ballot; pc.partition_flags = 0; pc.max_replica_count = max_replica_count; @@ -151,12 +136,13 @@ bool construct_replica(meta_view view, const gpid &pid, int max_replica_count) // we put max_replica_count-1 recent replicas to last_drops, in case of the DDD-state when the // only primary dead // when add node to pc.last_drops, we don't remove it from our cc.drop_list - CHECK(pc.last_drops.empty(), "last_drops of partition({}) must be empty", pid); + CHECK(pc.hp_last_drops.empty(), "last_drops of partition({}) must be empty", pid); for (auto iter = drop_list.rbegin(); iter != drop_list.rend(); ++iter) { - if (pc.last_drops.size() + 1 >= max_replica_count) + if (pc.hp_last_drops.size() + 1 >= max_replica_count) break; // similar to cc.drop_list, pc.last_drop is also a stack structure - pc.last_drops.insert(pc.last_drops.begin(), iter->node); + pc.last_drops.insert(pc.last_drops.begin(), resolver->resolve_address(iter->node)); + pc.hp_last_drops.insert(pc.hp_last_drops.begin(), iter->node); LOG_INFO("construct for ({}), select {} into last_drops, ballot({}), " "committed_decree({}), prepare_decree({})", pid, @@ -170,7 +156,7 @@ bool construct_replica(meta_view view, const gpid &pid, int max_replica_count) return true; } -bool collect_replica(meta_view view, const rpc_address &node, const replica_info &info) +bool collect_replica(meta_view view, const host_port &node, const replica_info &info) { partition_configuration &pc = *get_config(*view.apps, info.pid); // current partition is during partition split @@ -204,12 +190,12 @@ void proposal_actions::reset_tracked_current_learner() current_learner.last_prepared_decree = invalid_decree; } -void proposal_actions::track_current_learner(const dsn::rpc_address &node, const 
replica_info &info) +void proposal_actions::track_current_learner(const dsn::host_port &node, const replica_info &info) { if (empty()) return; configuration_proposal_action &act = acts.front(); - if (act.node != node) + if (act.hp_node != node) return; // currently we only handle add secondary @@ -327,7 +313,7 @@ void config_context::check_size() } } -std::vector::iterator config_context::find_from_dropped(const rpc_address &node) +std::vector::iterator config_context::find_from_dropped(const host_port &node) { return std::find_if(dropped.begin(), dropped.end(), [&node](const dropped_replica &r) { return r.node == node; @@ -335,14 +321,14 @@ std::vector::iterator config_context::find_from_dropped(const r } std::vector::const_iterator -config_context::find_from_dropped(const rpc_address &node) const +config_context::find_from_dropped(const host_port &node) const { return std::find_if(dropped.begin(), dropped.end(), [&node](const dropped_replica &r) { return r.node == node; }); } -bool config_context::remove_from_dropped(const rpc_address &node) +bool config_context::remove_from_dropped(const host_port &node) { auto iter = find_from_dropped(node); if (iter != dropped.end()) { @@ -353,7 +339,7 @@ bool config_context::remove_from_dropped(const rpc_address &node) return false; } -bool config_context::record_drop_history(const rpc_address &node) +bool config_context::record_drop_history(const host_port &node) { auto iter = find_from_dropped(node); if (iter != dropped.end()) @@ -365,7 +351,7 @@ bool config_context::record_drop_history(const rpc_address &node) return true; } -int config_context::collect_drop_replica(const rpc_address &node, const replica_info &info) +int config_context::collect_drop_replica(const host_port &node, const replica_info &info) { bool in_dropped = false; auto iter = find_from_dropped(node); @@ -426,7 +412,7 @@ bool config_context::check_order() return true; } -std::vector::iterator config_context::find_from_serving(const rpc_address &node) 
+std::vector::iterator config_context::find_from_serving(const host_port &node) { return std::find_if(serving.begin(), serving.end(), [&node](const serving_replica &r) { return r.node == node; @@ -434,14 +420,14 @@ std::vector::iterator config_context::find_from_serving(const r } std::vector::const_iterator -config_context::find_from_serving(const rpc_address &node) const +config_context::find_from_serving(const host_port &node) const { return std::find_if(serving.begin(), serving.end(), [&node](const serving_replica &r) { return r.node == node; }); } -bool config_context::remove_from_serving(const rpc_address &node) +bool config_context::remove_from_serving(const host_port &node) { auto iter = find_from_serving(node); if (iter != serving.end()) { @@ -451,7 +437,7 @@ bool config_context::remove_from_serving(const rpc_address &node) return false; } -void config_context::collect_serving_replica(const rpc_address &node, const replica_info &info) +void config_context::collect_serving_replica(const host_port &node, const replica_info &info) { auto iter = find_from_serving(node); auto compact_status = info.__isset.manual_compact_status ? 
info.manual_compact_status @@ -465,12 +451,12 @@ void config_context::collect_serving_replica(const rpc_address &node, const repl } } -void config_context::adjust_proposal(const rpc_address &node, const replica_info &info) +void config_context::adjust_proposal(const host_port &node, const replica_info &info) { lb_actions.track_current_learner(node, info); } -bool config_context::get_disk_tag(const rpc_address &node, /*out*/ std::string &disk_tag) const +bool config_context::get_disk_tag(const host_port &node, /*out*/ std::string &disk_tag) const { auto iter = find_from_serving(node); if (iter == serving.end()) { @@ -549,6 +535,11 @@ app_state::app_state(const app_info &info) : app_info(info), helpers(new app_sta config.max_replica_count = app_info::max_replica_count; config.primary.set_invalid(); config.secondaries.clear(); + + config.__set_hp_primary(host_port()); + config.__set_hp_secondaries({}); + config.__set_hp_last_drops({}); + partitions.assign(app_info::partition_count, config); for (int i = 0; i != app_info::partition_count; ++i) partitions[i].pid.set_partition_index(i); diff --git a/src/meta/meta_data.h b/src/meta/meta_data.h index ea236fa5cc..b1799ba6e1 100644 --- a/src/meta/meta_data.h +++ b/src/meta/meta_data.h @@ -47,15 +47,17 @@ #include "meta_admin_types.h" #include "metadata_types.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "utils/autoref_ptr.h" #include "utils/blob.h" #include "utils/enum_helper.h" #include "utils/error_code.h" #include "utils/extensible_object.h" +#include "utils/fmt_logging.h" namespace dsn { +class dns_resolver; class message_ex; namespace replication { @@ -112,7 +114,7 @@ class proposal_actions public: proposal_actions(); void reset_tracked_current_learner(); - void track_current_learner(const rpc_address &node, const replica_info &info); + void track_current_learner(const host_port &node, const replica_info &info); 
void clear(); // return the action in acts & whether the action is from balancer @@ -143,7 +145,7 @@ class proposal_actions // if you modify the dropped_replica::INVALID_TIMESTAMP, please modify the dropped_cmp accordingly. struct dropped_replica { - dsn::rpc_address node; + dsn::host_port node; // if a drop-replica is generated by the update-cfg-req, then we can // record the drop time (milliseconds) @@ -184,7 +186,7 @@ inline int dropped_cmp(const dropped_replica &d1, const dropped_replica &d2) // Load balancer may use this to do balance decisions. struct serving_replica { - dsn::rpc_address node; + dsn::host_port node; // TODO: report the storage size of replica int64_t storage_mb; std::string disk_tag; @@ -222,37 +224,37 @@ class config_context void check_size(); void cancel_sync(); - std::vector::iterator find_from_dropped(const dsn::rpc_address &node); - std::vector::const_iterator find_from_dropped(const rpc_address &node) const; + std::vector::iterator find_from_dropped(const dsn::host_port &node); + std::vector::const_iterator find_from_dropped(const host_port &node) const; // return true if remove ok, false if node doesn't in dropped - bool remove_from_dropped(const dsn::rpc_address &node); + bool remove_from_dropped(const dsn::host_port &node); // put recently downgraded node to dropped // return true if put ok, false if the node has been in dropped - bool record_drop_history(const dsn::rpc_address &node); + bool record_drop_history(const dsn::host_port &node); // Notice: please make sure whether node is actually an inactive or a serving replica // ret: // 1 => node has been in the dropped // 0 => insert the info to the dropped // -1 => info is too staled to insert - int collect_drop_replica(const dsn::rpc_address &node, const replica_info &info); + int collect_drop_replica(const dsn::host_port &node, const replica_info &info); // check if dropped vector satisfied the order bool check_order(); - std::vector::iterator find_from_serving(const 
dsn::rpc_address &node); - std::vector::const_iterator find_from_serving(const rpc_address &node) const; + std::vector::iterator find_from_serving(const dsn::host_port &node); + std::vector::const_iterator find_from_serving(const host_port &node) const; // return true if remove ok, false if node doesn't in serving - bool remove_from_serving(const dsn::rpc_address &node); + bool remove_from_serving(const dsn::host_port &node); - void collect_serving_replica(const dsn::rpc_address &node, const replica_info &info); + void collect_serving_replica(const dsn::host_port &node, const replica_info &info); - void adjust_proposal(const dsn::rpc_address &node, const replica_info &info); + void adjust_proposal(const dsn::host_port &node, const replica_info &info); - bool get_disk_tag(const rpc_address &node, /*out*/ std::string &disk_tag) const; + bool get_disk_tag(const host_port &node, /*out*/ std::string &disk_tag) const; public: // intialize to 4 statically. @@ -264,19 +266,19 @@ struct partition_configuration_stateless { partition_configuration &config; partition_configuration_stateless(partition_configuration &pc) : config(pc) {} - std::vector &workers() { return config.last_drops; } - std::vector &hosts() { return config.secondaries; } - bool is_host(const rpc_address &node) const + std::vector &workers() { return config.hp_last_drops; } + std::vector &hosts() { return config.hp_secondaries; } + bool is_host(const host_port &node) const { - return std::find(config.secondaries.begin(), config.secondaries.end(), node) != - config.secondaries.end(); + return std::find(config.hp_secondaries.begin(), config.hp_secondaries.end(), node) != + config.hp_secondaries.end(); } - bool is_worker(const rpc_address &node) const + bool is_worker(const host_port &node) const { - return std::find(config.last_drops.begin(), config.last_drops.end(), node) != - config.last_drops.end(); + return std::find(config.hp_last_drops.begin(), config.hp_last_drops.end(), node) != + 
config.hp_last_drops.end(); } - bool is_member(const rpc_address &node) const { return is_host(node) || is_worker(node); } + bool is_member(const host_port &node) const { return is_host(node) || is_worker(node); } }; struct restore_state @@ -392,7 +394,7 @@ class node_state : public extensible_object // status bool is_alive; bool has_collected_replicas; - dsn::rpc_address address; + dsn::host_port hp; const partition_set *get_partitions(app_id id, bool only_primary) const; partition_set *get_partitions(app_id id, bool only_primary, bool create_new); @@ -416,8 +418,8 @@ class node_state : public extensible_object void set_alive(bool alive) { is_alive = alive; } bool has_collected() { return has_collected_replicas; } void set_replicas_collect_flag(bool has_collected) { has_collected_replicas = has_collected; } - dsn::rpc_address addr() const { return address; } - void set_addr(const dsn::rpc_address &addr) { address = addr; } + dsn::host_port host_port() const { return hp; } + void set_hp(const dsn::host_port &val) { hp = val; } void put_partition(const dsn::gpid &pid, bool is_primary); void remove_partition(const dsn::gpid &pid, bool only_primary); @@ -427,7 +429,7 @@ class node_state : public extensible_object bool for_each_primary(app_id id, const std::function &f) const; }; -typedef std::unordered_map node_mapper; +typedef std::unordered_map node_mapper; typedef std::map> migration_list; struct meta_view @@ -436,22 +438,22 @@ struct meta_view node_mapper *nodes; }; -inline node_state *get_node_state(node_mapper &nodes, rpc_address addr, bool create_new) +inline node_state *get_node_state(node_mapper &nodes, host_port hp, bool create_new) { node_state *ns; - if (nodes.find(addr) == nodes.end()) { + if (nodes.find(hp) == nodes.end()) { if (!create_new) return nullptr; - ns = &nodes[addr]; - ns->set_addr(addr); + ns = &nodes[hp]; + ns->set_hp(hp); } - ns = &nodes[addr]; + ns = &nodes[hp]; return ns; } -inline bool is_node_alive(const node_mapper &nodes, rpc_address 
addr) +inline bool is_node_alive(const node_mapper &nodes, host_port addr) { - auto iter = nodes.find(addr); + auto iter = nodes.find(host_port(addr)); if (iter == nodes.end()) return false; return iter->second.alive(); @@ -491,8 +493,8 @@ inline config_context *get_config_context(app_mapper &apps, const dsn::gpid &gpi inline int replica_count(const partition_configuration &pc) { - int ans = (pc.primary.is_invalid()) ? 0 : 1; - return ans + pc.secondaries.size(); + int ans = (pc.hp_primary.is_invalid()) ? 0 : 1; + return ans + pc.hp_secondaries.size(); } enum health_status @@ -509,13 +511,13 @@ enum health_status inline health_status partition_health_status(const partition_configuration &pc, int mutation_2pc_min_replica_count) { - if (pc.primary.is_invalid()) { - if (pc.secondaries.empty()) + if (pc.hp_primary.is_invalid()) { + if (pc.hp_secondaries.empty()) return HS_DEAD; else return HS_UNREADABLE; } else { // !pc.primary.is_invalid() - int n = pc.secondaries.size() + 1; + int n = pc.hp_secondaries.size() + 1; if (n < mutation_2pc_min_replica_count) return HS_UNWRITABLE; else if (n < pc.max_replica_count) @@ -547,16 +549,37 @@ inline int count_partitions(const app_mapper &apps) } void when_update_replicas(config_type::type t, const std::function &func); -void maintain_drops(/*inout*/ std::vector &drops, - const dsn::rpc_address &node, - config_type::type t); + +template +void maintain_drops(/*inout*/ std::vector &drops, const T &node, config_type::type t) +{ + auto action = [&drops, &node](bool is_adding) { + auto it = std::find(drops.begin(), drops.end(), node); + if (is_adding) { + if (it != drops.end()) { + drops.erase(it); + } + } else { + CHECK( + it == drops.end(), "the node({}) cannot be in drops set before this update", node); + drops.push_back(node); + if (drops.size() > 3) { + drops.erase(drops.begin()); + } + } + }; + when_update_replicas(t, action); +} // Try to construct a replica-group by current replica-infos of a gpid // ret: // if construct the 
replica successfully, return true. // Notice: as long as we can construct something from current infos, we treat it as a // success -bool construct_replica(meta_view view, const gpid &pid, int max_replica_count); +bool construct_replica(meta_view view, + const gpid &pid, + int max_replica_count, + const std::shared_ptr &resolver); // When replica infos are collected from replica servers, meta-server // will use this to check if a replica on a server is useful @@ -566,7 +589,7 @@ bool construct_replica(meta_view view, const gpid &pid, int max_replica_count); // ret: // return true if the replica is accepted as an useful replica. Or-else false. // WARNING: if false is returned, the replica on node may be garbage-collected -bool collect_replica(meta_view view, const rpc_address &node, const replica_info &info); +bool collect_replica(meta_view view, const host_port &node, const replica_info &info); inline bool has_seconds_expired(uint64_t second_ts) { return second_ts * 1000 < dsn_now_ms(); } diff --git a/src/meta/meta_http_service.cpp b/src/meta/meta_http_service.cpp index 19f3e53451..cf104c8d22 100644 --- a/src/meta/meta_http_service.cpp +++ b/src/meta/meta_http_service.cpp @@ -47,7 +47,7 @@ #include "meta_http_service.h" #include "meta_server_failure_detector.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "server_load_balancer.h" #include "server_state.h" #include "utils/error_code.h" @@ -129,7 +129,7 @@ void meta_http_service::get_app_handler(const http_request &req, http_response & tp_details.add_column("replica_count"); tp_details.add_column("primary"); tp_details.add_column("secondaries"); - std::map> node_stat; + std::map> node_stat; int total_prim_count = 0; int total_sec_count = 0; @@ -138,14 +138,14 @@ void meta_http_service::get_app_handler(const http_request &req, http_response & int read_unhealthy = 0; for (const auto &p : response.partitions) { int replica_count = 0; - if 
(!p.primary.is_invalid()) { + if (!p.hp_primary.is_invalid()) { replica_count++; - node_stat[p.primary].first++; + node_stat[p.hp_primary].first++; total_prim_count++; } - replica_count += p.secondaries.size(); - total_sec_count += p.secondaries.size(); - if (!p.primary.is_invalid()) { + replica_count += p.hp_secondaries.size(); + total_sec_count += p.hp_secondaries.size(); + if (!p.hp_primary.is_invalid()) { if (replica_count >= p.max_replica_count) fully_healthy++; else if (replica_count < 2) @@ -159,14 +159,14 @@ void meta_http_service::get_app_handler(const http_request &req, http_response & std::stringstream oss; oss << replica_count << "/" << p.max_replica_count; tp_details.append_data(oss.str()); - tp_details.append_data((p.primary.is_invalid() ? "-" : p.primary.to_std_string())); + tp_details.append_data((p.hp_primary.is_invalid() ? "-" : p.hp_primary.to_string())); oss.str(""); oss << "["; - for (int j = 0; j < p.secondaries.size(); j++) { + for (int j = 0; j < p.hp_secondaries.size(); j++) { if (j != 0) oss << ","; - oss << p.secondaries[j].to_std_string(); - node_stat[p.secondaries[j]].second++; + oss << p.hp_secondaries[j]; + node_stat[p.hp_secondaries[j]].second++; } oss << "]"; tp_details.append_data(oss.str()); @@ -180,7 +180,7 @@ void meta_http_service::get_app_handler(const http_request &req, http_response & tp_nodes.add_column("secondary"); tp_nodes.add_column("total"); for (auto &kv : node_stat) { - tp_nodes.add_row(kv.first.to_std_string()); + tp_nodes.add_row(kv.first.to_string()); tp_nodes.append_data(kv.second.first); tp_nodes.append_data(kv.second.second); tp_nodes.append_data(kv.second.first + kv.second.second); @@ -320,11 +320,11 @@ void meta_http_service::list_app_handler(const http_request &req, http_response for (int i = 0; i < response.partitions.size(); i++) { const dsn::partition_configuration &p = response.partitions[i]; int replica_count = 0; - if (!p.primary.is_invalid()) { + if (!p.hp_primary.is_invalid()) { replica_count++; } - 
replica_count += p.secondaries.size(); - if (!p.primary.is_invalid()) { + replica_count += p.hp_secondaries.size(); + if (!p.hp_primary.is_invalid()) { if (replica_count >= p.max_replica_count) fully_healthy++; else if (replica_count < 2) @@ -385,12 +385,12 @@ void meta_http_service::list_node_handler(const http_request &req, http_response if (!redirect_if_not_primary(req, resp)) return; - std::map tmp_map; + std::map tmp_map; for (const auto &node : _service->_alive_set) { - tmp_map.emplace(node, list_nodes_helper(node.to_std_string(), "ALIVE")); + tmp_map.emplace(node, list_nodes_helper(node.to_string(), "ALIVE")); } for (const auto &node : _service->_dead_set) { - tmp_map.emplace(node, list_nodes_helper(node.to_std_string(), "UNALIVE")); + tmp_map.emplace(node, list_nodes_helper(node.to_string(), "UNALIVE")); } int alive_node_count = (_service->_alive_set).size(); int unalive_node_count = (_service->_dead_set).size(); @@ -410,14 +410,14 @@ void meta_http_service::list_node_handler(const http_request &req, http_response for (int i = 0; i < response_app.partitions.size(); i++) { const dsn::partition_configuration &p = response_app.partitions[i]; - if (!p.primary.is_invalid()) { - auto find = tmp_map.find(p.primary); + if (!p.hp_primary.is_invalid()) { + auto find = tmp_map.find(p.hp_primary); if (find != tmp_map.end()) { find->second.primary_count++; } } - for (int j = 0; j < p.secondaries.size(); j++) { - auto find = tmp_map.find(p.secondaries[j]); + for (int j = 0; j < p.hp_secondaries.size(); j++) { + auto find = tmp_map.find(p.hp_secondaries[j]); if (find != tmp_map.end()) { find->second.secondary_count++; } @@ -470,13 +470,13 @@ void meta_http_service::get_cluster_info_handler(const http_request &req, http_r std::string meta_servers_str; int ms_size = _service->_opts.meta_servers.size(); for (int i = 0; i < ms_size; i++) { - meta_servers_str += _service->_opts.meta_servers[i].to_std_string(); + meta_servers_str += _service->_opts.meta_servers[i].to_string(); 
if (i != ms_size - 1) { meta_servers_str += ","; } } tp.add_row_name_and_data("meta_servers", meta_servers_str); - tp.add_row_name_and_data("primary_meta_server", dsn_primary_address().to_std_string()); + tp.add_row_name_and_data("primary_meta_server", dsn_primary_host_port().to_string()); tp.add_row_name_and_data("zookeeper_hosts", dsn::dist::FLAGS_hosts_list); tp.add_row_name_and_data("zookeeper_root", _service->_cluster_root); tp.add_row_name_and_data( @@ -847,13 +847,13 @@ bool meta_http_service::redirect_if_not_primary(const http_request &req, http_re } #endif - rpc_address leader; + host_port leader; if (_service->_failure_detector->get_leader(&leader)) { return true; } // set redirect response - resp.location = "http://" + leader.to_std_string() + '/' + req.path; + resp.location = "http://" + leader.to_string() + '/' + req.path; if (!req.query_args.empty()) { resp.location += '?'; for (const auto &i : req.query_args) { diff --git a/src/meta/meta_server_failure_detector.cpp b/src/meta/meta_server_failure_detector.cpp index 0e76c3a263..0f4a600e2d 100644 --- a/src/meta/meta_server_failure_detector.cpp +++ b/src/meta/meta_server_failure_detector.cpp @@ -34,6 +34,8 @@ #include "meta/meta_options.h" #include "meta/meta_service.h" #include "runtime/app_model.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_address.h" #include "runtime/serverlet.h" #include "runtime/task/task_code.h" #include "utils/autoref_ptr.h" @@ -63,8 +65,10 @@ DSN_DEFINE_string(meta_server, namespace dsn { namespace replication { -meta_server_failure_detector::meta_server_failure_detector(meta_service *svc) - : _svc(svc), +meta_server_failure_detector::meta_server_failure_detector( + const std::shared_ptr &resolver, meta_service *svc) + : dsn::fd::failure_detector(resolver), + _svc(svc), _lock_svc(nullptr), _primary_lock_id("dsn.meta.server.leader"), _is_leader(false), @@ -89,25 +93,25 @@ meta_server_failure_detector::~meta_server_failure_detector() } } -void 
meta_server_failure_detector::on_worker_disconnected(const std::vector &nodes) +void meta_server_failure_detector::on_worker_disconnected(const std::vector &nodes) { _svc->set_node_state(nodes, false); } -void meta_server_failure_detector::on_worker_connected(rpc_address node) +void meta_server_failure_detector::on_worker_connected(host_port node) { - _svc->set_node_state(std::vector{node}, true); + _svc->set_node_state({node}, true); } -bool meta_server_failure_detector::get_leader(rpc_address *leader) +bool meta_server_failure_detector::get_leader(host_port *leader) { FAIL_POINT_INJECT_F("meta_server_failure_detector_get_leader", [leader](absl::string_view str) { /// the format of str is : true#{ip}:{port} or false#{ip}:{port} auto pos = str.find("#"); - // get leader addr + // get leader host_port auto addr_part = str.substr(pos + 1, str.length() - pos - 1); - if (!leader->from_string_ipv4(addr_part.data())) { - CHECK(false, "parse {} to rpc_address failed", addr_part); + if (!leader->from_string(addr_part.data())) { + CHECK(false, "parse {} to host_port failed", addr_part); } // get the return value which implies whether the current node is primary or not @@ -119,26 +123,26 @@ bool meta_server_failure_detector::get_leader(rpc_address *leader) return is_leader; }); - dsn::rpc_address holder; + dsn::host_port holder; if (leader == nullptr) { leader = &holder; } if (_is_leader.load()) { - *leader = dsn_primary_address(); + *leader = dsn_primary_host_port(); return true; } else if (_lock_svc == nullptr) { - leader->set_invalid(); + leader->reset(); return false; } else { std::string lock_owner; uint64_t version; error_code err = _lock_svc->query_cache(_primary_lock_id, lock_owner, version); - if (err == dsn::ERR_OK && leader->from_string_ipv4(lock_owner.c_str())) { - return (*leader) == dsn_primary_address(); + if (err == dsn::ERR_OK && leader->from_string(lock_owner.c_str())) { + return (*leader) == dsn_primary_host_port(); } else { LOG_WARNING("query leader from 
cache got error({})", err); - leader->set_invalid(); + leader->reset(); return false; } } @@ -155,7 +159,7 @@ void meta_server_failure_detector::acquire_leader_lock() error_code err; auto tasks = _lock_svc->lock( _primary_lock_id, - dsn_primary_address().to_std_string(), + dsn_primary_host_port().to_string(), // lock granted LPC_META_SERVER_LEADER_LOCK_CALLBACK, [this, &err](error_code ec, const std::string &owner, uint64_t version) { @@ -194,7 +198,7 @@ void meta_server_failure_detector::acquire_leader_lock() } } -void meta_server_failure_detector::reset_stability_stat(const rpc_address &node) +void meta_server_failure_detector::reset_stability_stat(const host_port &node) { zauto_lock l(_map_lock); auto iter = _stablity.find(node); @@ -212,11 +216,9 @@ void meta_server_failure_detector::reset_stability_stat(const rpc_address &node) void meta_server_failure_detector::leader_initialize(const std::string &lock_service_owner) { - dsn::rpc_address addr; - CHECK(addr.from_string_ipv4(lock_service_owner.c_str()), - "parse {} to rpc_address failed", - lock_service_owner); - CHECK_EQ_MSG(addr, dsn_primary_address(), "acquire leader return success, but owner not match"); + dsn::host_port hp; + CHECK(hp.from_string(lock_service_owner), "parse {} to host_port failed", lock_service_owner); + CHECK_EQ_MSG(hp, dsn_primary_host_port(), "acquire leader return success, but owner not match"); _is_leader.store(true); _election_moment.store(dsn_now_ms()); } @@ -224,36 +226,46 @@ void meta_server_failure_detector::leader_initialize(const std::string &lock_ser bool meta_server_failure_detector::update_stability_stat(const fd::beacon_msg &beacon) { zauto_lock l(_map_lock); - auto iter = _stablity.find(beacon.from_addr); + + host_port hp_from_addr; + GET_HOST_PORT(beacon, from_addr, hp_from_addr); + + auto iter = _stablity.find(hp_from_addr); if (iter == _stablity.end()) { - _stablity.emplace(beacon.from_addr, worker_stability{beacon.start_time, 0}); + _stablity.emplace(hp_from_addr, 
worker_stability{beacon.start_time, 0}); return true; } else { worker_stability &w = iter->second; if (beacon.start_time == w.last_start_time_ms) { - LOG_DEBUG( - "{} isn't restarted, last_start_time({})", beacon.from_addr, w.last_start_time_ms); + LOG_DEBUG("{}({}) isn't restarted, last_start_time({})", + hp_from_addr, + beacon.from_addr, + w.last_start_time_ms); if (dsn_now_ms() - w.last_start_time_ms >= FLAGS_stable_rs_min_running_seconds * 1000 && w.unstable_restart_count > 0) { - LOG_INFO("{} has stably run for a while, reset it's unstable count({}) to 0", + LOG_INFO("{}({}) has stably run for a while, reset it's unstable count({}) to 0", + hp_from_addr, beacon.from_addr, w.unstable_restart_count); w.unstable_restart_count = 0; } } else if (beacon.start_time > w.last_start_time_ms) { - LOG_INFO("check {} restarted, last_time({}), this_time({})", + LOG_INFO("check {}({}) restarted, last_time({}), this_time({})", + hp_from_addr, beacon.from_addr, w.last_start_time_ms, beacon.start_time); if (beacon.start_time - w.last_start_time_ms < FLAGS_stable_rs_min_running_seconds * 1000) { w.unstable_restart_count++; - LOG_WARNING("{} encounter an unstable restart, total_count({})", + LOG_WARNING("{}({}) encounter an unstable restart, total_count({})", + hp_from_addr, beacon.from_addr, w.unstable_restart_count); } else if (w.unstable_restart_count > 0) { - LOG_INFO("{} restart in {} ms after last restart, may recover ok, reset " + LOG_INFO("{}({}) restart in {} ms after last restart, may recover ok, reset " "it's unstable count({}) to 0", + hp_from_addr, beacon.from_addr, beacon.start_time - w.last_start_time_ms, w.unstable_restart_count); @@ -262,7 +274,9 @@ bool meta_server_failure_detector::update_stability_stat(const fd::beacon_msg &b w.last_start_time_ms = beacon.start_time; } else { - LOG_WARNING("{}: possible encounter a staled message, ignore it", beacon.from_addr); + LOG_WARNING("{}({}): possible encounter a staled message, ignore it", + hp_from_addr, + 
beacon.from_addr); } return w.unstable_restart_count < FLAGS_max_succssive_unstable_restart; } @@ -271,50 +285,64 @@ bool meta_server_failure_detector::update_stability_stat(const fd::beacon_msg &b void meta_server_failure_detector::on_ping(const fd::beacon_msg &beacon, rpc_replier &reply) { - fd::beacon_ack ack; - ack.time = beacon.time; - ack.this_node = beacon.to_addr; - ack.allowed = true; + host_port hp_from_addr, hp_to_addr; + GET_HOST_PORT(beacon, from_addr, hp_from_addr); + GET_HOST_PORT(beacon, to_addr, hp_to_addr); if (beacon.__isset.start_time && !update_stability_stat(beacon)) { - LOG_WARNING("{} is unstable, don't response to it's beacon", beacon.from_addr); + LOG_WARNING( + "{}({}) is unstable, don't response to it's beacon", beacon.from_addr, hp_from_addr); return; } - dsn::rpc_address leader; + fd::beacon_ack ack; + ack.time = beacon.time; + ack.this_node = beacon.to_addr; + ack.allowed = true; + ack.__set_hp_this_node(hp_to_addr); + + dsn::host_port leader; if (!get_leader(&leader)) { ack.is_master = false; - ack.primary_node = leader; + ack.primary_node = _dns_resolver->resolve_address(leader); + ack.__set_hp_primary_node(leader); } else { ack.is_master = true; ack.primary_node = beacon.to_addr; + ack.__set_hp_primary_node(hp_to_addr); failure_detector::on_ping_internal(beacon, ack); } - LOG_INFO("on_ping, beacon send time[{}], is_master({}), from_node({}), this_node({}), " - "primary_node({})", + LOG_INFO("on_ping, beacon send time[{}], is_master({}), from_node({}({})), this_node({}({})), " + "primary_node({}({}))", ack.time, ack.is_master ? 
"true" : "false", + hp_from_addr, beacon.from_addr, - ack.this_node, + hp_to_addr, + beacon.to_addr, + ack.hp_primary_node, ack.primary_node); reply(ack); } /*the following functions are only for test*/ -meta_server_failure_detector::meta_server_failure_detector(rpc_address leader_address, - bool is_myself_leader) +meta_server_failure_detector::meta_server_failure_detector( + const std::shared_ptr &resolver, + host_port leader_host_port, + bool is_myself_leader) + : dsn::fd::failure_detector(resolver) { - LOG_INFO("set {} as leader", leader_address); + LOG_INFO("set {} as leader", leader_host_port); _lock_svc = nullptr; _is_leader.store(is_myself_leader); } -void meta_server_failure_detector::set_leader_for_test(rpc_address leader_address, +void meta_server_failure_detector::set_leader_for_test(host_port leader_host_port, bool is_myself_leader) { - LOG_INFO("set {} as leader", leader_address); + LOG_INFO("set {} as leader", leader_host_port); _is_leader.store(is_myself_leader); } diff --git a/src/meta/meta_server_failure_detector.h b/src/meta/meta_server_failure_detector.h index e6cf1e3b51..4d80650c29 100644 --- a/src/meta/meta_server_failure_detector.h +++ b/src/meta/meta_server_failure_detector.h @@ -29,12 +29,13 @@ #include #include #include +#include #include #include #include "failure_detector/failure_detector.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "utils/fmt_logging.h" #include "utils/zlocks.h" @@ -47,6 +48,7 @@ namespace fd { class beacon_ack; class beacon_msg; } // namespace fd +class dns_resolver; template class rpc_replier; @@ -70,22 +72,22 @@ class meta_server_failure_detector : public fd::failure_detector { } }; - typedef std::map stability_map; + typedef std::map stability_map; public: - meta_server_failure_detector(meta_service *svc); + meta_server_failure_detector(const std::shared_ptr &resolver, meta_service *svc); virtual 
~meta_server_failure_detector(); // get the meta-server's leader - // leader: the leader's address. Invalid if no leader selected + // leader: the leader's host_port. Invalid if no leader selected // if leader==nullptr, then the new leader won't be returned // ret true if i'm the current leader; false if not. - bool get_leader(/*output*/ dsn::rpc_address *leader); + bool get_leader(/*output*/ dsn::host_port *leader); // return if acquire the leader lock, or-else blocked forever void acquire_leader_lock(); - void reset_stability_stat(const dsn::rpc_address &node); + void reset_stability_stat(const dsn::host_port &node); // _fd_opts is initialized in constructor with a fd_suboption stored in meta_service. // so usually you don't need to call this. @@ -94,18 +96,18 @@ class meta_server_failure_detector : public fd::failure_detector void set_options(fd_suboptions *options) { _fd_opts = options; } // client side - virtual void on_master_disconnected(const std::vector &) + virtual void on_master_disconnected(const std::vector &) { CHECK(false, "unsupported method"); } - virtual void on_master_connected(rpc_address) { CHECK(false, "unsupported method"); } + virtual void on_master_connected(host_port) { CHECK(false, "unsupported method"); } // server side // it is in the protection of failure_detector::_lock - virtual void on_worker_disconnected(const std::vector &nodes) override; + virtual void on_worker_disconnected(const std::vector &nodes) override; // it is in the protection of failure_detector::_lock - virtual void on_worker_connected(rpc_address node) override; - virtual bool is_worker_connected(rpc_address node) const override + virtual void on_worker_connected(host_port node) override; + virtual bool is_worker_connected(host_port node) const override { // we treat all nodes not in the worker list alive in the first grace period. 
// For the reason, please consider this situation: @@ -150,8 +152,10 @@ class meta_server_failure_detector : public fd::failure_detector public: /* these two functions are for test */ - meta_server_failure_detector(rpc_address leader_address, bool is_myself_leader); - void set_leader_for_test(rpc_address leader_address, bool is_myself_leader); + meta_server_failure_detector(const std::shared_ptr &resolver, + host_port leader_host_port, + bool is_myself_leader); + void set_leader_for_test(host_port leader_host_port, bool is_myself_leader); stability_map *get_stability_map_for_test(); }; } diff --git a/src/meta/meta_service.cpp b/src/meta/meta_service.cpp index 8fc64c1ba7..3ec3f95a27 100644 --- a/src/meta/meta_service.cpp +++ b/src/meta/meta_service.cpp @@ -57,6 +57,7 @@ #include "partition_split_types.h" #include "remote_cmd/remote_command.h" #include "ranger/ranger_resource_policy_manager.h" +#include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_holder.h" #include "runtime/task/async_calls.h" #include "server_load_balancer.h" @@ -163,7 +164,8 @@ meta_service::meta_service() _recovering(false), METRIC_VAR_INIT_server(replica_server_disconnections), METRIC_VAR_INIT_server(unalive_replica_servers), - METRIC_VAR_INIT_server(alive_replica_servers) + METRIC_VAR_INIT_server(alive_replica_servers), + _dns_resolver(new dns_resolver()) { _opts.initialize(); _meta_opts.initialize(); @@ -241,7 +243,7 @@ error_code meta_service::remote_storage_initialize() } // visited in protection of failure_detector::_lock -void meta_service::set_node_state(const std::vector &nodes, bool is_alive) +void meta_service::set_node_state(const std::vector &nodes, bool is_alive) { for (auto &node : nodes) { if (is_alive) { @@ -260,16 +262,15 @@ void meta_service::set_node_state(const std::vector &nodes, bool is if (!_started) { return; } - for (const rpc_address &address : nodes) { - tasking::enqueue( - LPC_META_STATE_HIGH, - nullptr, - std::bind(&server_state::on_change_node_state, 
_state.get(), address, is_alive), - server_state::sStateHash); + for (const auto &hp : nodes) { + tasking::enqueue(LPC_META_STATE_HIGH, + nullptr, + std::bind(&server_state::on_change_node_state, _state.get(), hp, is_alive), + server_state::sStateHash); } } -void meta_service::get_node_state(/*out*/ std::map &all_nodes) +void meta_service::get_node_state(/*out*/ std::map &all_nodes) { zauto_lock l(_failure_detector->_lock); for (auto &node : _alive_set) @@ -340,7 +341,7 @@ void meta_service::start_service() METRIC_VAR_SET(alive_replica_servers, _alive_set.size()); - for (const dsn::rpc_address &node : _alive_set) { + for (const auto &node : _alive_set) { // sync alive set and the failure_detector _failure_detector->unregister_worker(node); _failure_detector->register_worker(node, true); @@ -352,13 +353,13 @@ void meta_service::start_service() _access_controller = security::create_meta_access_controller(_ranger_resource_policy_manager); _started = true; - for (const dsn::rpc_address &node : _alive_set) { + for (const auto &node : _alive_set) { tasking::enqueue(LPC_META_STATE_HIGH, nullptr, std::bind(&server_state::on_change_node_state, _state.get(), node, true), server_state::sStateHash); } - for (const dsn::rpc_address &node : _dead_set) { + for (const auto &node : _dead_set) { tasking::enqueue(LPC_META_STATE_HIGH, nullptr, std::bind(&server_state::on_change_node_state, _state.get(), node, false), @@ -400,7 +401,7 @@ error_code meta_service::start() LOG_INFO("remote storage is successfully initialized"); // start failure detector, and try to acquire the leader lock - _failure_detector.reset(new meta_server_failure_detector(this)); + _failure_detector.reset(new meta_server_failure_detector(_dns_resolver, this)); if (FLAGS_enable_white_list) _failure_detector->set_allow_list(_meta_opts.replica_white_list); _failure_detector->register_ctrl_commands(); @@ -425,7 +426,8 @@ error_code meta_service::start() _failure_detector->acquire_leader_lock(); 
CHECK(_failure_detector->get_leader(nullptr), "must be primary at this point"); - LOG_INFO("{} got the primary lock, start to recover server state from remote storage", + LOG_INFO("{}({}) got the primary lock, start to recover server state from remote storage", + dsn_primary_host_port(), dsn_primary_address()); // initialize the load balancer @@ -571,10 +573,9 @@ void meta_service::register_rpc_handlers() &meta_service::on_set_max_replica_count); } -meta_leader_state meta_service::check_leader(dsn::message_ex *req, - dsn::rpc_address *forward_address) +meta_leader_state meta_service::check_leader(dsn::message_ex *req, dsn::host_port *forward_address) { - dsn::rpc_address leader; + host_port leader; if (!_failure_detector->get_leader(&leader)) { if (!req->header->context.u.is_forward_supported) { if (forward_address != nullptr) @@ -584,11 +585,11 @@ meta_leader_state meta_service::check_leader(dsn::message_ex *req, LOG_DEBUG("leader address: {}", leader); if (!leader.is_invalid()) { - dsn_rpc_forward(req, leader); + dsn_rpc_forward(req, _dns_resolver->resolve_address(leader)); return meta_leader_state::kNotLeaderAndCanForwardRpc; } else { if (forward_address != nullptr) - forward_address->set_invalid(); + forward_address->reset(); return meta_leader_state::kNotLeaderAndCannotForwardRpc; } } @@ -701,7 +702,8 @@ void meta_service::on_list_nodes(configuration_list_nodes_rpc rpc) if (request.status == node_status::NS_INVALID || request.status == node_status::NS_ALIVE) { info.status = node_status::NS_ALIVE; for (auto &node : _alive_set) { - info.address = node; + info.address = _dns_resolver->resolve_address(node); + info.__set_hp_address(node); response.infos.push_back(info); } } @@ -709,7 +711,8 @@ void meta_service::on_list_nodes(configuration_list_nodes_rpc rpc) request.status == node_status::NS_UNALIVE) { info.status = node_status::NS_UNALIVE; for (auto &node : _dead_set) { - info.address = node; + info.address = _dns_resolver->resolve_address(node); + 
info.__set_hp_address(node); response.infos.push_back(info); } } @@ -734,7 +737,7 @@ void meta_service::on_query_cluster_info(configuration_cluster_info_rpc rpc) response.values.push_back(oss.str()); response.keys.push_back("primary_meta_server"); - response.values.push_back(dsn_primary_address().to_std_string()); + response.values.push_back(dsn_primary_host_port().to_string()); response.keys.push_back("zookeeper_hosts"); response.values.push_back(dsn::dist::FLAGS_hosts_list); response.keys.push_back("zookeeper_root"); @@ -761,11 +764,12 @@ void meta_service::on_query_cluster_info(configuration_cluster_info_rpc rpc) void meta_service::on_query_configuration_by_index(configuration_query_by_index_rpc rpc) { query_cfg_response &response = rpc.response(); - rpc_address forward_address; - if (!check_status_and_authz(rpc, &forward_address)) { - if (!forward_address.is_invalid()) { + host_port forward_hp; + if (!check_status_and_authz(rpc, &forward_hp)) { + if (!forward_hp.is_invalid()) { partition_configuration config; - config.primary = forward_address; + config.primary = _dns_resolver->resolve_address(forward_hp); + config.__set_hp_primary(forward_hp); response.partitions.push_back(std::move(config)); } return; @@ -881,7 +885,8 @@ void meta_service::on_start_recovery(configuration_recovery_rpc rpc) } else { zauto_write_lock l(_meta_lock); if (_started.load()) { - LOG_INFO("service({}) is already started, ignore the recovery request", + LOG_INFO("service({}({})) is already started, ignore the recovery request", + dsn_primary_host_port(), dsn_primary_address()); response.err = ERR_SERVICE_ALREADY_RUNNING; } else { @@ -1065,7 +1070,7 @@ void meta_service::register_duplication_rpc_handlers() void meta_service::initialize_duplication_service() { if (FLAGS_duplication_enabled) { - _dup_svc = std::make_unique(_state.get(), this); + _dup_svc = std::make_unique(_state.get(), this, _dns_resolver); } } diff --git a/src/meta/meta_service.h b/src/meta/meta_service.h index 
e46ad908bd..3cb6b2b179 100644 --- a/src/meta/meta_service.h +++ b/src/meta/meta_service.h @@ -47,8 +47,9 @@ #include "meta_rpc_types.h" #include "meta_server_failure_detector.h" #include "runtime/api_layer1.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/network.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "security/access_controller.h" @@ -172,20 +173,20 @@ class meta_service : public serverlet { dsn_rpc_reply(response); } - virtual void send_message(const rpc_address &target, dsn::message_ex *request) + virtual void send_message(const host_port &target, dsn::message_ex *request) { - dsn_rpc_call_one_way(target, request); + dsn_rpc_call_one_way(_dns_resolver->resolve_address(target), request); } virtual void send_request(dsn::message_ex * /*req*/, - const rpc_address &target, + const host_port &target, const rpc_response_task_ptr &callback) { - dsn_rpc_call(target, callback); + dsn_rpc_call(_dns_resolver->resolve_address(target), callback); } // these two callbacks are running in fd's thread_pool, and in fd's lock - void set_node_state(const std::vector &nodes_list, bool is_alive); - void get_node_state(/*out*/ std::map &all_nodes); + void set_node_state(const std::vector &nodes_list, bool is_alive); + void get_node_state(/*out*/ std::map &all_nodes); void start_service(); void balancer_run(); @@ -209,6 +210,8 @@ class meta_service : public serverlet std::string cluster_root() const { return _cluster_root; } + std::shared_ptr get_dns_resolver() { return _dns_resolver; } + private: void register_rpc_handlers(); void register_ctrl_commands(); @@ -287,9 +290,9 @@ class meta_service : public serverlet // if return 'kNotLeaderAndCannotForwardRpc' and 'forward_address' != nullptr, then return // leader by 'forward_address'. 
- meta_leader_state check_leader(dsn::message_ex *req, dsn::rpc_address *forward_address); + meta_leader_state check_leader(dsn::message_ex *req, dsn::host_port *forward_address); template - meta_leader_state check_leader(TRpcHolder rpc, /*out*/ rpc_address *forward_address); + meta_leader_state check_leader(TRpcHolder rpc, /*out*/ host_port *forward_address); // app_name: when the Ranger ACL is enabled, some rpc requests need to verify the app_name // ret: @@ -297,7 +300,7 @@ class meta_service : public serverlet // true: rpc request check and authentication succeed template bool check_status_and_authz(TRpcHolder rpc, - /*out*/ rpc_address *forward_address = nullptr, + /*out*/ host_port *forward_address = nullptr, const std::string &app_name = ""); // app_name: when the Ranger ACL is enabled, some rpc requests need to verify the app_name @@ -312,7 +315,7 @@ class meta_service : public serverlet bool check_status_and_authz_with_reply(message_ex *msg); template - bool check_leader_status(TRpcHolder rpc, rpc_address *forward_address = nullptr); + bool check_leader_status(TRpcHolder rpc, host_port *forward_address = nullptr); error_code remote_storage_initialize(); bool check_freeze() const; @@ -363,8 +366,8 @@ class meta_service : public serverlet // [ // this is protected by failure_detector::_lock - std::set _alive_set; - std::set _dead_set; + std::set _alive_set; + std::set _dead_set; // ] mutable zrwlock_nr _meta_lock; @@ -388,12 +391,15 @@ class meta_service : public serverlet // indicate which operation is processeding in meta server std::atomic _meta_op_status; + + // Resolve host_port to address. 
+ std::shared_ptr _dns_resolver; }; template -meta_leader_state meta_service::check_leader(TRpcHolder rpc, rpc_address *forward_address) +meta_leader_state meta_service::check_leader(TRpcHolder rpc, host_port *forward_address) { - dsn::rpc_address leader; + host_port leader; if (!_failure_detector->get_leader(&leader)) { if (!rpc.dsn_request()->header->context.u.is_forward_supported) { if (forward_address != nullptr) @@ -403,11 +409,11 @@ meta_leader_state meta_service::check_leader(TRpcHolder rpc, rpc_address *forwar LOG_DEBUG("leader address: {}", leader); if (!leader.is_invalid()) { - rpc.forward(leader); + rpc.forward(_dns_resolver->resolve_address(leader)); return meta_leader_state::kNotLeaderAndCanForwardRpc; } else { if (forward_address != nullptr) - forward_address->set_invalid(); + forward_address->reset(); return meta_leader_state::kNotLeaderAndCannotForwardRpc; } } @@ -415,7 +421,7 @@ meta_leader_state meta_service::check_leader(TRpcHolder rpc, rpc_address *forwar } template -bool meta_service::check_leader_status(TRpcHolder rpc, rpc_address *forward_address) +bool meta_service::check_leader_status(TRpcHolder rpc, host_port *forward_address) { auto result = check_leader(rpc, forward_address); if (result == meta_leader_state::kNotLeaderAndCanForwardRpc) @@ -439,7 +445,7 @@ bool meta_service::check_leader_status(TRpcHolder rpc, rpc_address *forward_addr // above policy information may be out of date. 
template bool meta_service::check_status_and_authz(TRpcHolder rpc, - rpc_address *forward_address, + host_port *forward_address, const std::string &app_name) { if (!check_leader_status(rpc, forward_address)) { diff --git a/src/meta/meta_split_service.cpp b/src/meta/meta_split_service.cpp index 33610fc6a4..cafa11c36c 100644 --- a/src/meta/meta_split_service.cpp +++ b/src/meta/meta_split_service.cpp @@ -305,9 +305,11 @@ void meta_split_service::on_add_child_on_remote_storage_reply(error_code ec, update_child_request->info = *app; update_child_request->type = config_type::CT_REGISTER_CHILD; update_child_request->node = request.primary_address; + update_child_request->__set_hp_node(request.hp_primary); partition_configuration child_config = app->partitions[child_gpid.get_partition_index()]; child_config.secondaries = request.child_config.secondaries; + child_config.__set_hp_secondaries(request.child_config.hp_secondaries); _state->update_configuration_locally(*app, update_child_request); if (parent_context.msg) { diff --git a/src/meta/partition_guardian.cpp b/src/meta/partition_guardian.cpp index 0c161067da..29f534d0fb 100644 --- a/src/meta/partition_guardian.cpp +++ b/src/meta/partition_guardian.cpp @@ -17,6 +17,7 @@ #include "meta/partition_guardian.h" +#include // IWYU pragma: no_include #include #include @@ -26,11 +27,14 @@ #include "common/replication_common.h" #include "common/replication_other_types.h" +#include "meta/greedy_load_balancer.h" // IWYU pragma: keep #include "meta/meta_data.h" #include "meta/meta_service.h" #include "meta/server_load_balancer.h" #include "meta/server_state.h" #include "meta/table_metrics.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_address.h" #include "utils/flags.h" #include "utils/fmt_logging.h" #include "utils/metrics.h" @@ -72,11 +76,11 @@ pc_status partition_guardian::cure(meta_view view, CHECK(acts.empty(), ""); pc_status status; - if (pc.primary.is_invalid()) + if (pc.hp_primary.is_invalid()) status = 
on_missing_primary(view, gpid); - else if (static_cast(pc.secondaries.size()) + 1 < pc.max_replica_count) + else if (static_cast(pc.hp_secondaries.size()) + 1 < pc.max_replica_count) status = on_missing_secondary(view, gpid); - else if (static_cast(pc.secondaries.size()) >= pc.max_replica_count) + else if (static_cast(pc.hp_secondaries.size()) >= pc.max_replica_count) status = on_redundant_secondary(view, gpid); else status = pc_status::healthy; @@ -111,22 +115,23 @@ void partition_guardian::reconfig(meta_view view, const configuration_update_req if (request.type == config_type::CT_DROP_PARTITION) { cc->serving.clear(); - const std::vector &config_dropped = request.config.last_drops; - for (const rpc_address &drop_node : config_dropped) { + const std::vector &config_dropped = request.config.hp_last_drops; + for (const auto &drop_node : config_dropped) { cc->record_drop_history(drop_node); } } else { when_update_replicas(request.type, [cc, &request](bool is_adding) { if (is_adding) { - cc->remove_from_dropped(request.node); + cc->remove_from_dropped(request.hp_node); // when some replicas are added to partition_config // we should try to adjust the size of drop_list cc->check_size(); } else { - cc->remove_from_serving(request.node); + cc->remove_from_serving(request.hp_node); - CHECK(cc->record_drop_history(request.node), - "node({}) has been in the dropped", + CHECK(cc->record_drop_history(request.hp_node), + "node({}({})) has been in the dropped", + request.hp_node, request.node); } }); @@ -146,64 +151,63 @@ bool partition_guardian::from_proposals(meta_view &view, return false; } action = *(cc.lb_actions.front()); - char reason[1024]; - if (action.target.is_invalid()) { - sprintf(reason, "action target is invalid"); + std::string reason; + if (action.target.is_invalid() || action.hp_target.is_invalid()) { + reason = "action target is invalid"; goto invalid_action; } - if (action.node.is_invalid()) { - sprintf(reason, "action node is invalid"); + if 
(action.node.is_invalid() || action.hp_node.is_invalid()) { + reason = "action node is invalid"; goto invalid_action; } - if (!is_node_alive(*(view.nodes), action.target)) { - sprintf(reason, "action target(%s) is not alive", action.target.to_string()); + if (!is_node_alive(*(view.nodes), action.hp_target)) { + reason = fmt::format("action target({}) is not alive", action.hp_target); goto invalid_action; } - if (!is_node_alive(*(view.nodes), action.node)) { - sprintf(reason, "action node(%s) is not alive", action.node.to_string()); + if (!is_node_alive(*(view.nodes), action.hp_node)) { + reason = fmt::format("action node({}) is not alive", action.hp_node); goto invalid_action; } if (cc.lb_actions.is_abnormal_learning_proposal()) { - sprintf(reason, "learning process abnormal"); + reason = "learning process abnormal"; goto invalid_action; } switch (action.type) { case config_type::CT_ASSIGN_PRIMARY: - is_action_valid = (action.node == action.target && pc.primary.is_invalid() && - !is_secondary(pc, action.node)); + is_action_valid = (action.hp_node == action.hp_target && pc.primary.is_invalid() && + !is_secondary(pc, action.hp_node)); break; case config_type::CT_UPGRADE_TO_PRIMARY: - is_action_valid = (action.node == action.target && pc.primary.is_invalid() && - is_secondary(pc, action.node)); + is_action_valid = (action.hp_node == action.hp_target && pc.primary.is_invalid() && + is_secondary(pc, action.hp_node)); break; case config_type::CT_ADD_SECONDARY: case config_type::CT_ADD_SECONDARY_FOR_LB: - is_action_valid = (is_primary(pc, action.target) && !is_secondary(pc, action.node)); - is_action_valid = (is_action_valid && is_node_alive(*(view.nodes), action.node)); + is_action_valid = (is_primary(pc, action.hp_target) && !is_secondary(pc, action.hp_node)); + is_action_valid = (is_action_valid && is_node_alive(*(view.nodes), action.hp_node)); break; case config_type::CT_DOWNGRADE_TO_INACTIVE: case config_type::CT_REMOVE: - is_action_valid = (is_primary(pc, 
action.target) && is_member(pc, action.node)); + is_action_valid = (is_primary(pc, action.hp_target) && is_member(pc, action.hp_node)); break; case config_type::CT_DOWNGRADE_TO_SECONDARY: - is_action_valid = (action.target == action.node && is_primary(pc, action.target)); + is_action_valid = (action.hp_target == action.hp_node && is_primary(pc, action.hp_target)); break; default: is_action_valid = false; break; } - if (is_action_valid) + if (is_action_valid) { return true; - else - sprintf(reason, "action is invalid"); + } else { + reason = "action is invalid"; + } invalid_action: - std::stringstream ss; - ss << action; LOG_INFO("proposal action({}) for gpid({}) is invalid, clear all proposal actions: {}", - ss.str(), + action, gpid, reason); action.type = config_type::CT_INVALID; @@ -231,42 +235,45 @@ pc_status partition_guardian::on_missing_primary(meta_view &view, const dsn::gpi action.type = config_type::CT_INVALID; // try to upgrade a secondary to primary if the primary is missing - if (pc.secondaries.size() > 0) { + if (pc.hp_secondaries.size() > 0) { action.node.set_invalid(); + action.__set_hp_node(host_port()); - for (int i = 0; i < pc.secondaries.size(); ++i) { - node_state *ns = get_node_state(*(view.nodes), pc.secondaries[i], false); - CHECK_NOTNULL(ns, "invalid secondary address, address = {}", pc.secondaries[i]); + for (int i = 0; i < pc.hp_secondaries.size(); ++i) { + node_state *ns = get_node_state(*(view.nodes), pc.hp_secondaries[i], false); + CHECK_NOTNULL(ns, "invalid secondary address, address = {}", pc.hp_secondaries[i]); if (!ns->alive()) continue; // find a node with minimal primaries newly_partitions *np = newly_partitions_ext::get_inited(ns); - if (action.node.is_invalid() || - np->less_primaries(*get_newly_partitions(*(view.nodes), action.node), + if (action.hp_node.is_invalid() || + np->less_primaries(*get_newly_partitions(*(view.nodes), action.hp_node), gpid.get_app_id())) { - action.node = ns->addr(); + action.node = 
_svc->get_dns_resolver()->resolve_address(ns->host_port()); + action.__set_hp_node(ns->host_port()); } } - if (action.node.is_invalid()) { + if (action.hp_node.is_invalid()) { LOG_ERROR( "all nodes for gpid({}) are dead, waiting for some secondary to come back....", gpid_name); result = pc_status::dead; } else { action.type = config_type::CT_UPGRADE_TO_PRIMARY; - newly_partitions *np = get_newly_partitions(*(view.nodes), action.node); + newly_partitions *np = get_newly_partitions(*(view.nodes), action.hp_node); np->newly_add_primary(gpid.get_app_id(), true); action.target = action.node; + action.hp_target = action.hp_node; result = pc_status::ill; } } // if nothing in the last_drops, it means that this is a newly created partition, so let's // just find a node and assign primary for it. - else if (pc.last_drops.empty()) { - dsn::rpc_address min_primary_server; + else if (pc.hp_last_drops.empty()) { + dsn::host_port min_primary_server; newly_partitions *min_primary_server_np = nullptr; for (auto &pairs : *view.nodes) { @@ -277,14 +284,16 @@ pc_status partition_guardian::on_missing_primary(meta_view &view, const dsn::gpi // find a node which has minimal primaries if (min_primary_server_np == nullptr || np->less_primaries(*min_primary_server_np, gpid.get_app_id())) { - min_primary_server = ns.addr(); + min_primary_server = ns.host_port(); min_primary_server_np = np; } } if (min_primary_server_np != nullptr) { - action.node = min_primary_server; + action.node = _svc->get_dns_resolver()->resolve_address(min_primary_server); + action.__set_hp_node(min_primary_server); action.target = action.node; + action.__set_hp_target(action.hp_node); action.type = config_type::CT_ASSIGN_PRIMARY; min_primary_server_np->newly_add_primary(gpid.get_app_id(), false); } @@ -302,6 +311,7 @@ pc_status partition_guardian::on_missing_primary(meta_view &view, const dsn::gpi std::string reason; config_context &cc = *get_config_context(*view.apps, gpid); action.node.set_invalid(); + 
action.__set_hp_node(host_port()); for (int i = 0; i < cc.dropped.size(); ++i) { const dropped_replica &dr = cc.dropped[i]; char time_buf[30] = {0}; @@ -319,28 +329,31 @@ pc_status partition_guardian::on_missing_primary(meta_view &view, const dsn::gpi dr.last_prepared_decree); } - for (int i = 0; i < pc.last_drops.size(); ++i) { + for (int i = 0; i < pc.hp_last_drops.size(); ++i) { int dropped_index = -1; for (int k = 0; k < cc.dropped.size(); k++) { - if (cc.dropped[k].node == pc.last_drops[i]) { + if (cc.dropped[k].node == pc.hp_last_drops[i]) { dropped_index = k; break; } } - LOG_INFO("{}: config_context.last_drops[{}]: node({}), dropped_index({})", + LOG_INFO("{}: config_context.last_drops[{}({})]: node({}), dropped_index({})", gpid_name, i, + pc.hp_last_drops[i], pc.last_drops[i], dropped_index); } - if (pc.last_drops.size() == 1) { - LOG_WARNING("{}: the only node({}) is dead, waiting it to come back", + if (pc.hp_last_drops.size() == 1) { + LOG_WARNING("{}: the only node({}({})) is dead, waiting it to come back", gpid_name, + pc.hp_last_drops.back(), pc.last_drops.back()); + action.hp_node = pc.hp_last_drops.back(); action.node = pc.last_drops.back(); } else { - std::vector nodes(pc.last_drops.end() - 2, pc.last_drops.end()); + std::vector nodes(pc.hp_last_drops.end() - 2, pc.hp_last_drops.end()); std::vector collected_info(2); bool ready = true; @@ -353,11 +366,11 @@ pc_status partition_guardian::on_missing_primary(meta_view &view, const dsn::gpi node_state *ns = get_node_state(*view.nodes, nodes[i], false); if (ns == nullptr || !ns->alive()) { ready = false; - reason = "the last dropped node(" + nodes[i].to_std_string() + - ") haven't come back yet"; + reason = + fmt::format("the last dropped node({}) haven't come back yet", nodes[i]); LOG_WARNING("{}: don't select primary: {}", gpid_name, reason); } else { - std::vector::iterator it = cc.find_from_dropped(nodes[i]); + const auto &it = cc.find_from_dropped(nodes[i]); if (it == cc.dropped.end() || 
it->ballot == invalid_ballot) { if (ns->has_collected()) { LOG_INFO("{}: ignore {}'s replica info as it doesn't exist on " @@ -367,8 +380,8 @@ pc_status partition_guardian::on_missing_primary(meta_view &view, const dsn::gpi collected_info[i] = {nodes[i], 0, -1, -1, -1}; } else { ready = false; - reason = "the last dropped node(" + nodes[i].to_std_string() + - ") is unavailable because "; + reason = fmt::format( + "the last dropped node({}) is unavailable because ", nodes[i]); if (it == cc.dropped.end()) { reason += "the node is not exist in dropped_nodes"; } else { @@ -405,18 +418,22 @@ pc_status partition_guardian::on_missing_primary(meta_view &view, const dsn::gpi if (larger_pd >= pc.last_committed_decree && larger_pd >= larger_cd) { if (gap1 != 0) { // 1. choose node with larger ballot - action.node = gap1 < 0 ? recent_dead.node : previous_dead.node; + action.hp_node = gap1 < 0 ? recent_dead.node : previous_dead.node; } else if (gap2 != 0) { // 2. choose node with larger last_committed_decree - action.node = gap2 < 0 ? recent_dead.node : previous_dead.node; + action.hp_node = gap2 < 0 ? recent_dead.node : previous_dead.node; } else { // 3. choose node with larger last_prepared_decree - action.node = previous_dead.last_prepared_decree > - recent_dead.last_prepared_decree - ? previous_dead.node - : recent_dead.node; + action.hp_node = previous_dead.last_prepared_decree > + recent_dead.last_prepared_decree + ? 
previous_dead.node + : recent_dead.node; } - LOG_INFO("{}: select {} as a new primary", gpid_name, action.node); + action.node = _svc->get_dns_resolver()->resolve_address(action.hp_node); + LOG_INFO("{}: select {}({}) as a new primary", + gpid_name, + action.hp_node, + action.node); } else { char buf[1000]; sprintf(buf, @@ -436,11 +453,12 @@ pc_status partition_guardian::on_missing_primary(meta_view &view, const dsn::gpi } } - if (!action.node.is_invalid()) { + if (!action.hp_node.is_invalid()) { + action.__set_hp_target(action.hp_node); action.target = action.node; action.type = config_type::CT_ASSIGN_PRIMARY; - get_newly_partitions(*view.nodes, action.node) + get_newly_partitions(*view.nodes, action.hp_node) ->newly_add_primary(gpid.get_app_id(), false); } else { LOG_WARNING("{}: don't select any node for security reason, administrator can select " @@ -454,7 +472,8 @@ pc_status partition_guardian::on_missing_primary(meta_view &view, const dsn::gpi for (int i = 0; i < cc.dropped.size(); ++i) { const dropped_replica &dr = cc.dropped[i]; ddd_node_info ninfo; - ninfo.node = dr.node; + ninfo.node = _svc->get_dns_resolver()->resolve_address(dr.node); + ninfo.__set_hp_node(dr.node); ninfo.drop_time_ms = dr.time; ninfo.ballot = invalid_ballot; ninfo.last_committed_decree = invalid_decree; @@ -521,6 +540,7 @@ pc_status partition_guardian::on_missing_secondary(meta_view &view, const dsn::g is_emergency = true; } action.node.set_invalid(); + action.__set_hp_node(host_port()); if (is_emergency) { std::ostringstream oss; @@ -554,7 +574,8 @@ pc_status partition_guardian::on_missing_secondary(meta_view &view, const dsn::g cc.prefered_dropped, cc.prefered_dropped, cc.prefered_dropped - 1); - action.node = server.node; + action.hp_node = server.node; + action.node = _svc->get_dns_resolver()->resolve_address(server.node); cc.prefered_dropped--; break; } else { @@ -569,30 +590,31 @@ pc_status partition_guardian::on_missing_secondary(meta_view &view, const dsn::g } } - if 
(action.node.is_invalid() || in_black_list(action.node)) { - if (!action.node.is_invalid()) { + if (action.hp_node.is_invalid() || in_black_list(action.hp_node)) { + if (!action.hp_node.is_invalid()) { LOG_INFO("gpid({}) refuse to use selected node({}) as it is in black list", gpid, - action.node); + action.hp_node); } newly_partitions *min_server_np = nullptr; for (auto &pairs : *view.nodes) { node_state &ns = pairs.second; - if (!ns.alive() || is_member(pc, ns.addr()) || in_black_list(ns.addr())) + if (!ns.alive() || is_member(pc, ns.host_port()) || in_black_list(ns.host_port())) continue; newly_partitions *np = newly_partitions_ext::get_inited(&ns); if (min_server_np == nullptr || np->less_partitions(*min_server_np, gpid.get_app_id())) { - action.node = ns.addr(); + action.__set_hp_node(ns.host_port()); + action.node = _svc->get_dns_resolver()->resolve_address(ns.host_port()); min_server_np = np; } } - if (!action.node.is_invalid()) { + if (!action.hp_node.is_invalid()) { LOG_INFO("gpid({}): can't find valid node in dropped list to add as secondary, " "choose new node({}) with minimal partitions serving", gpid, - action.node); + action.hp_node); } else { LOG_INFO("gpid({}): can't find valid node in dropped list to add as secondary, " "but also we can't find a new node to add as secondary", @@ -604,10 +626,11 @@ pc_status partition_guardian::on_missing_secondary(meta_view &view, const dsn::g const dropped_replica &server = cc.dropped.back(); if (is_node_alive(*view.nodes, server.node)) { CHECK(!server.node.is_invalid(), "invalid server address, address = {}", server.node); - action.node = server.node; + action.hp_node = server.node; + action.node = _svc->get_dns_resolver()->resolve_address(server.node); } - if (!action.node.is_invalid()) { + if (!action.hp_node.is_invalid()) { LOG_INFO("gpid({}): choose node({}) as secondary coz it is last_dropped_node and is " "alive now", gpid, @@ -620,11 +643,12 @@ pc_status partition_guardian::on_missing_secondary(meta_view 
&view, const dsn::g } } - if (!action.node.is_invalid()) { + if (!action.hp_node.is_invalid()) { action.type = config_type::CT_ADD_SECONDARY; action.target = pc.primary; + action.__set_hp_target(pc.hp_primary); - newly_partitions *np = get_newly_partitions(*(view.nodes), action.node); + newly_partitions *np = get_newly_partitions(*(view.nodes), action.hp_node); CHECK_NOTNULL(np, ""); np->newly_add_partition(gpid.get_app_id()); @@ -639,9 +663,9 @@ pc_status partition_guardian::on_redundant_secondary(meta_view &view, const dsn: const node_mapper &nodes = *(view.nodes); const partition_configuration &pc = *get_config(*(view.apps), gpid); int target = 0; - int load = nodes.find(pc.secondaries.front())->second.partition_count(); - for (int i = 0; i != pc.secondaries.size(); ++i) { - int l = nodes.find(pc.secondaries[i])->second.partition_count(); + int load = nodes.find(pc.hp_secondaries.front())->second.partition_count(); + for (int i = 0; i != pc.hp_secondaries.size(); ++i) { + int l = nodes.find(pc.hp_secondaries[i])->second.partition_count(); if (l > load) { load = l; target = i; @@ -652,6 +676,8 @@ pc_status partition_guardian::on_redundant_secondary(meta_view &view, const dsn: action.type = config_type::CT_REMOVE; action.node = pc.secondaries[target]; action.target = pc.primary; + action.hp_node = pc.hp_secondaries[target]; + action.hp_target = pc.hp_primary; // TODO: treat remove as cure proposals too get_config_context(*view.apps, gpid)->lb_actions.assign_balancer_proposals({action}); @@ -662,11 +688,11 @@ void partition_guardian::finish_cure_proposal(meta_view &view, const dsn::gpid &gpid, const configuration_proposal_action &act) { - newly_partitions *np = get_newly_partitions(*(view.nodes), act.node); + newly_partitions *np = get_newly_partitions(*(view.nodes), act.hp_node); if (np == nullptr) { LOG_INFO("can't get the newly_partitions extension structure for node({}), " "the node may be dead and removed", - act.node); + act.hp_node); } else { if (act.type == 
config_type::CT_ASSIGN_PRIMARY) { np->newly_remove_primary(gpid.get_app_id(), false); @@ -751,15 +777,15 @@ partition_guardian::ctrl_assign_secondary_black_list(const std::vector addr_list; + std::set hp_list; for (const std::string &s : ip_ports) { - dsn::rpc_address addr; - if (!addr.from_string_ipv4(s.c_str())) { + dsn::host_port hp; + if (!hp.from_string(s)) { return invalid_arguments; } - addr_list.insert(addr); + hp_list.insert(hp); } - _assign_secondary_black_list = std::move(addr_list); + _assign_secondary_black_list = std::move(hp_list); return "set ok"; } diff --git a/src/meta/partition_guardian.h b/src/meta/partition_guardian.h index 9c77da7e52..fd0d247e11 100644 --- a/src/meta/partition_guardian.h +++ b/src/meta/partition_guardian.h @@ -29,7 +29,7 @@ #include "dsn.layer2_types.h" #include "meta_admin_types.h" #include "meta_data.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/command_manager.h" #include "utils/zlocks.h" @@ -83,7 +83,7 @@ class partition_guardian _ddd_partitions[partition.config.pid] = std::move(partition); } - bool in_black_list(dsn::rpc_address addr) + bool in_black_list(dsn::host_port addr) { dsn::zauto_read_lock l(_black_list_lock); return _assign_secondary_black_list.count(addr) != 0; @@ -99,7 +99,7 @@ class partition_guardian // but when adding secondary, the black list is accessed in THREADPOOL_META_STATE // so we need a lock to protect it dsn::zrwlock_nr _black_list_lock; // [ - std::set _assign_secondary_black_list; + std::set _assign_secondary_black_list; // ] std::vector> _cmds; diff --git a/src/meta/server_load_balancer.cpp b/src/meta/server_load_balancer.cpp index 04b53f6720..219dc33f0a 100644 --- a/src/meta/server_load_balancer.cpp +++ b/src/meta/server_load_balancer.cpp @@ -32,6 +32,7 @@ #include "dsn.layer2_types.h" #include "meta/meta_data.h" #include "meta_admin_types.h" +#include "runtime/rpc/rpc_address.h" #include "utils/error_code.h" #include "utils/fmt_logging.h" @@ 
-130,7 +131,7 @@ void newly_partitions::newly_remove_partition(int32_t app_id) --total_partitions; } -newly_partitions *get_newly_partitions(node_mapper &mapper, const dsn::rpc_address &addr) +newly_partitions *get_newly_partitions(node_mapper &mapper, const dsn::host_port &addr) { node_state *ns = get_node_state(mapper, addr, false); if (ns == nullptr) @@ -174,9 +175,10 @@ void server_load_balancer::register_proposals(meta_view view, // for these proposals, they should keep the target empty and // the meta-server will fill primary as target. if (act.target.is_invalid()) { - if (!pc.primary.is_invalid()) + if (!pc.hp_primary.is_invalid()) { act.target = pc.primary; - else { + act.__set_hp_target(pc.hp_primary); + } else { resp.err = ERR_INVALID_PARAMETERS; return; } diff --git a/src/meta/server_load_balancer.h b/src/meta/server_load_balancer.h index 774b7bb86a..7d03be5b9f 100644 --- a/src/meta/server_load_balancer.h +++ b/src/meta/server_load_balancer.h @@ -35,7 +35,7 @@ #include #include "meta_data.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/extensible_object.h" namespace dsn { @@ -83,7 +83,7 @@ class newly_partitions static void s_delete(void *_this); }; typedef dsn::object_extension_helper newly_partitions_ext; -newly_partitions *get_newly_partitions(node_mapper &mapper, const dsn::rpc_address &addr); +newly_partitions *get_newly_partitions(node_mapper &mapper, const dsn::host_port &addr); // The interface of the server load balancer which defines the scheduling policy of how to // place the partition replica to the nodes. 
@@ -165,10 +165,10 @@ class server_load_balancer virtual std::string get_balance_operation_count(const std::vector &args) = 0; public: - typedef std::function node_comparator; + typedef std::function node_comparator; static node_comparator primary_comparator(const node_mapper &nodes) { - return [&nodes](const rpc_address &r1, const rpc_address &r2) { + return [&nodes](const host_port &r1, const host_port &r2) { int p1 = nodes.find(r1)->second.primary_count(); int p2 = nodes.find(r2)->second.primary_count(); if (p1 != p2) @@ -179,7 +179,7 @@ class server_load_balancer static node_comparator partition_comparator(const node_mapper &nodes) { - return [&nodes](const rpc_address &r1, const rpc_address &r2) { + return [&nodes](const host_port &r1, const host_port &r2) { int p1 = nodes.find(r1)->second.partition_count(); int p2 = nodes.find(r2)->second.partition_count(); if (p1 != p2) diff --git a/src/meta/server_state.cpp b/src/meta/server_state.cpp index 9cb3e5cccc..e029b10347 100644 --- a/src/meta/server_state.cpp +++ b/src/meta/server_state.cpp @@ -38,7 +38,6 @@ #include // IWYU pragma: keep #include #include -#include #include #include "app_env_validator.h" @@ -60,7 +59,9 @@ #include "metadata_types.h" #include "replica_admin_types.h" #include "runtime/api_layer1.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "security/access_controller.h" @@ -552,6 +553,7 @@ error_code server_state::sync_apps_to_remote_storage() init_app_partition_node(app, i, init_callback); } } + tracker.wait_outstanding_tasks(); t = _meta_svc->get_remote_storage()->set_data(_apps_root, blob(unlock_state, 0, strlen(unlock_state)), @@ -582,6 +584,9 @@ dsn::error_code server_state::sync_apps_from_remote_storage() const blob &value) mutable { if (ec == ERR_OK) { partition_configuration pc; + pc.__isset.hp_secondaries = true; + 
pc.__isset.hp_last_drops = true; + pc.__isset.hp_primary = true; dsn::json::json_forwarder::decode(value, pc); CHECK(pc.pid.get_app_id() == app->app_id && @@ -590,8 +595,8 @@ dsn::error_code server_state::sync_apps_from_remote_storage() { zauto_write_lock l(_lock); app->partitions[partition_id] = pc; - for (const dsn::rpc_address &addr : pc.last_drops) { - app->helpers->contexts[partition_id].record_drop_history(addr); + for (const auto &hp : pc.hp_last_drops) { + app->helpers->contexts[partition_id].record_drop_history(hp); } if (app->status == app_status::AS_CREATING && @@ -735,11 +740,12 @@ void server_state::initialize_node_state() for (auto &app_pair : _all_apps) { app_state &app = *(app_pair.second); for (partition_configuration &pc : app.partitions) { - if (!pc.primary.is_invalid()) { - node_state *ns = get_node_state(_nodes, pc.primary, true); + if (!pc.hp_primary.is_invalid()) { + node_state *ns = get_node_state(_nodes, pc.hp_primary, true); ns->put_partition(pc.pid, true); } - for (auto &ep : pc.secondaries) { + + for (auto &ep : pc.hp_secondaries) { CHECK(!ep.is_invalid(), "invalid secondary address, addr = {}", ep); node_state *ns = get_node_state(_nodes, ep, true); ns->put_partition(pc.pid, false); @@ -802,7 +808,12 @@ void server_state::on_config_sync(configuration_query_by_node_rpc rpc) bool reject_this_request = false; response.__isset.gc_replicas = false; - LOG_INFO("got config sync request from {}, stored_replicas_count({})", + + host_port hp_node; + GET_HOST_PORT(request, node, hp_node); + + LOG_INFO("got config sync request from {}({}), stored_replicas_count({})", + hp_node, request.node, request.stored_replicas.size()); @@ -810,9 +821,9 @@ void server_state::on_config_sync(configuration_query_by_node_rpc rpc) zauto_read_lock l(_lock); // sync the partitions to the replica server - node_state *ns = get_node_state(_nodes, request.node, false); + node_state *ns = get_node_state(_nodes, hp_node, false); if (ns == nullptr) { - LOG_INFO("node({}) not 
found in meta server", request.node); + LOG_INFO("node({}({})) not found in meta server", hp_node, request.node); response.err = ERR_OBJECT_NOT_FOUND; } else { response.err = ERR_OK; @@ -831,7 +842,10 @@ void server_state::on_config_sync(configuration_query_by_node_rpc rpc) // when register child partition, stage is config_status::pending_remote_sync, // but cc.pending_sync_request is not set, see more in function // 'register_child_on_meta' - if (req == nullptr || req->node == request.node) + if (req == nullptr) + return false; + if ((req->__isset.hp_node && req->hp_node == hp_node) || + req->node == request.node) return false; } @@ -866,7 +880,8 @@ void server_state::on_config_sync(configuration_query_by_node_rpc rpc) // the app is deleted but not expired, we need to ignore it // if the app is deleted and expired, we need to gc it for (const replica_info &rep : replicas) { - LOG_DEBUG("receive stored replica from {}, pid({})", request.node, rep.pid); + LOG_DEBUG( + "receive stored replica from {}({}), pid({})", hp_node, request.node, rep.pid); std::shared_ptr app = get_app(rep.pid.get_app_id()); if (app == nullptr || rep.pid.get_partition_index() >= app->partition_count) { // This app has garbage partition after cancel split, the canceled child @@ -876,55 +891,63 @@ void server_state::on_config_sync(configuration_query_by_node_rpc rpc) rep.status == partition_status::PS_ERROR) { response.gc_replicas.push_back(rep); LOG_WARNING( - "notify node({}) to gc replica({}) because it is useless partition " + "notify node({}({})) to gc replica({}) because it is useless partition " "which is caused by cancel split", + hp_node, request.node, rep.pid); } else { // app is not recognized or partition is not recognized CHECK(false, - "gpid({}) on node({}) is not exist on meta server, administrator " + "gpid({}) on node({}({})) is not exist on meta server, administrator " "should check consistency of meta data", rep.pid, + hp_node, request.node); } } else if (app->status == 
app_status::AS_DROPPED) { if (app->expire_second == 0) { - LOG_INFO("gpid({}) on node({}) is of dropped table, but expire second is " - "not specified, do not delete it for safety reason", - rep.pid, - request.node); + LOG_INFO( + "gpid({}) on node({}({})) is of dropped table, but expire second is " + "not specified, do not delete it for safety reason", + rep.pid, + hp_node, + request.node); } else if (has_seconds_expired(app->expire_second)) { // can delete replica only when expire second is explicitely specified and // expired. if (level <= meta_function_level::fl_steady) { - LOG_INFO("gpid({}) on node({}) is of dropped and expired table, but " - "current function level is {}, do not delete it for safety " - "reason", - rep.pid, - request.node, - _meta_function_level_VALUES_TO_NAMES.find(level)->second); + LOG_INFO( + "gpid({}) on node({}({})) is of dropped and expired table, but " + "current function level is {}, do not delete it for safety " + "reason", + rep.pid, + hp_node, + request.node, + _meta_function_level_VALUES_TO_NAMES.find(level)->second); } else { response.gc_replicas.push_back(rep); - LOG_WARNING("notify node({}) to gc replica({}) coz the app is " + LOG_WARNING("notify node({}({})) to gc replica({}) coz the app is " "dropped and expired", + hp_node, request.node, rep.pid); } } } else if (app->status == app_status::AS_AVAILABLE) { - bool is_useful_replica = - collect_replica({&_all_apps, &_nodes}, request.node, rep); + bool is_useful_replica = collect_replica({&_all_apps, &_nodes}, hp_node, rep); if (!is_useful_replica) { if (level <= meta_function_level::fl_steady) { - LOG_INFO("gpid({}) on node({}) is useless, but current function " + LOG_INFO("gpid({}) on node({}({})) is useless, but current function " "level is {}, do not delete it for safety reason", rep.pid, + hp_node, request.node, _meta_function_level_VALUES_TO_NAMES.find(level)->second); } else { response.gc_replicas.push_back(rep); - LOG_WARNING("notify node({}) to gc replica({}) coz it is 
useless", + LOG_WARNING("notify node({}({})) to gc replica({}) coz it is useless", + hp_node, request.node, rep.pid); } @@ -942,8 +965,9 @@ void server_state::on_config_sync(configuration_query_by_node_rpc rpc) response.err = ERR_BUSY; response.partitions.clear(); } - LOG_INFO("send config sync response to {}, err({}), partitions_count({}), " + LOG_INFO("send config sync response to {}({}), err({}), partitions_count({}), " "gc_replicas_count({})", + hp_node, request.node, response.err, response.partitions.size(), @@ -1042,6 +1066,7 @@ void server_state::init_app_partition_node(std::shared_ptr &app, std::string app_partition_path = get_partition_path(*app, pidx); dsn::blob value = dsn::json::json_forwarder::encode(app->partitions[pidx]); + _meta_svc->get_remote_storage()->create_node( app_partition_path, LPC_META_STATE_HIGH, on_create_app_partition, value); } @@ -1422,13 +1447,16 @@ void server_state::list_apps(const configuration_list_apps_request &request, response.err = dsn::ERR_OK; } -void server_state::send_proposal(rpc_address target, const configuration_update_request &proposal) +void server_state::send_proposal(host_port target, const configuration_update_request &proposal) { - LOG_INFO("send proposal {} for gpid({}), ballot = {}, target = {}, node = {}", + host_port hp_node; + GET_HOST_PORT(proposal, node, hp_node); + LOG_INFO("send proposal {} for gpid({}), ballot = {}, target = {}, node = {}({})", ::dsn::enum_to_string(proposal.type), proposal.config.pid, proposal.config.ballot, target, + hp_node, proposal.node); dsn::message_ex *msg = dsn::message_ex::create_request(RPC_CONFIG_PROPOSAL, 0, proposal.config.pid.thread_hash()); @@ -1444,8 +1472,9 @@ void server_state::send_proposal(const configuration_proposal_action &action, request.info = app; request.type = action.type; request.node = action.node; + request.__set_hp_node(action.hp_node); request.config = pc; - send_proposal(action.target, request); + send_proposal(action.hp_target, request); } void 
server_state::request_check(const partition_configuration &old, @@ -1455,39 +1484,84 @@ void server_state::request_check(const partition_configuration &old, switch (request.type) { case config_type::CT_ASSIGN_PRIMARY: - CHECK_NE(old.primary, request.node); - CHECK(std::find(old.secondaries.begin(), old.secondaries.end(), request.node) == - old.secondaries.end(), - ""); + if (request.__isset.hp_node) { + CHECK_NE(old.hp_primary, request.hp_node); + CHECK(std::find(old.hp_secondaries.begin(), + old.hp_secondaries.end(), + request.hp_node) == old.hp_secondaries.end(), + ""); + } else { + CHECK_NE(old.primary, request.node); + CHECK(std::find(old.secondaries.begin(), old.secondaries.end(), request.node) == + old.secondaries.end(), + ""); + } break; case config_type::CT_UPGRADE_TO_PRIMARY: - CHECK_NE(old.primary, request.node); - CHECK(std::find(old.secondaries.begin(), old.secondaries.end(), request.node) != - old.secondaries.end(), - ""); + if (request.__isset.hp_node) { + CHECK_NE(old.hp_primary, request.hp_node); + CHECK(std::find(old.hp_secondaries.begin(), + old.hp_secondaries.end(), + request.hp_node) != old.hp_secondaries.end(), + ""); + } else { + CHECK_NE(old.primary, request.node); + CHECK(std::find(old.secondaries.begin(), old.secondaries.end(), request.node) != + old.secondaries.end(), + ""); + } break; case config_type::CT_DOWNGRADE_TO_SECONDARY: - CHECK_EQ(old.primary, request.node); - CHECK(std::find(old.secondaries.begin(), old.secondaries.end(), request.node) == - old.secondaries.end(), - ""); + if (request.__isset.hp_node) { + CHECK_EQ(old.hp_primary, request.hp_node); + CHECK(std::find(old.hp_secondaries.begin(), + old.hp_secondaries.end(), + request.hp_node) == old.hp_secondaries.end(), + ""); + } else { + CHECK_EQ(old.primary, request.node); + CHECK(std::find(old.secondaries.begin(), old.secondaries.end(), request.node) == + old.secondaries.end(), + ""); + } break; case config_type::CT_DOWNGRADE_TO_INACTIVE: case config_type::CT_REMOVE: - 
CHECK(old.primary == request.node || - std::find(old.secondaries.begin(), old.secondaries.end(), request.node) != - old.secondaries.end(), - ""); + if (request.__isset.hp_node) { + CHECK(old.hp_primary == request.hp_node || + std::find(old.hp_secondaries.begin(), + old.hp_secondaries.end(), + request.hp_node) != old.hp_secondaries.end(), + ""); + } else { + CHECK(old.primary == request.node || + std::find(old.secondaries.begin(), old.secondaries.end(), request.node) != + old.secondaries.end(), + ""); + } break; case config_type::CT_UPGRADE_TO_SECONDARY: - CHECK_NE(old.primary, request.node); - CHECK(std::find(old.secondaries.begin(), old.secondaries.end(), request.node) == - old.secondaries.end(), - ""); + if (request.__isset.hp_node) { + CHECK_NE(old.hp_primary, request.hp_node); + CHECK(std::find(old.hp_secondaries.begin(), + old.hp_secondaries.end(), + request.hp_node) == old.hp_secondaries.end(), + ""); + } else { + CHECK_NE(old.primary, request.node); + CHECK(std::find(old.secondaries.begin(), old.secondaries.end(), request.node) == + old.secondaries.end(), + ""); + } break; case config_type::CT_PRIMARY_FORCE_UPDATE_BALLOT: - CHECK_EQ(old.primary, new_config.primary); - CHECK(old.secondaries == new_config.secondaries, ""); + if (request.__isset.hp_node) { + CHECK_EQ(old.hp_primary, new_config.hp_primary); + CHECK(old.hp_secondaries == new_config.hp_secondaries, ""); + } else { + CHECK_EQ(old.primary, new_config.primary); + CHECK(old.secondaries == new_config.secondaries, ""); + } break; default: break; @@ -1506,6 +1580,9 @@ void server_state::update_configuration_locally( health_status old_health_status = partition_health_status(old_cfg, min_2pc_count); health_status new_health_status = partition_health_status(new_cfg, min_2pc_count); + host_port hp_node; + GET_HOST_PORT(*config_request, node, hp_node); + if (app.is_stateful) { CHECK(old_cfg.ballot == invalid_ballot || old_cfg.ballot + 1 == new_cfg.ballot, "invalid configuration update request, old ballot {}, 
new ballot {}", @@ -1514,8 +1591,9 @@ void server_state::update_configuration_locally( node_state *ns = nullptr; if (config_request->type != config_type::CT_DROP_PARTITION) { - ns = get_node_state(_nodes, config_request->node, false); - CHECK_NOTNULL(ns, "invalid node address, address = {}", config_request->node); + ns = get_node_state(_nodes, hp_node, false); + CHECK_NOTNULL( + ns, "invalid node address, address = {}({})", hp_node, config_request->node); } #ifndef NDEBUG request_check(old_cfg, *config_request); @@ -1543,7 +1621,7 @@ void server_state::update_configuration_locally( break; case config_type::CT_DROP_PARTITION: - for (const rpc_address &node : new_cfg.last_drops) { + for (const auto &node : new_cfg.hp_last_drops) { ns = get_node_state(_nodes, node, false); if (ns != nullptr) ns->remove_partition(gpid, false); @@ -1556,9 +1634,17 @@ void server_state::update_configuration_locally( break; case config_type::CT_REGISTER_CHILD: { ns->put_partition(gpid, true); - for (auto &secondary : config_request->config.secondaries) { - auto secondary_node = get_node_state(_nodes, secondary, false); - secondary_node->put_partition(gpid, false); + if (config_request->config.__isset.hp_secondaries) { + for (const auto &secondary : config_request->config.hp_secondaries) { + auto secondary_node = get_node_state(_nodes, secondary, false); + secondary_node->put_partition(gpid, false); + } + } else { + for (const auto &secondary : config_request->config.secondaries) { + auto hp_secondary = host_port(secondary); + auto secondary_node = get_node_state(_nodes, hp_secondary, false); + secondary_node->put_partition(gpid, false); + } } break; } @@ -1569,22 +1655,25 @@ void server_state::update_configuration_locally( } else { CHECK_EQ(old_cfg.ballot, new_cfg.ballot); + auto host_node = host_port(config_request->host_node); new_cfg = old_cfg; partition_configuration_stateless pcs(new_cfg); if (config_request->type == config_type::type::CT_ADD_SECONDARY) { - 
pcs.hosts().emplace_back(config_request->host_node); - pcs.workers().emplace_back(config_request->node); + pcs.hosts().emplace_back(host_node); + pcs.workers().emplace_back(hp_node); } else { - auto it = - std::remove(pcs.hosts().begin(), pcs.hosts().end(), config_request->host_node); + auto it = std::remove(pcs.hosts().begin(), pcs.hosts().end(), host_node); pcs.hosts().erase(it); - it = std::remove(pcs.workers().begin(), pcs.workers().end(), config_request->node); + it = std::remove(pcs.workers().begin(), pcs.workers().end(), hp_node); pcs.workers().erase(it); } - auto it = _nodes.find(config_request->host_node); - CHECK(it != _nodes.end(), "invalid node address, address = {}", config_request->host_node); + auto it = _nodes.find(host_node); + CHECK(it != _nodes.end(), + "invalid node address, address = {}({})", + host_node, + config_request->host_node); if (config_type::CT_REMOVE == config_request->type) { it->second.remove_partition(gpid, false); } else { @@ -1710,8 +1799,9 @@ void server_state::on_update_configuration_on_remote_reply( } else { config_request->type = action.type; config_request->node = action.node; + config_request->__set_hp_node(action.hp_node); config_request->info = *app; - send_proposal(action.target, *config_request); + send_proposal(action.hp_target, *config_request); } } } @@ -1760,16 +1850,25 @@ void server_state::drop_partition(std::shared_ptr &app, int pidx) request.info = *app; request.type = config_type::CT_DROP_PARTITION; request.node = pc.primary; + request.__set_hp_node(pc.hp_primary); request.config = pc; + for (auto &node : pc.hp_secondaries) { + maintain_drops(request.config.hp_last_drops, node, request.type); + } for (auto &node : pc.secondaries) { maintain_drops(request.config.last_drops, node, request.type); } + if (!pc.hp_primary.is_invalid()) { + maintain_drops(request.config.hp_last_drops, pc.hp_primary, request.type); + } if (!pc.primary.is_invalid()) { maintain_drops(request.config.last_drops, pc.primary, request.type); 
} request.config.primary.set_invalid(); request.config.secondaries.clear(); + request.config.hp_primary.reset(); + request.config.hp_secondaries.clear(); CHECK_EQ((pc.partition_flags & pc_flags::dropped), 0); request.config.partition_flags |= pc_flags::dropped; @@ -1812,8 +1911,9 @@ void server_state::downgrade_primary_to_inactive(std::shared_ptr &app return; } else { LOG_WARNING("gpid({}) is syncing another request with remote, cancel it due to the " - "primary({}) is down", + "primary({}({})) is down", pc.pid, + pc.hp_primary, pc.primary); cc.cancel_sync(); } @@ -1826,8 +1926,11 @@ void server_state::downgrade_primary_to_inactive(std::shared_ptr &app request.config = pc; request.type = config_type::CT_DOWNGRADE_TO_INACTIVE; request.node = pc.primary; + request.__set_hp_node(pc.hp_primary); request.config.ballot++; request.config.primary.set_invalid(); + request.config.__set_hp_primary(host_port()); + maintain_drops(request.config.hp_last_drops, pc.hp_primary, request.type); maintain_drops(request.config.last_drops, pc.primary, request.type); cc.stage = config_status::pending_remote_sync; @@ -1839,19 +1942,20 @@ void server_state::downgrade_primary_to_inactive(std::shared_ptr &app void server_state::downgrade_secondary_to_inactive(std::shared_ptr &app, int pidx, - const rpc_address &node) + const host_port &node) { partition_configuration &pc = app->partitions[pidx]; config_context &cc = app->helpers->contexts[pidx]; - CHECK(!pc.primary.is_invalid(), "this shouldn't be called if the primary is invalid"); + CHECK(!pc.hp_primary.is_invalid(), "this shouldn't be called if the primary is invalid"); if (config_status::pending_remote_sync != cc.stage) { configuration_update_request request; request.info = *app; request.config = pc; request.type = config_type::CT_DOWNGRADE_TO_INACTIVE; - request.node = node; - send_proposal(pc.primary, request); + request.node = _meta_svc->get_dns_resolver()->resolve_address(node); + request.__set_hp_node(node); + 
send_proposal(pc.hp_primary, request); } else { LOG_INFO("gpid({}.{}) is syncing with remote storage, ignore the remove seconary({})", app->app_id, @@ -1862,34 +1966,40 @@ void server_state::downgrade_secondary_to_inactive(std::shared_ptr &a void server_state::downgrade_stateless_nodes(std::shared_ptr &app, int pidx, - const rpc_address &address) + const host_port &address) { std::shared_ptr req = std::make_shared(); req->info = *app; req->type = config_type::CT_REMOVE; - req->host_node = address; + req->host_node = _meta_svc->get_dns_resolver()->resolve_address(address); req->node.set_invalid(); + req->hp_node.reset(); req->config = app->partitions[pidx]; config_context &cc = app->helpers->contexts[pidx]; partition_configuration &pc = req->config; unsigned i = 0; - for (; i < pc.secondaries.size(); ++i) { - if (pc.secondaries[i] == address) { + for (; i < pc.hp_secondaries.size(); ++i) { + if (pc.hp_secondaries[i] == address) { req->node = pc.last_drops[i]; + req->__set_hp_node(pc.hp_last_drops[i]); break; } } CHECK(!req->node.is_invalid(), "invalid node address, address = {}", req->node); // remove host_node & node from secondaries/last_drops, as it will be sync to remote storage - for (++i; i < pc.secondaries.size(); ++i) { + for (++i; i < pc.hp_secondaries.size(); ++i) { pc.secondaries[i - 1] = pc.secondaries[i]; pc.last_drops[i - 1] = pc.last_drops[i]; + pc.hp_secondaries[i - 1] = pc.hp_secondaries[i]; + pc.hp_last_drops[i - 1] = pc.hp_last_drops[i]; } pc.secondaries.pop_back(); pc.last_drops.pop_back(); + pc.hp_secondaries.pop_back(); + pc.hp_last_drops.pop_back(); if (config_status::pending_remote_sync == cc.stage) { LOG_WARNING("gpid({}) is syncing another request with remote, cancel it due to meta is " @@ -1956,6 +2066,7 @@ void server_state::on_update_configuration( msg->release_ref(); return; } else { + maintain_drops(cfg_request->config.hp_last_drops, cfg_request->hp_node, cfg_request->type); maintain_drops(cfg_request->config.last_drops, 
cfg_request->node, cfg_request->type); } @@ -1975,14 +2086,14 @@ void server_state::on_update_configuration( void server_state::on_partition_node_dead(std::shared_ptr &app, int pidx, - const dsn::rpc_address &address) + const dsn::host_port &address) { partition_configuration &pc = app->partitions[pidx]; if (app->is_stateful) { if (is_primary(pc, address)) downgrade_primary_to_inactive(app, pidx); else if (is_secondary(pc, address)) { - if (!pc.primary.is_invalid()) + if (!pc.hp_primary.is_invalid()) downgrade_secondary_to_inactive(app, pidx, address); else if (is_secondary(pc, address)) { LOG_INFO("gpid({}): secondary({}) is down, ignored it due to no primary for this " @@ -1998,7 +2109,7 @@ void server_state::on_partition_node_dead(std::shared_ptr &app, } } -void server_state::on_change_node_state(rpc_address node, bool is_alive) +void server_state::on_change_node_state(host_port node, bool is_alive) { LOG_DEBUG("change node({}) state to {}", node, is_alive ? "alive" : "dead"); zauto_write_lock l(_lock); @@ -2048,7 +2159,7 @@ void server_state::on_propose_balancer(const configuration_balancer_request &req error_code server_state::construct_apps(const std::vector &query_app_responses, - const std::vector &replica_nodes, + const std::vector &replica_nodes, std::string &hint_message) { int max_app_id = 0; @@ -2142,7 +2253,7 @@ server_state::construct_apps(const std::vector &query_a error_code server_state::construct_partitions( const std::vector &query_replica_responses, - const std::vector &replica_nodes, + const std::vector &replica_nodes, bool skip_lost_partitions, std::string &hint_message) { @@ -2177,18 +2288,20 @@ error_code server_state::construct_partitions( LOG_INFO("ignore constructing partitions for dropping app({})", app->app_id); } else { for (partition_configuration &pc : app->partitions) { - bool is_succeed = - construct_replica({&_all_apps, &_nodes}, pc.pid, app->max_replica_count); + bool is_succeed = construct_replica({&_all_apps, &_nodes}, + 
pc.pid, + app->max_replica_count, + _meta_svc->get_dns_resolver()); if (is_succeed) { LOG_INFO("construct partition({}.{}) succeed: {}", app->app_id, pc.pid.get_partition_index(), boost::lexical_cast(pc)); - if (pc.last_drops.size() + 1 < pc.max_replica_count) { + if (pc.hp_last_drops.size() + 1 < pc.max_replica_count) { std::ostringstream oss; oss << "WARNING: partition(" << app->app_id << "." << pc.pid.get_partition_index() << ") only collects " - << (pc.last_drops.size() + 1) << "/" << pc.max_replica_count + << (pc.hp_last_drops.size() + 1) << "/" << pc.max_replica_count << " of replicas, may lost data" << std::endl; hint_message += oss.str(); } @@ -2231,7 +2344,7 @@ error_code server_state::construct_partitions( } dsn::error_code -server_state::sync_apps_from_replica_nodes(const std::vector &replica_nodes, +server_state::sync_apps_from_replica_nodes(const std::vector &replica_nodes, bool skip_bad_nodes, bool skip_lost_partitions, std::string &hint_message) @@ -2248,8 +2361,10 @@ server_state::sync_apps_from_replica_nodes(const std::vector & auto app_query_req = std::make_unique(); app_query_req->meta_server = dsn_primary_address(); + app_query_req->__set_hp_meta_server(dsn_primary_host_port()); query_app_info_rpc app_rpc(std::move(app_query_req), RPC_QUERY_APP_INFO); - app_rpc.call(replica_nodes[i], + auto addr = _meta_svc->get_dns_resolver()->resolve_address(replica_nodes[i]); + app_rpc.call(addr, &tracker, [app_rpc, i, &replica_nodes, &query_app_errors, &query_app_responses]( error_code err) mutable { @@ -2266,10 +2381,10 @@ server_state::sync_apps_from_replica_nodes(const std::vector & }); auto replica_query_req = std::make_unique(); - replica_query_req->node = replica_nodes[i]; + replica_query_req->node = addr; query_replica_info_rpc replica_rpc(std::move(replica_query_req), RPC_QUERY_REPLICA_INFO); replica_rpc.call( - replica_nodes[i], + addr, &tracker, [replica_rpc, i, &replica_nodes, &query_replica_errors, &query_replica_responses]( error_code err) 
mutable { @@ -2307,16 +2422,18 @@ server_state::sync_apps_from_replica_nodes(const std::vector & failed_count++; query_app_errors[i] = err; query_replica_errors[i] = err; - std::ostringstream oss; if (skip_bad_nodes) { - oss << "WARNING: collect app and replica info from node(" << replica_nodes[i] - << ") failed with err(" << err << "), skip the bad node" << std::endl; + hint_message += fmt::format("WARNING: collect app and replica info from node({}) " + "failed with err({}), skip the bad node", + replica_nodes[i], + err); } else { - oss << "ERROR: collect app and replica info from node(" << replica_nodes[i] - << ") failed with err(" << err - << "), you can skip it by set skip_bad_nodes option" << std::endl; + hint_message += + fmt::format("ERROR: collect app and replica info from node({}) failed with " + "err({}), you can skip it by set skip_bad_nodes option", + replica_nodes[i], + err); } - hint_message += oss.str(); } else { succeed_count++; } @@ -2358,8 +2475,16 @@ void server_state::on_start_recovery(const configuration_recovery_request &req, req.skip_bad_nodes ? "true" : "false", req.skip_lost_partitions ? 
"true" : "false"); - resp.err = sync_apps_from_replica_nodes( - req.recovery_set, req.skip_bad_nodes, req.skip_lost_partitions, resp.hint_message); + if (req.__isset.hp_recovery_set) { + resp.err = sync_apps_from_replica_nodes( + req.hp_recovery_set, req.skip_bad_nodes, req.skip_lost_partitions, resp.hint_message); + } else { + auto hp_recovery_set = std::vector(); + host_port::fill_host_ports_from_addresses(req.recovery_set, hp_recovery_set); + resp.err = sync_apps_from_replica_nodes( + hp_recovery_set, req.skip_bad_nodes, req.skip_lost_partitions, resp.hint_message); + } + if (resp.err != dsn::ERR_OK) { LOG_ERROR("sync apps from replica nodes failed when do recovery, err = {}", resp.err); _all_apps.clear(); @@ -2394,7 +2519,7 @@ bool server_state::can_run_balancer() if (iter->second.partition_count() != 0) { LOG_INFO( "don't do replica migration coz dead node({}) has {} partitions not removed", - iter->second.addr(), + iter->second.host_port(), iter->second.partition_count()); return false; } @@ -2463,7 +2588,7 @@ bool server_state::check_all_partitions() std::vector add_secondary_actions; std::vector add_secondary_gpids; std::vector add_secondary_proposed; - std::map add_secondary_running_nodes; // node --> running_count + std::map add_secondary_running_nodes; // node --> running_count for (auto &app_pair : _exist_apps) { std::shared_ptr &app = app_pair.second; if (app->status == app_status::AS_CREATING || app->status == app_status::AS_DROPPING) { @@ -2508,10 +2633,11 @@ bool server_state::check_all_partitions() for (int i = 0; i < add_secondary_actions.size(); ++i) { gpid &pid = add_secondary_gpids[i]; partition_configuration &pc = *get_config(_all_apps, pid); - if (!add_secondary_proposed[i] && pc.secondaries.empty()) { + if (!add_secondary_proposed[i] && pc.hp_secondaries.empty()) { configuration_proposal_action &action = add_secondary_actions[i]; if (_add_secondary_enable_flow_control && - add_secondary_running_nodes[action.node] >= 
_add_secondary_max_count_for_one_node) { + add_secondary_running_nodes[action.hp_node] >= + _add_secondary_max_count_for_one_node) { // ignore continue; } @@ -2519,7 +2645,7 @@ bool server_state::check_all_partitions() send_proposal(action, pc, *app); send_proposal_count++; add_secondary_proposed[i] = true; - add_secondary_running_nodes[action.node]++; + add_secondary_running_nodes[action.hp_node]++; } } @@ -2530,7 +2656,8 @@ bool server_state::check_all_partitions() gpid pid = add_secondary_gpids[i]; partition_configuration &pc = *get_config(_all_apps, pid); if (_add_secondary_enable_flow_control && - add_secondary_running_nodes[action.node] >= _add_secondary_max_count_for_one_node) { + add_secondary_running_nodes[action.hp_node] >= + _add_secondary_max_count_for_one_node) { LOG_INFO("do not send {} proposal for gpid({}) for flow control reason, target = " "{}, node = {}", ::dsn::enum_to_string(action.type), @@ -2543,7 +2670,7 @@ bool server_state::check_all_partitions() send_proposal(action, pc, *app); send_proposal_count++; add_secondary_proposed[i] = true; - add_secondary_running_nodes[action.node]++; + add_secondary_running_nodes[action.hp_node]++; } } @@ -2629,22 +2756,23 @@ void server_state::check_consistency(const dsn::gpid &gpid) partition_configuration &config = app.partitions[gpid.get_partition_index()]; if (app.is_stateful) { - if (config.primary.is_invalid() == false) { - auto it = _nodes.find(config.primary); - CHECK(it != _nodes.end(), "invalid primary address, address = {}", config.primary); + if (config.hp_primary.is_invalid() == false) { + auto it = _nodes.find(config.hp_primary); + CHECK(it != _nodes.end(), "invalid primary address, address = {}", config.hp_primary); CHECK_EQ(it->second.served_as(gpid), partition_status::PS_PRIMARY); - CHECK(std::find(config.last_drops.begin(), config.last_drops.end(), config.primary) == - config.last_drops.end(), + CHECK(std::find(config.hp_last_drops.begin(), + config.hp_last_drops.end(), + config.hp_primary) 
== config.hp_last_drops.end(), "primary shouldn't appear in last_drops, address = {}", - config.primary); + config.hp_primary); } - for (auto &ep : config.secondaries) { + for (auto &ep : config.hp_secondaries) { auto it = _nodes.find(ep); CHECK(it != _nodes.end(), "invalid secondary address, address = {}", ep); CHECK_EQ(it->second.served_as(gpid), partition_status::PS_SECONDARY); - CHECK(std::find(config.last_drops.begin(), config.last_drops.end(), ep) == - config.last_drops.end(), + CHECK(std::find(config.hp_last_drops.begin(), config.hp_last_drops.end(), ep) == + config.hp_last_drops.end(), "secondary shouldn't appear in last_drops, address = {}", ep); } diff --git a/src/meta/server_state.h b/src/meta/server_state.h index e8c1c4f070..b6b20a790c 100644 --- a/src/meta/server_state.h +++ b/src/meta/server_state.h @@ -53,7 +53,7 @@ namespace dsn { class blob; class command_deregister; class message_ex; -class rpc_address; +class host_port; namespace replication { class configuration_balancer_request; @@ -182,7 +182,7 @@ class server_state error_code dump_from_remote_storage(const char *local_path, bool sync_immediately); error_code restore_from_local_storage(const char *local_path); - void on_change_node_state(rpc_address node, bool is_alive); + void on_change_node_state(host_port node, bool is_alive); void on_propose_balancer(const configuration_balancer_request &request, configuration_balancer_response &response); void on_start_recovery(const configuration_recovery_request &request, @@ -233,7 +233,7 @@ class server_state // else indicate error that remote storage responses error_code sync_apps_to_remote_storage(); - error_code sync_apps_from_replica_nodes(const std::vector &node_list, + error_code sync_apps_from_replica_nodes(const std::vector &node_list, bool skip_bad_nodes, bool skip_lost_partitions, std::string &hint_message); @@ -249,11 +249,11 @@ class server_state void check_consistency(const dsn::gpid &gpid); error_code construct_apps(const std::vector 
&query_app_responses, - const std::vector &replica_nodes, + const std::vector &replica_nodes, std::string &hint_message); error_code construct_partitions( const std::vector &query_replica_info_responses, - const std::vector &replica_nodes, + const std::vector &replica_nodes, bool skip_lost_partitions, std::string &hint_message); @@ -282,15 +282,14 @@ class server_state void downgrade_primary_to_inactive(std::shared_ptr &app, int pidx); void downgrade_secondary_to_inactive(std::shared_ptr &app, int pidx, - const rpc_address &node); - void downgrade_stateless_nodes(std::shared_ptr &app, - int pidx, - const rpc_address &address); + const host_port &node); + void + downgrade_stateless_nodes(std::shared_ptr &app, int pidx, const host_port &address); void on_partition_node_dead(std::shared_ptr &app, int pidx, - const dsn::rpc_address &address); - void send_proposal(rpc_address target, const configuration_update_request &proposal); + const dsn::host_port &address); + void send_proposal(host_port target, const configuration_update_request &proposal); void send_proposal(const configuration_proposal_action &action, const partition_configuration &pc, const app_state &app); diff --git a/src/meta/server_state_restore.cpp b/src/meta/server_state_restore.cpp index b2dada4a25..7dac1b424f 100644 --- a/src/meta/server_state_restore.cpp +++ b/src/meta/server_state_restore.cpp @@ -40,7 +40,7 @@ #include "meta/table_metrics.h" #include "meta_admin_types.h" #include "meta_service.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/task.h" @@ -251,7 +251,7 @@ void server_state::on_query_restore_status(configuration_query_restore_rpc rpc) for (int32_t i = 0; i < app->partition_count; i++) { const auto &r_state = app->helpers->restore_states[i]; const auto &p = app->partitions[i]; - if (!p.primary.is_invalid() || !p.secondaries.empty()) { + if 
(!p.hp_primary.is_invalid() || !p.hp_secondaries.empty()) { // already have primary, restore succeed continue; } diff --git a/src/meta/test/backup_test.cpp b/src/meta/test/backup_test.cpp index b27ba49958..ef7b4dd12a 100644 --- a/src/meta/test/backup_test.cpp +++ b/src/meta/test/backup_test.cpp @@ -45,6 +45,7 @@ #include "runtime/api_layer1.h" #include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_holder.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -187,7 +188,7 @@ class progress_liar : public meta_service public: // req is held by callback, we don't need to handle the life-time of it virtual void send_request(dsn::message_ex *req, - const rpc_address &target, + const host_port &target, const rpc_response_task_ptr &callback) { // need to handle life-time manually @@ -498,14 +499,16 @@ TEST_F(policy_context_test, test_app_dropped_during_backup) int64_t cur_start_time_ms = static_cast(dsn_now_ms()); { zauto_lock l(_mp._lock); - std::vector node_list; + std::vector> node_list; generate_node_list(node_list, 3, 3); app_state *app = state->_all_apps[3].get(); app->status = dsn::app_status::AS_AVAILABLE; for (partition_configuration &pc : app->partitions) { - pc.primary = node_list[0]; - pc.secondaries = {node_list[1], node_list[2]}; + pc.primary = node_list[0].second; + pc.secondaries = {node_list[1].second, node_list[2].second}; + pc.__set_hp_primary(node_list[0].first); + pc.__set_hp_secondaries({node_list[1].first, node_list[2].first}); } _mp._backup_history.clear(); diff --git a/src/meta/test/balancer_simulator/balancer_simulator.cpp b/src/meta/test/balancer_simulator/balancer_simulator.cpp index 6c3e25feba..1dc54d0214 100644 --- a/src/meta/test/balancer_simulator/balancer_simulator.cpp +++ b/src/meta/test/balancer_simulator/balancer_simulator.cpp @@ -43,6 +43,7 @@ #include "meta_admin_types.h" #include "runtime/app_model.h" #include 
"runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/fmt_logging.h" using namespace dsn::replication; @@ -50,34 +51,34 @@ using namespace dsn::replication; class simple_priority_queue { public: - simple_priority_queue(const std::vector &nl, + simple_priority_queue(const std::vector &nl, server_load_balancer::node_comparator &&compare) : container(nl), cmp(std::move(compare)) { std::make_heap(container.begin(), container.end(), cmp); } - void push(const dsn::rpc_address &addr) + void push(const dsn::host_port &addr) { container.push_back(addr); std::push_heap(container.begin(), container.end(), cmp); } - dsn::rpc_address pop() + dsn::host_port pop() { std::pop_heap(container.begin(), container.end(), cmp); - dsn::rpc_address result = container.back(); + dsn::host_port result = container.back(); container.pop_back(); return result; } - dsn::rpc_address top() const { return container.front(); } + dsn::host_port top() const { return container.front(); } bool empty() const { return container.empty(); } private: - std::vector container; + std::vector container; server_load_balancer::node_comparator cmp; }; void generate_balanced_apps(/*out*/ app_mapper &apps, node_mapper &nodes, - const std::vector &node_list) + const std::vector &node_list) { nodes.clear(); for (const auto &node : node_list) @@ -98,22 +99,22 @@ void generate_balanced_apps(/*out*/ app_mapper &apps, simple_priority_queue pq1(node_list, server_load_balancer::primary_comparator(nodes)); // generate balanced primary for (dsn::partition_configuration &pc : the_app->partitions) { - dsn::rpc_address n = pq1.pop(); + dsn::host_port n = pq1.pop(); nodes[n].put_partition(pc.pid, true); - pc.primary = n; + pc.hp_primary = n; pq1.push(n); } // generate balanced secondary simple_priority_queue pq2(node_list, server_load_balancer::partition_comparator(nodes)); - std::vector temp; + std::vector temp; for (dsn::partition_configuration &pc : the_app->partitions) { temp.clear(); - while 
(pc.secondaries.size() + 1 < pc.max_replica_count) { - dsn::rpc_address n = pq2.pop(); + while (pc.hp_secondaries.size() + 1 < pc.max_replica_count) { + dsn::host_port n = pq2.pop(); if (!is_member(pc, n)) { - pc.secondaries.push_back(n); + pc.hp_secondaries.push_back(n); nodes[n].put_partition(pc.pid, false); } temp.push_back(n); @@ -153,9 +154,10 @@ void random_move_primary(app_mapper &apps, node_mapper &nodes, int primary_move_ int n = random32(1, space_size) / 100; if (n < primary_move_ratio) { int indice = random32(0, 1); - nodes[pc.primary].remove_partition(pc.pid, true); + nodes[pc.hp_primary].remove_partition(pc.pid, true); std::swap(pc.primary, pc.secondaries[indice]); - nodes[pc.primary].put_partition(pc.pid, true); + std::swap(pc.hp_primary, pc.hp_secondaries[indice]); + nodes[pc.hp_primary].put_partition(pc.pid, true); } } } @@ -164,9 +166,13 @@ void greedy_balancer_perfect_move_primary() { app_mapper apps; node_mapper nodes; - std::vector node_list; + std::vector> node_pairs; + std::vector node_list; + generate_node_list(node_pairs, 19, 100); + for (const auto &p : node_pairs) { + node_list.emplace_back(p.first); + } - generate_node_list(node_list, 20, 100); generate_balanced_apps(apps, nodes, node_list); random_move_primary(apps, nodes, 70); diff --git a/src/meta/test/balancer_validator.cpp b/src/meta/test/balancer_validator.cpp index 8e1d456543..d42d8ce672 100644 --- a/src/meta/test/balancer_validator.cpp +++ b/src/meta/test/balancer_validator.cpp @@ -48,6 +48,7 @@ #include "meta_service_test_app.h" #include "metadata_types.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/fmt_logging.h" namespace dsn { @@ -67,22 +68,23 @@ static void check_cure(app_mapper &apps, node_mapper &nodes, ::dsn::partition_co break; switch (act.type) { case config_type::CT_ASSIGN_PRIMARY: - CHECK(pc.primary.is_invalid(), ""); - CHECK(pc.secondaries.empty(), ""); - CHECK_EQ(act.node, act.target); - CHECK(nodes.find(act.node) != 
nodes.end(), ""); + CHECK(pc.hp_primary.is_invalid(), ""); + CHECK(pc.hp_secondaries.empty(), ""); + CHECK_EQ(act.hp_node, act.hp_target); + CHECK(nodes.find(act.hp_node) != nodes.end(), ""); - CHECK_EQ(nodes[act.node].served_as(pc.pid), partition_status::PS_INACTIVE); - nodes[act.node].put_partition(pc.pid, true); + CHECK_EQ(nodes[act.hp_node].served_as(pc.pid), partition_status::PS_INACTIVE); + nodes[act.hp_node].put_partition(pc.pid, true); pc.primary = act.node; + pc.hp_primary = act.hp_node; break; case config_type::CT_ADD_SECONDARY: - CHECK(!is_member(pc, act.node), ""); - CHECK_EQ(pc.primary, act.target); - CHECK(nodes.find(act.node) != nodes.end(), ""); - pc.secondaries.push_back(act.node); - ns = &nodes[act.node]; + CHECK(!is_member(pc, act.hp_node), ""); + CHECK_EQ(pc.hp_primary, act.hp_target); + CHECK(nodes.find(act.hp_node) != nodes.end(), ""); + pc.hp_secondaries.push_back(act.hp_node); + ns = &nodes[act.hp_node]; CHECK_EQ(ns->served_as(pc.pid), partition_status::PS_INACTIVE); ns->put_partition(pc.pid, false); break; @@ -94,20 +96,23 @@ static void check_cure(app_mapper &apps, node_mapper &nodes, ::dsn::partition_co } // test upgrade to primary - CHECK_EQ(nodes[pc.primary].served_as(pc.pid), partition_status::PS_PRIMARY); - nodes[pc.primary].remove_partition(pc.pid, true); + CHECK_EQ(nodes[pc.hp_primary].served_as(pc.pid), partition_status::PS_PRIMARY); + nodes[pc.hp_primary].remove_partition(pc.pid, true); pc.primary.set_invalid(); + pc.hp_primary.reset(); ps = guardian.cure({&apps, &nodes}, pc.pid, act); CHECK_EQ(act.type, config_type::CT_UPGRADE_TO_PRIMARY); - CHECK(pc.primary.is_invalid(), ""); - CHECK_EQ(act.node, act.target); - CHECK(is_secondary(pc, act.node), ""); - CHECK(nodes.find(act.node) != nodes.end(), ""); + CHECK(pc.hp_primary.is_invalid(), ""); + CHECK_EQ(act.hp_node, act.hp_target); + CHECK(is_secondary(pc, act.hp_node), ""); + CHECK(nodes.find(act.hp_node) != nodes.end(), ""); - ns = &nodes[act.node]; + ns = &nodes[act.hp_node]; 
pc.primary = act.node; + pc.__set_hp_primary(act.hp_node); std::remove(pc.secondaries.begin(), pc.secondaries.end(), pc.primary); + std::remove(pc.hp_secondaries.begin(), pc.hp_secondaries.end(), pc.hp_primary); CHECK_EQ(ns->served_as(pc.pid), partition_status::PS_SECONDARY); ns->put_partition(pc.pid, true); @@ -115,8 +120,12 @@ static void check_cure(app_mapper &apps, node_mapper &nodes, ::dsn::partition_co void meta_service_test_app::balancer_validator() { - std::vector node_list; - generate_node_list(node_list, 20, 100); + std::vector> node_pairs; + std::vector node_list; + generate_node_list(node_pairs, 20, 100); + for (const auto &p : node_pairs) { + node_list.emplace_back(p.first); + } app_mapper apps; node_mapper nodes; @@ -156,27 +165,29 @@ void meta_service_test_app::balancer_validator() std::shared_ptr &the_app = apps[1]; for (::dsn::partition_configuration &pc : the_app->partitions) { - CHECK(!pc.primary.is_invalid(), ""); + CHECK(!pc.hp_primary.is_invalid(), ""); CHECK_GE(pc.secondaries.size(), pc.max_replica_count - 1); } // now test the cure ::dsn::partition_configuration &pc = the_app->partitions[0]; - nodes[pc.primary].remove_partition(pc.pid, false); - for (const dsn::rpc_address &addr : pc.secondaries) - nodes[addr].remove_partition(pc.pid, false); + nodes[pc.hp_primary].remove_partition(pc.pid, false); + for (const dsn::host_port &hp : pc.hp_secondaries) + nodes[hp].remove_partition(pc.pid, false); pc.primary.set_invalid(); pc.secondaries.clear(); + pc.hp_primary.reset(); + pc.hp_secondaries.clear(); // cure test check_cure(apps, nodes, pc); } -dsn::rpc_address get_rpc_address(const std::string &ip_port) +dsn::host_port get_host_port(const std::string &ip_port) { int splitter = ip_port.find_first_of(':'); - return rpc_address(ip_port.substr(0, splitter).c_str(), - boost::lexical_cast(ip_port.substr(splitter + 1))); + return host_port(ip_port.substr(0, splitter).c_str(), + boost::lexical_cast(ip_port.substr(splitter + 1))); } static void 
load_apps_and_nodes(const char *file, app_mapper &apps, node_mapper &nodes) @@ -189,10 +200,10 @@ static void load_apps_and_nodes(const char *file, app_mapper &apps, node_mapper infile >> total_nodes; std::string ip_port; - std::vector node_list; + std::vector node_list; for (int i = 0; i < total_nodes; ++i) { infile >> ip_port; - node_list.push_back(get_rpc_address(ip_port)); + node_list.push_back(get_host_port(ip_port)); } int total_apps; @@ -212,10 +223,10 @@ static void load_apps_and_nodes(const char *file, app_mapper &apps, node_mapper int n; infile >> n; infile >> ip_port; - app->partitions[j].primary = get_rpc_address(ip_port); + app->partitions[j].hp_primary = get_host_port(ip_port); for (int k = 1; k < n; ++k) { infile >> ip_port; - app->partitions[j].secondaries.push_back(get_rpc_address(ip_port)); + app->partitions[j].hp_secondaries.push_back(get_host_port(ip_port)); } } } diff --git a/src/meta/test/cluster_balance_policy_test.cpp b/src/meta/test/cluster_balance_policy_test.cpp index 0ade35a6c9..3ab53725e7 100644 --- a/src/meta/test/cluster_balance_policy_test.cpp +++ b/src/meta/test/cluster_balance_policy_test.cpp @@ -33,9 +33,10 @@ #include "meta/cluster_balance_policy.h" #include "meta/load_balance_policy.h" #include "meta/meta_data.h" +#include "meta/meta_service.h" #include "meta_admin_types.h" #include "metadata_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/defer.h" #include "utils/fail_point.h" @@ -65,33 +66,27 @@ TEST(cluster_balance_policy, node_migration_info) { { cluster_balance_policy::node_migration_info info1; - info1.address = rpc_address(1, 10086); + info1.hp = host_port("localhost", 10000); cluster_balance_policy::node_migration_info info2; - info2.address = rpc_address(2, 10086); + info2.hp = host_port("localhost", 10086); ASSERT_LT(info1, info2); } { cluster_balance_policy::node_migration_info info1; - info1.address = rpc_address(1, 10000); + info1.hp = host_port("localhost", 
10086); cluster_balance_policy::node_migration_info info2; - info2.address = rpc_address(1, 10086); - ASSERT_LT(info1, info2); - } - - { - cluster_balance_policy::node_migration_info info1; - info1.address = rpc_address(1, 10086); - cluster_balance_policy::node_migration_info info2; - info2.address = rpc_address(1, 10086); + info2.hp = host_port("localhost", 10086); ASSERT_EQ(info1, info2); } } TEST(cluster_balance_policy, get_skew) { - std::map count_map = { - {rpc_address(1, 10086), 1}, {rpc_address(2, 10086), 3}, {rpc_address(3, 10086), 5}, + std::map count_map = { + {host_port("localhost", 10085), 1}, + {host_port("localhost", 10086), 3}, + {host_port("localhost", 10087), 5}, }; ASSERT_EQ(get_skew(count_map), count_map.rbegin()->second - count_map.begin()->second); @@ -112,20 +107,21 @@ TEST(cluster_balance_policy, get_partition_count) TEST(cluster_balance_policy, get_app_migration_info) { - cluster_balance_policy policy(nullptr); + meta_service svc; + cluster_balance_policy policy(&svc); int appid = 1; std::string appname = "test"; - auto address = rpc_address(1, 10086); + auto address = host_port("localhost", 10086); app_info info; info.app_id = appid; info.app_name = appname; info.partition_count = 1; auto app = std::make_shared(info); - app->partitions[0].primary = address; + app->partitions[0].hp_primary = address; node_state ns; - ns.set_addr(address); + ns.set_hp(address); ns.put_partition(gpid(appid, 0), true); node_mapper nodes; nodes[address] = ns; @@ -145,7 +141,7 @@ TEST(cluster_balance_policy, get_app_migration_info) ASSERT_TRUE(res); ASSERT_EQ(migration_info.app_id, appid); ASSERT_EQ(migration_info.app_name, appname); - std::map pstatus_map; + std::map pstatus_map; pstatus_map[address] = partition_status::type::PS_PRIMARY; ASSERT_EQ(migration_info.partitions[0], pstatus_map); ASSERT_EQ(migration_info.replicas_count[address], 1); @@ -154,17 +150,18 @@ TEST(cluster_balance_policy, get_app_migration_info) TEST(cluster_balance_policy, 
get_node_migration_info) { - cluster_balance_policy policy(nullptr); + meta_service svc; + cluster_balance_policy policy(&svc); int appid = 1; std::string appname = "test"; - auto address = rpc_address(1, 10086); + auto address = host_port("localhost", 10086); app_info info; info.app_id = appid; info.app_name = appname; info.partition_count = 1; auto app = std::make_shared(info); - app->partitions[0].primary = address; + app->partitions[0].hp_primary = address; serving_replica sr; sr.node = address; std::string disk_tag = "disk1"; @@ -180,14 +177,14 @@ TEST(cluster_balance_policy, get_node_migration_info) all_apps[appid] = app; node_state ns; - ns.set_addr(address); + ns.set_hp(address); gpid pid = gpid(appid, 0); ns.put_partition(pid, true); cluster_balance_policy::node_migration_info migration_info; policy.get_node_migration_info(ns, all_apps, migration_info); - ASSERT_EQ(migration_info.address, address); + ASSERT_EQ(migration_info.hp, address); ASSERT_NE(migration_info.partitions.find(disk_tag), migration_info.partitions.end()); ASSERT_EQ(migration_info.partitions.at(disk_tag).size(), 1); ASSERT_EQ(*migration_info.partitions.at(disk_tag).begin(), pid); @@ -195,33 +192,34 @@ TEST(cluster_balance_policy, get_node_migration_info) TEST(cluster_balance_policy, get_min_max_set) { - std::map node_count_map; - node_count_map.emplace(rpc_address(1, 10086), 1); - node_count_map.emplace(rpc_address(2, 10086), 3); - node_count_map.emplace(rpc_address(3, 10086), 5); - node_count_map.emplace(rpc_address(4, 10086), 5); + std::map node_count_map; + node_count_map.emplace(host_port("localhost", 10081), 1); + node_count_map.emplace(host_port("localhost", 10082), 3); + node_count_map.emplace(host_port("localhost", 10083), 5); + node_count_map.emplace(host_port("localhost", 10084), 5); - std::set min_set, max_set; + std::set min_set, max_set; get_min_max_set(node_count_map, min_set, max_set); ASSERT_EQ(min_set.size(), 1); - ASSERT_EQ(*min_set.begin(), rpc_address(1, 10086)); + 
ASSERT_EQ(*min_set.begin(), host_port("localhost", 10081)); ASSERT_EQ(max_set.size(), 2); - ASSERT_EQ(*max_set.begin(), rpc_address(3, 10086)); - ASSERT_EQ(*max_set.rbegin(), rpc_address(4, 10086)); + ASSERT_EQ(*max_set.begin(), host_port("localhost", 10083)); + ASSERT_EQ(*max_set.rbegin(), host_port("localhost", 10084)); } TEST(cluster_balance_policy, get_disk_partitions_map) { - cluster_balance_policy policy(nullptr); + meta_service svc; + cluster_balance_policy policy(&svc); cluster_balance_policy::cluster_migration_info cluster_info; - rpc_address addr(1, 10086); + host_port addr("localhost", 10086); int32_t app_id = 1; auto disk_partitions = policy.get_disk_partitions_map(cluster_info, addr, app_id); ASSERT_TRUE(disk_partitions.empty()); - std::map partition; + std::map partition; partition[addr] = partition_status::PS_SECONDARY; cluster_balance_policy::app_migration_info app_info; app_info.partitions.push_back(partition); @@ -249,11 +247,11 @@ TEST(cluster_balance_policy, get_max_load_disk_set) cluster_info.type = balance_type::COPY_SECONDARY; int32_t app_id = 1; - rpc_address addr(1, 10086); - rpc_address addr2(2, 10086); - std::map partition; + host_port addr("localhost", 10086); + host_port addr2("localhost", 10087); + std::map partition; partition[addr] = partition_status::PS_SECONDARY; - std::map partition2; + std::map partition2; partition2[addr] = partition_status::PS_SECONDARY; partition2[addr2] = partition_status::PS_SECONDARY; cluster_balance_policy::app_migration_info app_info; @@ -282,8 +280,9 @@ TEST(cluster_balance_policy, get_max_load_disk_set) node_info2.partitions[disk_tag3] = partitions3; cluster_info.nodes_info[addr2] = node_info2; - cluster_balance_policy policy(nullptr); - std::set max_nodes; + meta_service svc; + cluster_balance_policy policy(&svc); + std::set max_nodes; max_nodes.insert(addr); max_nodes.insert(addr2); @@ -299,11 +298,11 @@ TEST(cluster_balance_policy, apply_move) int32_t app_id = 1; int32_t partition_index = 1; 
minfo.pid = gpid(app_id, partition_index); - rpc_address source_node(1, 10086); + host_port source_node("localhost", 10086); minfo.source_node = source_node; std::string disk_tag = "disk1"; minfo.source_disk_tag = disk_tag; - rpc_address target_node(2, 10086); + host_port target_node("localhost", 10087); minfo.target_node = target_node; minfo.type = balance_type::MOVE_PRIMARY; @@ -313,7 +312,8 @@ TEST(cluster_balance_policy, apply_move) view.apps = &apps; view.nodes = &nodes; - cluster_balance_policy policy(nullptr); + meta_service svc; + cluster_balance_policy policy(&svc); policy._global_view = &view; cluster_balance_policy::cluster_migration_info cluster_info; cluster_info.type = balance_type::COPY_SECONDARY; @@ -354,7 +354,7 @@ TEST(cluster_balance_policy, apply_move) ASSERT_FALSE(res); // all of the partition status are not PS_SECONDARY - std::map partition_status; + std::map partition_status; partition_status[source_node] = partition_status::type::PS_PRIMARY; cluster_info.apps_info[app_id].partitions.push_back(partition_status); cluster_info.apps_info[app_id].partitions.push_back(partition_status); @@ -389,15 +389,16 @@ TEST(cluster_balance_policy, apply_move) TEST(cluster_balance_policy, pick_up_partition) { cluster_balance_policy::cluster_migration_info cluster_info; - rpc_address addr(1, 10086); + host_port addr("localhost", 10086); int32_t app_id = 1; - std::map partition; + std::map partition; partition[addr] = partition_status::PS_SECONDARY; cluster_balance_policy::app_migration_info app_info; app_info.partitions.push_back(partition); cluster_info.apps_info[app_id] = app_info; - cluster_balance_policy policy(nullptr); + meta_service svc; + cluster_balance_policy policy(&svc); { // all of the partitions in max_load_partitions are not found in cluster_info partition_set max_load_partitions; @@ -441,7 +442,7 @@ TEST(cluster_balance_policy, pick_up_partition) gpid pid(app_id, 0); max_load_partitions.insert(pid); partition_set selected_pid; - rpc_address 
not_exist_addr(3, 12345); + host_port not_exist_addr("localhost", 12345); gpid picked_pid; auto found = policy.pick_up_partition( @@ -470,7 +471,8 @@ TEST(cluster_balance_policy, execute_balance) app->helpers->split_states.splitting_count = 0; app_mapper apps; apps[app_id] = app; - cluster_balance_policy policy(nullptr); + meta_service svc; + cluster_balance_policy policy(&svc); app->status = app_status::AS_DROPPED; auto res = policy.execute_balance(apps, false, false, true, balance_func); @@ -502,9 +504,9 @@ TEST(cluster_balance_policy, execute_balance) TEST(cluster_balance_policy, calc_potential_moving) { - auto addr1 = rpc_address(1, 1); - auto addr2 = rpc_address(1, 2); - auto addr3 = rpc_address(1, 3); + auto addr1 = host_port("localhost", 1); + auto addr2 = host_port("localhost", 2); + auto addr3 = host_port("localhost", 3); int32_t app_id = 1; dsn::app_info info; @@ -512,9 +514,9 @@ TEST(cluster_balance_policy, calc_potential_moving) info.partition_count = 4; std::shared_ptr app = app_state::create(info); partition_configuration pc; - pc.primary = addr1; - pc.secondaries.push_back(addr2); - pc.secondaries.push_back(addr3); + pc.hp_primary = addr1; + pc.hp_secondaries.push_back(addr2); + pc.hp_secondaries.push_back(addr3); app->partitions[0] = pc; app->partitions[1] = pc; @@ -536,7 +538,8 @@ TEST(cluster_balance_policy, calc_potential_moving) struct meta_view view; view.nodes = &nodes; view.apps = &apps; - cluster_balance_policy policy(nullptr); + meta_service svc; + cluster_balance_policy policy(&svc); policy._global_view = &view; auto gpids = policy.calc_potential_moving(app, addr1, addr2); diff --git a/src/meta/test/copy_replica_operation_test.cpp b/src/meta/test/copy_replica_operation_test.cpp index c2f681fd32..96fc28e4da 100644 --- a/src/meta/test/copy_replica_operation_test.cpp +++ b/src/meta/test/copy_replica_operation_test.cpp @@ -30,7 +30,8 @@ #include "meta/app_balance_policy.h" #include "meta/load_balance_policy.h" #include "meta/meta_data.h" 
-#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/fail_point.h" namespace dsn { @@ -46,9 +47,9 @@ TEST(copy_primary_operation, misc) app_mapper apps; apps[app_id] = app; - auto addr1 = rpc_address(1, 1); - auto addr2 = rpc_address(1, 2); - auto addr3 = rpc_address(1, 3); + auto addr1 = host_port("localhost", 1); + auto addr2 = host_port("localhost", 2); + auto addr3 = host_port("localhost", 3); node_mapper nodes; node_state ns1; @@ -63,21 +64,22 @@ TEST(copy_primary_operation, misc) ns3.put_partition(gpid(app_id, 2), false); nodes[addr3] = ns3; - std::vector address_vec{addr1, addr2, addr3}; - std::unordered_map address_id; - address_id[addr1] = 0; - address_id[addr2] = 1; - address_id[addr3] = 2; - copy_primary_operation op(app, apps, nodes, address_vec, address_id, false, 0); + std::vector host_port_vec{addr1, addr2, addr3}; + std::unordered_map host_port_id; + host_port_id[addr1] = 0; + host_port_id[addr2] = 1; + host_port_id[addr3] = 2; + auto resolver = std::make_shared(); + copy_primary_operation op(app, apps, nodes, host_port_vec, host_port_id, false, 0, resolver); /** - * Test init_ordered_address_ids + * Test init_ordered_host_port_ids */ - op.init_ordered_address_ids(); - ASSERT_EQ(op._ordered_address_ids.size(), 3); - ASSERT_EQ(*op._ordered_address_ids.begin(), 2); - ASSERT_EQ(*(++op._ordered_address_ids.begin()), 0); - ASSERT_EQ(*op._ordered_address_ids.rbegin(), 1); + op.init_ordered_host_port_ids(); + ASSERT_EQ(op._ordered_host_port_ids.size(), 3); + ASSERT_EQ(*op._ordered_host_port_ids.begin(), 2); + ASSERT_EQ(*(++op._ordered_host_port_ids.begin()), 0); + ASSERT_EQ(*op._ordered_host_port_ids.rbegin(), 1); ASSERT_EQ(op._partition_counts[0], 1); ASSERT_EQ(op._partition_counts[1], 2); ASSERT_EQ(op._partition_counts[2], 0); @@ -127,22 +129,22 @@ TEST(copy_primary_operation, misc) op._replicas_low = 0; nodes[addr2].remove_partition(gpid(app_id, 1), false); - 
op.init_ordered_address_ids(); + op.init_ordered_host_port_ids(); ASSERT_FALSE(op.can_continue()); nodes[addr2].put_partition(gpid(app_id, 1), true); /** - * Test update_ordered_address_ids + * Test update_ordered_host_port_ids */ nodes[addr1].put_partition(gpid(app_id, 3), true); nodes[addr2].put_partition(gpid(app_id, 4), true); nodes[addr2].put_partition(gpid(app_id, 5), true); - op.init_ordered_address_ids(); - op.update_ordered_address_ids(); - ASSERT_EQ(op._ordered_address_ids.size(), 3); - ASSERT_EQ(*op._ordered_address_ids.begin(), 2); - ASSERT_EQ(*(++op._ordered_address_ids.begin()), 0); - ASSERT_EQ(*op._ordered_address_ids.rbegin(), 1); + op.init_ordered_host_port_ids(); + op.update_ordered_host_port_ids(); + ASSERT_EQ(op._ordered_host_port_ids.size(), 3); + ASSERT_EQ(*op._ordered_host_port_ids.begin(), 2); + ASSERT_EQ(*(++op._ordered_host_port_ids.begin()), 0); + ASSERT_EQ(*op._ordered_host_port_ids.rbegin(), 1); ASSERT_EQ(op._partition_counts[0], 2); ASSERT_EQ(op._partition_counts[1], 3); ASSERT_EQ(op._partition_counts[2], 1); @@ -166,9 +168,11 @@ TEST(copy_primary_operation, can_select) { app_mapper apps; node_mapper nodes; - std::vector address_vec; - std::unordered_map address_id; - copy_primary_operation op(nullptr, apps, nodes, address_vec, address_id, false, false); + std::vector host_port_vec; + std::unordered_map host_port_id; + auto resolver = std::make_shared(); + copy_primary_operation op( + nullptr, apps, nodes, host_port_vec, host_port_id, false, false, resolver); gpid cannot_select_gpid(1, 1); gpid can_select_gpid(1, 2); @@ -183,9 +187,11 @@ TEST(copy_primary_operation, only_copy_primary) { app_mapper apps; node_mapper nodes; - std::vector address_vec; - std::unordered_map address_id; - copy_primary_operation op(nullptr, apps, nodes, address_vec, address_id, false, false); + std::vector host_port_vec; + std::unordered_map host_port_id; + auto resolver = std::make_shared(); + copy_primary_operation op( + nullptr, apps, nodes, host_port_vec, 
host_port_id, false, false, resolver); ASSERT_TRUE(op.only_copy_primary()); } @@ -200,9 +206,9 @@ TEST(copy_secondary_operation, misc) app_mapper apps; apps[app_id] = app; - auto addr1 = rpc_address(1, 1); - auto addr2 = rpc_address(1, 2); - auto addr3 = rpc_address(1, 3); + auto addr1 = host_port("localhost", 1); + auto addr2 = host_port("localhost", 2); + auto addr3 = host_port("localhost", 3); node_mapper nodes; node_state ns1; @@ -216,13 +222,14 @@ TEST(copy_secondary_operation, misc) node_state ns3; nodes[addr3] = ns3; - std::vector address_vec{addr1, addr2, addr3}; - std::unordered_map address_id; - address_id[addr1] = 0; - address_id[addr2] = 1; - address_id[addr3] = 2; - copy_secondary_operation op(app, apps, nodes, address_vec, address_id, 0); - op.init_ordered_address_ids(); + std::vector host_port_vec{addr1, addr2, addr3}; + std::unordered_map host_port_id; + host_port_id[addr1] = 0; + host_port_id[addr2] = 1; + host_port_id[addr3] = 2; + auto resolver = std::make_shared(); + copy_secondary_operation op(app, apps, nodes, host_port_vec, host_port_id, resolver, 0); + op.init_ordered_host_port_ids(); /** * Test copy_secondary_operation::get_partition_count @@ -243,7 +250,7 @@ TEST(copy_secondary_operation, misc) op._replicas_low = 0; nodes[addr3].put_partition(gpid(app_id, 2), false); - op.init_ordered_address_ids(); + op.init_ordered_host_port_ids(); res = op.can_continue(); ASSERT_FALSE(res); nodes[addr3].remove_partition(gpid(app_id, 2), false); @@ -252,7 +259,7 @@ TEST(copy_secondary_operation, misc) * Test copy_secondary_operation::can_select */ nodes[addr1].put_partition(gpid(app_id, 3), true); - op.init_ordered_address_ids(); + op.init_ordered_host_port_ids(); migration_list list; res = op.can_select(gpid(app_id, 3), &list); ASSERT_FALSE(res); @@ -264,12 +271,12 @@ TEST(copy_secondary_operation, misc) list.clear(); nodes[addr3].put_partition(secondary_gpid, true); - op.init_ordered_address_ids(); + op.init_ordered_host_port_ids(); res = 
op.can_select(secondary_gpid, &list); ASSERT_FALSE(res); nodes[addr3].remove_partition(secondary_gpid, false); - op.init_ordered_address_ids(); + op.init_ordered_host_port_ids(); res = op.can_select(secondary_gpid, &list); ASSERT_TRUE(res); diff --git a/src/meta/test/duplication_info_test.cpp b/src/meta/test/duplication_info_test.cpp index 9383958e47..9e11be6370 100644 --- a/src/meta/test/duplication_info_test.cpp +++ b/src/meta/test/duplication_info_test.cpp @@ -51,7 +51,7 @@ class duplication_info_test : public testing::Test 2, 0, "dsn://slave-cluster/temp", - std::vector(), + std::vector(), "/meta_test/101/duplication/1"); duplication_confirm_entry entry; ASSERT_FALSE(dup.alter_progress(0, entry)); @@ -104,7 +104,7 @@ class duplication_info_test : public testing::Test 4, 0, "dsn://slave-cluster/temp", - std::vector(), + std::vector(), "/meta_test/101/duplication/1"); ASSERT_FALSE(dup.is_altering()); ASSERT_EQ(dup._status, duplication_status::DS_INIT); @@ -134,7 +134,7 @@ class duplication_info_test : public testing::Test 4, 0, "dsn://slave-cluster/temp", - std::vector(), + std::vector(), "/meta_test/101/duplication/1"); dup.start(); @@ -153,7 +153,7 @@ class duplication_info_test : public testing::Test 4, 0, "slave-cluster", - std::vector(), + std::vector(), "/meta_test/101/duplication/1"); dup.start(); dup.persist_status(); @@ -187,7 +187,7 @@ TEST_F(duplication_info_test, alter_status_when_busy) 4, 0, "dsn://slave-cluster/temp", - std::vector(), + std::vector(), "/meta_test/101/duplication/1"); dup.start(); @@ -259,7 +259,7 @@ TEST_F(duplication_info_test, alter_status) 4, 0, "dsn://slave-cluster/temp", - std::vector(), + std::vector(), "/meta_test/101/duplication/1"); for (const auto from : tt.from_list) { force_update_status(dup, from); @@ -289,7 +289,7 @@ TEST_F(duplication_info_test, is_valid) 4, 0, "dsn://slave-cluster/temp", - std::vector(), + std::vector(), "/meta_test/101/duplication/1"); ASSERT_TRUE(dup.is_invalid_status()); diff --git 
a/src/meta/test/ford_fulkerson_test.cpp b/src/meta/test/ford_fulkerson_test.cpp index 00b49d9bee..a1c421ea53 100644 --- a/src/meta/test/ford_fulkerson_test.cpp +++ b/src/meta/test/ford_fulkerson_test.cpp @@ -18,6 +18,7 @@ // IWYU pragma: no_include #include #include +#include #include #include @@ -26,7 +27,7 @@ #include "gtest/gtest.h" #include "meta/load_balance_policy.h" #include "meta/meta_data.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" namespace dsn { namespace replication { @@ -41,11 +42,11 @@ TEST(ford_fulkerson, build_failure) node_mapper nodes; node_state ns; ns.put_partition(gpid(app_id, 0), true); - nodes[rpc_address(1, 1)] = ns; - nodes[rpc_address(2, 2)] = ns; - nodes[rpc_address(3, 3)] = ns; + nodes[host_port("localhost", 1)] = ns; + nodes[host_port("localhost", 2)] = ns; + nodes[host_port("localhost", 3)] = ns; - std::unordered_map address_id; + std::unordered_map address_id; auto ff = ford_fulkerson::builder(app, nodes, address_id).build(); ASSERT_EQ(ff, nullptr); } @@ -58,10 +59,10 @@ TEST(ford_fulkerson, add_edge) info.partition_count = 4; std::shared_ptr app = app_state::create(info); - std::unordered_map address_id; - auto addr1 = rpc_address(1, 1); - auto addr2 = rpc_address(1, 2); - auto addr3 = rpc_address(1, 3); + std::unordered_map address_id; + auto addr1 = host_port("localhost", 1); + auto addr2 = host_port("localhost", 2); + auto addr3 = host_port("localhost", 3); address_id[addr1] = 1; address_id[addr2] = 2; address_id[addr3] = 3; @@ -85,9 +86,9 @@ TEST(ford_fulkerson, add_edge) TEST(ford_fulkerson, update_decree) { - auto addr1 = rpc_address(1, 1); - auto addr2 = rpc_address(2, 2); - auto addr3 = rpc_address(3, 3); + auto addr1 = host_port("localhost", 1); + auto addr2 = host_port("localhost", 2); + auto addr3 = host_port("localhost", 3); int32_t app_id = 1; dsn::app_info info; @@ -95,8 +96,8 @@ TEST(ford_fulkerson, update_decree) info.partition_count = 1; std::shared_ptr app = 
app_state::create(info); partition_configuration pc; - pc.secondaries.push_back(addr2); - pc.secondaries.push_back(addr3); + pc.hp_secondaries.push_back(addr2); + pc.hp_secondaries.push_back(addr3); app->partitions.push_back(pc); app->partitions.push_back(pc); @@ -108,7 +109,7 @@ TEST(ford_fulkerson, update_decree) nodes[addr2] = ns; nodes[addr3] = ns; - std::unordered_map address_id; + std::unordered_map address_id; address_id[addr1] = 1; address_id[addr2] = 2; address_id[addr3] = 3; @@ -122,9 +123,9 @@ TEST(ford_fulkerson, update_decree) TEST(ford_fulkerson, find_shortest_path) { - auto addr1 = rpc_address(1, 1); - auto addr2 = rpc_address(2, 2); - auto addr3 = rpc_address(3, 3); + auto addr1 = host_port("localhost", 1); + auto addr2 = host_port("localhost", 2); + auto addr3 = host_port("localhost", 3); int32_t app_id = 1; dsn::app_info info; @@ -133,9 +134,9 @@ TEST(ford_fulkerson, find_shortest_path) std::shared_ptr app = app_state::create(info); partition_configuration pc; - pc.primary = addr1; - pc.secondaries.push_back(addr2); - pc.secondaries.push_back(addr3); + pc.hp_primary = addr1; + pc.hp_secondaries.push_back(addr2); + pc.hp_secondaries.push_back(addr3); app->partitions[0] = pc; app->partitions[1] = pc; @@ -151,7 +152,7 @@ TEST(ford_fulkerson, find_shortest_path) nodes[addr2] = ns2; nodes[addr3] = ns2; - std::unordered_map address_id; + std::unordered_map address_id; address_id[addr1] = 1; address_id[addr2] = 2; address_id[addr3] = 3; @@ -219,10 +220,10 @@ TEST(ford_fulkerson, max_value_pos) info.partition_count = 4; std::shared_ptr app = app_state::create(info); - std::unordered_map address_id; - auto addr1 = rpc_address(1, 1); - auto addr2 = rpc_address(1, 2); - auto addr3 = rpc_address(1, 3); + std::unordered_map address_id; + auto addr1 = host_port("localhost", 1); + auto addr2 = host_port("localhost", 2); + auto addr3 = host_port("localhost", 3); address_id[addr1] = 1; address_id[addr2] = 2; address_id[addr3] = 3; @@ -257,10 +258,10 @@ 
TEST(ford_fulkerson, select_node) info.partition_count = 4; std::shared_ptr app = app_state::create(info); - std::unordered_map address_id; - auto addr1 = rpc_address(1, 1); - auto addr2 = rpc_address(1, 2); - auto addr3 = rpc_address(1, 3); + std::unordered_map address_id; + auto addr1 = host_port("localhost", 1); + auto addr2 = host_port("localhost", 2); + auto addr3 = host_port("localhost", 3); address_id[addr1] = 1; address_id[addr2] = 2; address_id[addr3] = 3; diff --git a/src/meta/test/json_compacity.cpp b/src/meta/test/json_compacity.cpp index 66b870ede9..c1ca9a7511 100644 --- a/src/meta/test/json_compacity.cpp +++ b/src/meta/test/json_compacity.cpp @@ -39,6 +39,7 @@ #include "meta/meta_backup_service.h" #include "meta_service_test_app.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/blob.h" namespace dsn { @@ -86,14 +87,18 @@ void meta_service_test_app::json_compacity() // 4. old pc version const char *json3 = "{\"pid\":\"1.1\",\"ballot\":234,\"max_replica_count\":3," "\"primary\":\"invalid address\",\"secondaries\":[\"127.0.0.1:6\"]," + "\"hp_primary\":\"invalid host_port\",\"hp_secondaries\":[\"localhost:6\"]," "\"last_drops\":[],\"last_committed_decree\":157}"; dsn::partition_configuration pc; dsn::json::json_forwarder::decode( dsn::blob(json3, 0, strlen(json3)), pc); ASSERT_EQ(234, pc.ballot); + ASSERT_TRUE(pc.hp_primary.is_invalid()); ASSERT_TRUE(pc.primary.is_invalid()); + ASSERT_EQ(1, pc.hp_secondaries.size()); ASSERT_EQ(1, pc.secondaries.size()); ASSERT_STREQ("127.0.0.1:6", pc.secondaries[0].to_string()); + ASSERT_EQ("localhost:6", pc.hp_secondaries[0].to_string()); ASSERT_EQ(157, pc.last_committed_decree); ASSERT_EQ(0, pc.partition_flags); diff --git a/src/meta/test/meta_app_operation_test.cpp b/src/meta/test/meta_app_operation_test.cpp index a6f3cce2c1..7ac3070909 100644 --- a/src/meta/test/meta_app_operation_test.cpp +++ b/src/meta/test/meta_app_operation_test.cpp @@ -41,7 +41,7 @@ #include 
"meta_service_test_app.h" #include "meta_test_base.h" #include "misc/misc.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/task/task_tracker.h" #include "utils/defer.h" @@ -507,7 +507,7 @@ TEST_F(meta_app_operation_test, create_app) // keep the number of all nodes greater than that of alive nodes const int total_node_count = 10; - std::vector nodes = ensure_enough_alive_nodes(total_node_count); + std::vector nodes = ensure_enough_alive_nodes(total_node_count); // the meta function level will become freezed once // alive_nodes * 100 < total_nodes * _node_live_percentage_threshold_for_update diff --git a/src/meta/test/meta_backup_test.cpp b/src/meta/test/meta_backup_test.cpp index da74bc7bfb..d2293a032a 100644 --- a/src/meta/test/meta_backup_test.cpp +++ b/src/meta/test/meta_backup_test.cpp @@ -35,7 +35,8 @@ #include "meta/server_state.h" #include "meta_test_base.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/env.h" #include "utils/error_code.h" #include "utils/fail_point.h" @@ -222,7 +223,8 @@ class backup_engine_test : public meta_test_base meta_test_base::SetUp(); _ms->_backup_handler = std::make_shared(_ms.get(), _policy_root, _backup_root, nullptr); - _backup_engine = std::make_shared(_ms->_backup_handler.get()); + std::shared_ptr resolver = std::make_shared(); + _backup_engine = std::make_shared(_ms->_backup_handler.get(), resolver); _backup_engine->set_block_service("local_service"); zauto_lock lock(_backup_engine->_lock); @@ -250,7 +252,7 @@ class backup_engine_test : public meta_test_base int32_t progress) { gpid pid = gpid(_app_id, partition_index); - rpc_address mock_primary_address = rpc_address("127.0.0.1", 10000 + partition_index); + host_port mock_primary_address = host_port("localhost", 10000 + partition_index); backup_response resp; 
resp.backup_id = _backup_engine->_cur_backup.backup_id; @@ -264,7 +266,7 @@ class backup_engine_test : public meta_test_base void mock_on_backup_reply_when_timeout(int32_t partition_index, error_code rpc_err) { gpid pid = gpid(_app_id, partition_index); - rpc_address mock_primary_address = rpc_address("127.0.0.1", 10000 + partition_index); + host_port mock_primary_address = host_port("localhost", 10000 + partition_index); backup_response resp; _backup_engine->on_backup_reply(rpc_err, resp, pid, mock_primary_address); } diff --git a/src/meta/test/meta_bulk_load_ingestion_test.cpp b/src/meta/test/meta_bulk_load_ingestion_test.cpp index 73bb93ae60..daed55067c 100644 --- a/src/meta/test/meta_bulk_load_ingestion_test.cpp +++ b/src/meta/test/meta_bulk_load_ingestion_test.cpp @@ -27,7 +27,7 @@ #include "meta/meta_bulk_load_ingestion_context.h" #include "meta/meta_data.h" #include "meta_test_base.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/fail_point.h" namespace dsn { @@ -88,7 +88,7 @@ class node_context_test : public meta_test_base public: ingestion_context::node_context _context; - const rpc_address NODE = rpc_address("127.0.0.1", 10086); + const host_port NODE = host_port("localhost", 10086); const std::string TAG = "default"; const std::string TAG2 = "tag2"; }; @@ -182,7 +182,7 @@ class ingestion_context_test : public meta_test_base } bool check_node_ingestion(const uint32_t max_node_count, - const rpc_address &node, + const host_port &node, const std::string &tag) { _context->reset_all(); @@ -223,15 +223,15 @@ class ingestion_context_test : public meta_test_base } void mock_partition(const uint32_t pidx, - std::vector nodes, + std::vector nodes, const std::vector tags, partition_configuration &config, config_context &cc) { config.pid = gpid(APP_ID, pidx); - config.primary = nodes[0]; - config.secondaries.emplace_back(nodes[1]); - config.secondaries.emplace_back(nodes[2]); + config.hp_primary = nodes[0]; + 
config.hp_secondaries.emplace_back(nodes[1]); + config.hp_secondaries.emplace_back(nodes[2]); auto count = nodes.size(); for (auto i = 0; i < count; i++) { @@ -242,7 +242,7 @@ class ingestion_context_test : public meta_test_base } } - void add_node_context(std::vector nodes) + void add_node_context(std::vector nodes) { for (const auto &address : nodes) { ingestion_context::node_context node(address, TAG1); @@ -276,7 +276,7 @@ class ingestion_context_test : public meta_test_base void reset_app() { return _context->reset_app(APP_ID); } - int32_t get_node_running_count(const rpc_address &node) + int32_t get_node_running_count(const host_port &node) { if (_context->_nodes_context.find(node) == _context->_nodes_context.end()) { return 0; @@ -284,7 +284,7 @@ class ingestion_context_test : public meta_test_base return _context->_nodes_context[node].node_ingesting_count; } - uint32_t get_disk_running_count(const rpc_address &node, const std::string &disk_tag) + uint32_t get_disk_running_count(const host_port &node, const std::string &disk_tag) { if (_context->_nodes_context.find(node) == _context->_nodes_context.end()) { return 0; @@ -296,7 +296,7 @@ class ingestion_context_test : public meta_test_base return node_cc.disk_ingesting_counts[disk_tag]; } - bool validate_count(const rpc_address &node, + bool validate_count(const host_port &node, const uint32_t expected_node_count, const uint32_t expected_disk1_count, const uint32_t expected_disk2_count) @@ -313,10 +313,10 @@ class ingestion_context_test : public meta_test_base const uint32_t PARTITION_COUNT = 4; const uint32_t MAX_NODE_COUNT = 2; const uint32_t MIN_DISK_COUNT = 2; - const rpc_address NODE1 = rpc_address("127.0.0.1", 10086); - const rpc_address NODE2 = rpc_address("127.0.0.1", 10085); - const rpc_address NODE3 = rpc_address("127.0.0.1", 10087); - const rpc_address NODE4 = rpc_address("127.0.0.1", 10088); + const host_port NODE1 = host_port("localhost", 10086); + const host_port NODE2 = host_port("localhost", 
10085); + const host_port NODE3 = host_port("localhost", 10087); + const host_port NODE4 = host_port("localhost", 10088); const std::string TAG1 = "tag1"; const std::string TAG2 = "tag2"; }; @@ -325,7 +325,7 @@ TEST_F(ingestion_context_test, check_node_ingestion_test) { struct check_node_ingestion_test { - rpc_address node; + host_port node; std::string tag; uint32_t max_node_count; bool expected_result; diff --git a/src/meta/test/meta_bulk_load_service_test.cpp b/src/meta/test/meta_bulk_load_service_test.cpp index d3ae38a1e8..3ff18c4fdd 100644 --- a/src/meta/test/meta_bulk_load_service_test.cpp +++ b/src/meta/test/meta_bulk_load_service_test.cpp @@ -50,6 +50,7 @@ #include "meta_test_base.h" #include "metadata_types.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/blob.h" #include "utils/error_code.h" #include "utils/fail_point.h" @@ -177,6 +178,10 @@ class bulk_load_service_test : public meta_test_base config.primary = PRIMARY; config.secondaries.emplace_back(SECONDARY1); config.secondaries.emplace_back(SECONDARY2); + config.hp_primary = PRIMARY_HP; + config.__set_hp_secondaries(std::vector()); + config.hp_secondaries.emplace_back(SECONDARY1_HP); + config.hp_secondaries.emplace_back(SECONDARY2_HP); app->partitions.clear(); app->partitions.emplace_back(config); mock_meta_bulk_load_context(app->app_id, app->partition_count, status); @@ -192,9 +197,11 @@ class bulk_load_service_test : public meta_test_base std::shared_ptr app = find_app(name); if (mock_primary_invalid) { app->partitions[pid.get_partition_index()].primary.set_invalid(); + app->partitions[pid.get_partition_index()].hp_primary.reset(); } if (mock_lack_secondary) { app->partitions[pid.get_partition_index()].secondaries.clear(); + app->partitions[pid.get_partition_index()].hp_secondaries.clear(); } partition_configuration pconfig; bool flag = bulk_svc().check_partition_status( @@ -213,13 +220,13 @@ class bulk_load_service_test : public meta_test_base { 
partition_bulk_load_info &pinfo = bulk_svc()._partition_bulk_load_info[pid]; pinfo.status = bulk_load_status::BLS_INGESTING; - pinfo.addresses.clear(); - pinfo.addresses.emplace_back(PRIMARY); - pinfo.addresses.emplace_back(SECONDARY1); + pinfo.host_ports.clear(); + pinfo.host_ports.emplace_back(PRIMARY_HP); + pinfo.host_ports.emplace_back(SECONDARY1_HP); if (use_secondary3) { - pinfo.addresses.emplace_back(SECONDARY3); + pinfo.host_ports.emplace_back(SECONDARY3_HP); } else { - pinfo.addresses.emplace_back(SECONDARY2); + pinfo.host_ports.emplace_back(SECONDARY2_HP); } pinfo.ever_ingest_succeed = ever_ingest_succeed; } @@ -233,16 +240,24 @@ class bulk_load_service_test : public meta_test_base partition_configuration config; config.pid = pid; config.primary = PRIMARY; + config.__set_hp_primary(PRIMARY_HP); + config.__set_hp_secondaries(std::vector()); if (same) { config.secondaries.emplace_back(SECONDARY1); config.secondaries.emplace_back(SECONDARY2); + config.hp_secondaries.emplace_back(SECONDARY1_HP); + config.hp_secondaries.emplace_back(SECONDARY2_HP); } else { config.secondaries.emplace_back(SECONDARY1); + config.hp_secondaries.emplace_back(SECONDARY1_HP); if (secondary_count == 2) { config.secondaries.emplace_back(SECONDARY3); + config.hp_secondaries.emplace_back(SECONDARY3_HP); } else if (secondary_count >= 3) { config.secondaries.emplace_back(SECONDARY2); config.secondaries.emplace_back(SECONDARY3); + config.hp_secondaries.emplace_back(SECONDARY2_HP); + config.hp_secondaries.emplace_back(SECONDARY3_HP); } } auto flag = bulk_svc().check_ever_ingestion_succeed(config, APP_NAME, pid); @@ -291,7 +306,8 @@ class bulk_load_service_test : public meta_test_base const gpid &pid, error_code rpc_err = ERR_OK) { - bulk_svc().on_partition_ingestion_reply(rpc_err, std::move(resp), APP_NAME, pid, PRIMARY); + bulk_svc().on_partition_ingestion_reply( + rpc_err, std::move(resp), APP_NAME, pid, PRIMARY_HP); wait_all(); } @@ -355,7 +371,8 @@ class bulk_load_service_test : public 
meta_test_base state->initialize_data_structure(); _ms->set_function_level(meta_function_level::fl_steady); - _ms->_failure_detector.reset(new meta_server_failure_detector(_ms.get())); + _ms->_failure_detector.reset( + new meta_server_failure_detector(_ms->_dns_resolver, _ms.get())); _ss = _ms->_state; } @@ -506,6 +523,11 @@ class bulk_load_service_test : public meta_test_base const rpc_address SECONDARY1 = rpc_address("127.0.0.1", 10085); const rpc_address SECONDARY2 = rpc_address("127.0.0.1", 10087); const rpc_address SECONDARY3 = rpc_address("127.0.0.1", 10080); + + const host_port PRIMARY_HP = host_port("localhost", 10086); + const host_port SECONDARY1_HP = host_port("localhost", 10085); + const host_port SECONDARY2_HP = host_port("localhost", 10087); + const host_port SECONDARY3_HP = host_port("localhost", 10080); }; /// start bulk load unit tests @@ -771,6 +793,7 @@ class bulk_load_process_test : public bulk_load_service_test _req.pid = gpid(_app_id, _pidx); _req.primary_addr = PRIMARY; _req.meta_bulk_load_status = status; + _req.__set_hp_primary(PRIMARY_HP); } void create_basic_response(error_code err, bulk_load_status::type status) @@ -797,6 +820,11 @@ class bulk_load_process_test : public bulk_load_service_test _resp.group_bulk_load_state[SECONDARY1] = state; _resp.group_bulk_load_state[SECONDARY2] = state2; _resp.__set_total_download_progress(total_progress); + + _resp.__set_hp_group_bulk_load_state({}); + _resp.hp_group_bulk_load_state[PRIMARY_HP] = state; + _resp.hp_group_bulk_load_state[SECONDARY1_HP] = state; + _resp.hp_group_bulk_load_state[SECONDARY2_HP] = state2; } void mock_response_bulk_load_metadata() @@ -828,6 +856,12 @@ class bulk_load_process_test : public bulk_load_service_test _resp.group_bulk_load_state[SECONDARY1] = state; _resp.group_bulk_load_state[SECONDARY2] = state2; _resp.__set_is_group_ingestion_finished(secondary_istatus == ingestion_status::IS_SUCCEED); + + _resp.__set_hp_group_bulk_load_state({}); + 
_resp.hp_group_bulk_load_state[PRIMARY_HP] = state; + _resp.hp_group_bulk_load_state[SECONDARY1_HP] = state; + _resp.hp_group_bulk_load_state[SECONDARY2_HP] = state2; + set_app_ingesting_count(_app_id, ingestion_count); } @@ -840,8 +874,13 @@ class bulk_load_process_test : public bulk_load_service_test _resp.group_bulk_load_state[PRIMARY] = state; _resp.group_bulk_load_state[SECONDARY1] = state; + _resp.__set_hp_group_bulk_load_state({}); + _resp.hp_group_bulk_load_state[PRIMARY_HP] = state; + _resp.hp_group_bulk_load_state[SECONDARY1_HP] = state; + state2.__set_is_cleaned_up(all_cleaned_up); _resp.group_bulk_load_state[SECONDARY2] = state2; + _resp.hp_group_bulk_load_state[SECONDARY2_HP] = state2; _resp.__set_is_group_bulk_load_context_cleaned_up(all_cleaned_up); } @@ -856,6 +895,12 @@ class bulk_load_process_test : public bulk_load_service_test _resp.group_bulk_load_state[PRIMARY] = state; _resp.group_bulk_load_state[SECONDARY1] = state; _resp.group_bulk_load_state[SECONDARY2] = state2; + + _resp.__set_hp_group_bulk_load_state({}); + _resp.hp_group_bulk_load_state[PRIMARY_HP] = state; + _resp.hp_group_bulk_load_state[SECONDARY1_HP] = state; + _resp.hp_group_bulk_load_state[SECONDARY2_HP] = state2; + _resp.__set_is_group_bulk_load_paused(is_group_paused); } @@ -975,7 +1020,7 @@ TEST_F(bulk_load_process_test, ingestion_one_succeed) const auto &pinfo = get_partition_bulk_load_info(gpid(_app_id, _pidx)); ASSERT_EQ(pinfo.status, bulk_load_status::BLS_SUCCEED); ASSERT_TRUE(pinfo.ever_ingest_succeed); - ASSERT_EQ(pinfo.addresses.size(), 3); + ASSERT_EQ(pinfo.host_ports.size(), 3); } TEST_F(bulk_load_process_test, ingestion_one_succeed_update) @@ -989,9 +1034,9 @@ TEST_F(bulk_load_process_test, ingestion_one_succeed_update) const auto &pinfo = get_partition_bulk_load_info(pid); ASSERT_EQ(pinfo.status, bulk_load_status::BLS_SUCCEED); ASSERT_TRUE(pinfo.ever_ingest_succeed); - ASSERT_EQ(pinfo.addresses.size(), 3); - ASSERT_EQ(std::find(pinfo.addresses.begin(), 
pinfo.addresses.end(), SECONDARY3), - pinfo.addresses.end()); + ASSERT_EQ(pinfo.host_ports.size(), 3); + ASSERT_EQ(std::find(pinfo.host_ports.begin(), pinfo.host_ports.end(), SECONDARY3_HP), + pinfo.host_ports.end()); } TEST_F(bulk_load_process_test, normal_succeed) diff --git a/src/meta/test/meta_data.cpp b/src/meta/test/meta_data.cpp index 16bcf7f1c4..12b23557be 100644 --- a/src/meta/test/meta_data.cpp +++ b/src/meta/test/meta_data.cpp @@ -26,6 +26,7 @@ #include #include +#include #include #include "client/partition_resolver.h" @@ -35,13 +36,15 @@ #include "meta/meta_data.h" #include "metadata_types.h" #include "misc/misc.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" using namespace dsn::replication; TEST(meta_data, dropped_cmp) { - dsn::rpc_address n; + dsn::host_port n; dropped_replica d1, d2; // time not equal @@ -129,11 +132,14 @@ TEST(meta_data, collect_replica) dsn::partition_configuration &pc = *get_config(app, rep.pid); config_context &cc = *get_config_context(app, rep.pid); - std::vector node_list; + std::vector> node_list; generate_node_list(node_list, 10, 10); #define CLEAR_REPLICA \ do { \ + pc.__set_hp_primary(dsn::host_port()); \ + pc.__set_hp_secondaries({}); \ + pc.__set_hp_last_drops({}); \ pc.primary.set_invalid(); \ pc.secondaries.clear(); \ pc.last_drops.clear(); \ @@ -153,52 +159,54 @@ TEST(meta_data, collect_replica) CLEAR_ALL; rep.ballot = 10; pc.ballot = 9; - pc.primary = node_list[0]; - ASSERT_TRUE(collect_replica(view, node_list[0], rep)); + pc.primary = node_list[0].second; + pc.__set_hp_primary(node_list[0].first); + ASSERT_TRUE(collect_replica(view, node_list[0].first, rep)); } { // replica is secondary of partition CLEAR_ALL; - pc.secondaries.push_back(node_list[0]); - ASSERT_TRUE(collect_replica(view, node_list[0], rep)); + pc.secondaries.push_back(node_list[0].second); + pc.hp_secondaries.push_back(node_list[0].first); + ASSERT_TRUE(collect_replica(view, 
node_list[0].first, rep)); } { // replica has been in the drop_list CLEAR_ALL; - cc.dropped.push_back({node_list[0], 5, 0, 0}); - ASSERT_TRUE(collect_replica(view, node_list[0], rep)); + cc.dropped.push_back({node_list[0].first, 5, 0, 0}); + ASSERT_TRUE(collect_replica(view, node_list[0].first, rep)); } { // drop_list all have timestamp, full CLEAR_ALL; cc.dropped = { - dropped_replica{node_list[0], 5, 1, 1, 2}, - dropped_replica{node_list[1], 6, 1, 1, 2}, - dropped_replica{node_list[2], 7, 1, 1, 2}, - dropped_replica{node_list[3], 8, 1, 1, 2}, + dropped_replica{node_list[0].first, 5, 1, 1, 2}, + dropped_replica{node_list[1].first, 6, 1, 1, 2}, + dropped_replica{node_list[2].first, 7, 1, 1, 2}, + dropped_replica{node_list[3].first, 8, 1, 1, 2}, }; rep.ballot = 10; rep.last_prepared_decree = 10; - ASSERT_FALSE(collect_replica(view, node_list[5], rep)); + ASSERT_FALSE(collect_replica(view, node_list[5].first, rep)); } { // drop_list all have timestamp, not full CLEAR_ALL; cc.dropped = { - dropped_replica{node_list[0], 5, 1, 1, 2}, - dropped_replica{node_list[1], 6, 1, 1, 2}, - dropped_replica{node_list[2], 7, 1, 1, 2}, + dropped_replica{node_list[0].first, 5, 1, 1, 2}, + dropped_replica{node_list[1].first, 6, 1, 1, 2}, + dropped_replica{node_list[2].first, 7, 1, 1, 2}, }; rep.ballot = 10; rep.last_durable_decree = 6; rep.last_committed_decree = 8; rep.last_prepared_decree = 10; - ASSERT_TRUE(collect_replica(view, node_list[4], rep)); + ASSERT_TRUE(collect_replica(view, node_list[4].first, rep)); dropped_replica &d = cc.dropped.front(); ASSERT_EQ(d.ballot, rep.ballot); ASSERT_EQ(d.last_prepared_decree, rep.last_prepared_decree); @@ -208,33 +216,33 @@ TEST(meta_data, collect_replica) // drop_list mixed, full, minimal position CLEAR_ALL; cc.dropped = { - dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 3, 5}, - dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 5}, - dropped_replica{node_list[2], 7, 1, 1, 5}, - 
dropped_replica{node_list[3], 8, 1, 1, 5}, + dropped_replica{node_list[0].first, dropped_replica::INVALID_TIMESTAMP, 2, 3, 5}, + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 2, 4, 5}, + dropped_replica{node_list[2].first, 7, 1, 1, 5}, + dropped_replica{node_list[3].first, 8, 1, 1, 5}, }; rep.ballot = 1; rep.last_committed_decree = 3; rep.last_prepared_decree = 5; - ASSERT_FALSE(collect_replica(view, node_list[5], rep)); + ASSERT_FALSE(collect_replica(view, node_list[5].first, rep)); } { // drop_list mixed, not full, minimal position CLEAR_ALL; cc.dropped = { - dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 3, 5}, - dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 5}, - dropped_replica{node_list[2], 7, 1, 1, 6}, + dropped_replica{node_list[0].first, dropped_replica::INVALID_TIMESTAMP, 2, 3, 5}, + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 2, 4, 5}, + dropped_replica{node_list[2].first, 7, 1, 1, 6}, }; rep.ballot = 1; rep.last_committed_decree = 3; rep.last_prepared_decree = 5; - ASSERT_TRUE(collect_replica(view, node_list[5], rep)); + ASSERT_TRUE(collect_replica(view, node_list[5].first, rep)); dropped_replica &d = cc.dropped.front(); - ASSERT_EQ(d.node, node_list[5]); + ASSERT_EQ(d.node, node_list[5].first); ASSERT_EQ(d.ballot, rep.ballot); ASSERT_EQ(d.last_prepared_decree, rep.last_prepared_decree); } @@ -243,16 +251,16 @@ TEST(meta_data, collect_replica) // drop_list mixed, full, not minimal position CLEAR_ALL; cc.dropped = { - dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 2, 6}, - dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 6}, - dropped_replica{node_list[2], 7, 1, 1, 6}, - dropped_replica{node_list[3], 8, 1, 1, 6}, + dropped_replica{node_list[0].first, dropped_replica::INVALID_TIMESTAMP, 2, 2, 6}, + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 2, 4, 6}, + 
dropped_replica{node_list[2].first, 7, 1, 1, 6}, + dropped_replica{node_list[3].first, 8, 1, 1, 6}, }; rep.ballot = 2; rep.last_committed_decree = 3; rep.last_prepared_decree = 6; - ASSERT_TRUE(collect_replica(view, node_list[5], rep)); + ASSERT_TRUE(collect_replica(view, node_list[5].first, rep)); dropped_replica &d = cc.dropped.front(); ASSERT_EQ(rep.ballot, d.ballot); ASSERT_EQ(rep.last_committed_decree, rep.last_committed_decree); @@ -263,20 +271,21 @@ TEST(meta_data, collect_replica) { // drop_list mixed, not full, not minimal position CLEAR_ALL; - cc.dropped = {dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 2, 6}, - dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 6}, - dropped_replica{node_list[2], 7, 1, 1, 6}}; + cc.dropped = { + dropped_replica{node_list[0].first, dropped_replica::INVALID_TIMESTAMP, 2, 2, 6}, + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 2, 4, 6}, + dropped_replica{node_list[2].first, 7, 1, 1, 6}}; rep.ballot = 3; rep.last_committed_decree = 1; rep.last_prepared_decree = 6; - ASSERT_TRUE(collect_replica(view, node_list[5], rep)); + ASSERT_TRUE(collect_replica(view, node_list[5].first, rep)); std::vector result_dropped = { - dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 2, 6}, - dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 6}, - dropped_replica{node_list[5], dropped_replica::INVALID_TIMESTAMP, 3, 1, 6}, - dropped_replica{node_list[2], 7, 1, 1, 6}}; + dropped_replica{node_list[0].first, dropped_replica::INVALID_TIMESTAMP, 2, 2, 6}, + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 2, 4, 6}, + dropped_replica{node_list[5].first, dropped_replica::INVALID_TIMESTAMP, 3, 1, 6}, + dropped_replica{node_list[2].first, 7, 1, 1, 6}}; ASSERT_TRUE(vec_equal(result_dropped, cc.dropped)); } @@ -285,38 +294,38 @@ TEST(meta_data, collect_replica) // drop_list no timestamp, full, minimal position CLEAR_ALL; 
cc.dropped = { - dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 2, 8}, - dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 8}, - dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 2, 6, 8}, - dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 4, 2, 8}, + dropped_replica{node_list[0].first, dropped_replica::INVALID_TIMESTAMP, 2, 2, 8}, + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 2, 4, 8}, + dropped_replica{node_list[2].first, dropped_replica::INVALID_TIMESTAMP, 2, 6, 8}, + dropped_replica{node_list[3].first, dropped_replica::INVALID_TIMESTAMP, 4, 2, 8}, }; rep.ballot = 1; rep.last_committed_decree = 7; rep.last_prepared_decree = 10; - ASSERT_FALSE(collect_replica(view, node_list[5], rep)); + ASSERT_FALSE(collect_replica(view, node_list[5].first, rep)); } { // drop_list no timestamp, full, middle position CLEAR_ALL; cc.dropped = { - dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 2, 8}, - dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 8}, - dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 2, 6, 8}, - dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 4, 2, 8}, + dropped_replica{node_list[0].first, dropped_replica::INVALID_TIMESTAMP, 2, 2, 8}, + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 2, 4, 8}, + dropped_replica{node_list[2].first, dropped_replica::INVALID_TIMESTAMP, 2, 6, 8}, + dropped_replica{node_list[3].first, dropped_replica::INVALID_TIMESTAMP, 4, 2, 8}, }; rep.ballot = 3; rep.last_committed_decree = 6; rep.last_prepared_decree = 8; - ASSERT_TRUE(collect_replica(view, node_list[5], rep)); + ASSERT_TRUE(collect_replica(view, node_list[5].first, rep)); std::vector result_dropped = { - dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 8}, - dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 2, 6, 8}, - 
dropped_replica{node_list[5], dropped_replica::INVALID_TIMESTAMP, 3, 6, 8}, - dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 4, 2, 8}, + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 2, 4, 8}, + dropped_replica{node_list[2].first, dropped_replica::INVALID_TIMESTAMP, 2, 6, 8}, + dropped_replica{node_list[5].first, dropped_replica::INVALID_TIMESTAMP, 3, 6, 8}, + dropped_replica{node_list[3].first, dropped_replica::INVALID_TIMESTAMP, 4, 2, 8}, }; ASSERT_TRUE(vec_equal(result_dropped, cc.dropped)); @@ -325,21 +334,22 @@ TEST(meta_data, collect_replica) { // drop_list no timestamp, full, largest position CLEAR_ALL; - cc.dropped = {dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 8}, - dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 2, 6, 8}, - dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 4, 2, 8}, - dropped_replica{node_list[4], dropped_replica::INVALID_TIMESTAMP, 4, 6, 8}}; + cc.dropped = { + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 2, 4, 8}, + dropped_replica{node_list[2].first, dropped_replica::INVALID_TIMESTAMP, 2, 6, 8}, + dropped_replica{node_list[3].first, dropped_replica::INVALID_TIMESTAMP, 4, 2, 8}, + dropped_replica{node_list[4].first, dropped_replica::INVALID_TIMESTAMP, 4, 6, 8}}; rep.ballot = 4; rep.last_committed_decree = 8; rep.last_prepared_decree = 8; - ASSERT_TRUE(collect_replica(view, node_list[5], rep)); + ASSERT_TRUE(collect_replica(view, node_list[5].first, rep)); std::vector result_dropped = { - dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 2, 6, 8}, - dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 4, 2, 8}, - dropped_replica{node_list[4], dropped_replica::INVALID_TIMESTAMP, 4, 6, 8}, - dropped_replica{node_list[5], dropped_replica::INVALID_TIMESTAMP, 4, 8, 8}}; + dropped_replica{node_list[2].first, dropped_replica::INVALID_TIMESTAMP, 2, 6, 8}, + 
dropped_replica{node_list[3].first, dropped_replica::INVALID_TIMESTAMP, 4, 2, 8}, + dropped_replica{node_list[4].first, dropped_replica::INVALID_TIMESTAMP, 4, 6, 8}, + dropped_replica{node_list[5].first, dropped_replica::INVALID_TIMESTAMP, 4, 8, 8}}; ASSERT_TRUE(vec_equal(result_dropped, cc.dropped)); } @@ -372,14 +382,17 @@ TEST(meta_data, construct_replica) dsn::partition_configuration &pc = *get_config(app, rep.pid); config_context &cc = *get_config_context(app, rep.pid); - std::vector node_list; + std::vector> node_list; generate_node_list(node_list, 10, 10); #define CLEAR_REPLICA \ do { \ - pc.primary.set_invalid(); \ - pc.secondaries.clear(); \ - pc.last_drops.clear(); \ + pc.hp_primary.reset(); \ + pc.hp_secondaries.clear(); \ + pc.hp_last_drops.clear(); \ + pc.__set_hp_primary(dsn::host_port()); \ + pc.__set_hp_secondaries({}); \ + pc.__set_hp_last_drops({}); \ } while (false) #define CLEAR_DROP_LIST \ @@ -391,20 +404,23 @@ TEST(meta_data, construct_replica) CLEAR_REPLICA; \ CLEAR_DROP_LIST + auto resolver = std::make_shared(); + // drop_list is empty, can't construct replica { CLEAR_ALL; - ASSERT_FALSE(construct_replica(view, rep.pid, 3)); + ASSERT_FALSE(construct_replica(view, rep.pid, 3, resolver)); ASSERT_EQ(0, replica_count(pc)); } // only have one node in drop_list { CLEAR_ALL; - cc.dropped = {dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 5, 10, 12}}; - ASSERT_TRUE(construct_replica(view, rep.pid, 3)); - ASSERT_EQ(node_list[0], pc.primary); - ASSERT_TRUE(pc.secondaries.empty()); + cc.dropped = { + dropped_replica{node_list[0].first, dropped_replica::INVALID_TIMESTAMP, 5, 10, 12}}; + ASSERT_TRUE(construct_replica(view, rep.pid, 3, resolver)); + ASSERT_EQ(node_list[0].first, pc.hp_primary); + ASSERT_TRUE(pc.hp_secondaries.empty()); ASSERT_TRUE(cc.dropped.empty()); ASSERT_EQ(-1, cc.prefered_dropped); } @@ -412,16 +428,17 @@ TEST(meta_data, construct_replica) // have multiple nodes, ballots are not same { CLEAR_ALL; - cc.dropped = 
{dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 6, 10, 12}, - dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 7, 10, 12}, - dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 8, 10, 12}, - dropped_replica{node_list[4], dropped_replica::INVALID_TIMESTAMP, 9, 11, 12}}; - ASSERT_TRUE(construct_replica(view, rep.pid, 3)); - ASSERT_EQ(node_list[4], pc.primary); - ASSERT_TRUE(pc.secondaries.empty()); - - std::vector nodes = {node_list[2], node_list[3]}; - ASSERT_EQ(nodes, pc.last_drops); + cc.dropped = { + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 6, 10, 12}, + dropped_replica{node_list[2].first, dropped_replica::INVALID_TIMESTAMP, 7, 10, 12}, + dropped_replica{node_list[3].first, dropped_replica::INVALID_TIMESTAMP, 8, 10, 12}, + dropped_replica{node_list[4].first, dropped_replica::INVALID_TIMESTAMP, 9, 11, 12}}; + ASSERT_TRUE(construct_replica(view, rep.pid, 3, resolver)); + ASSERT_EQ(node_list[4].first, pc.hp_primary); + ASSERT_TRUE(pc.hp_secondaries.empty()); + + std::vector nodes = {node_list[2].first, node_list[3].first}; + ASSERT_EQ(nodes, pc.hp_last_drops); ASSERT_EQ(3, cc.dropped.size()); ASSERT_EQ(2, cc.prefered_dropped); } @@ -429,16 +446,17 @@ TEST(meta_data, construct_replica) // have multiple node, two have same ballots { CLEAR_ALL; - cc.dropped = {dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 5, 10, 12}, - dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 7, 11, 12}, - dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 7, 12, 12}}; + cc.dropped = { + dropped_replica{node_list[0].first, dropped_replica::INVALID_TIMESTAMP, 5, 10, 12}, + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 7, 11, 12}, + dropped_replica{node_list[2].first, dropped_replica::INVALID_TIMESTAMP, 7, 12, 12}}; - ASSERT_TRUE(construct_replica(view, rep.pid, 3)); - ASSERT_EQ(node_list[2], pc.primary); - 
ASSERT_TRUE(pc.secondaries.empty()); + ASSERT_TRUE(construct_replica(view, rep.pid, 3, resolver)); + ASSERT_EQ(node_list[2].first, pc.hp_primary); + ASSERT_TRUE(pc.hp_secondaries.empty()); - std::vector nodes = {node_list[0], node_list[1]}; - ASSERT_EQ(nodes, pc.last_drops); + std::vector nodes = {node_list[0].first, node_list[1].first}; + ASSERT_EQ(nodes, pc.hp_last_drops); ASSERT_EQ(2, cc.dropped.size()); ASSERT_EQ(1, cc.prefered_dropped); } @@ -446,17 +464,18 @@ TEST(meta_data, construct_replica) // have multiple nodes, all have same ballots { CLEAR_ALL; - cc.dropped = {dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 7, 11, 14}, - dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 7, 12, 14}, - dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 7, 13, 14}, - dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 7, 14, 14}}; + cc.dropped = { + dropped_replica{node_list[0].first, dropped_replica::INVALID_TIMESTAMP, 7, 11, 14}, + dropped_replica{node_list[1].first, dropped_replica::INVALID_TIMESTAMP, 7, 12, 14}, + dropped_replica{node_list[2].first, dropped_replica::INVALID_TIMESTAMP, 7, 13, 14}, + dropped_replica{node_list[3].first, dropped_replica::INVALID_TIMESTAMP, 7, 14, 14}}; - ASSERT_TRUE(construct_replica(view, rep.pid, 3)); - ASSERT_EQ(node_list[3], pc.primary); - ASSERT_TRUE(pc.secondaries.empty()); + ASSERT_TRUE(construct_replica(view, rep.pid, 3, resolver)); + ASSERT_EQ(node_list[3].first, pc.hp_primary); + ASSERT_TRUE(pc.hp_secondaries.empty()); - std::vector nodes = {node_list[1], node_list[2]}; - ASSERT_EQ(nodes, pc.last_drops); + std::vector nodes = {node_list[1].first, node_list[2].first}; + ASSERT_EQ(nodes, pc.hp_last_drops); ASSERT_EQ(3, cc.dropped.size()); ASSERT_EQ(2, cc.prefered_dropped); diff --git a/src/meta/test/meta_duplication_service_test.cpp b/src/meta/test/meta_duplication_service_test.cpp index 644c8c17c6..3dbf7d9544 100644 --- 
a/src/meta/test/meta_duplication_service_test.cpp +++ b/src/meta/test/meta_duplication_service_test.cpp @@ -57,7 +57,9 @@ #include "meta/server_state.h" #include "meta/test/misc/misc.h" #include "meta_test_base.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/blob.h" #include "utils/error_code.h" #include "utils/fail_point.h" @@ -127,11 +129,13 @@ class meta_duplication_service_test : public meta_test_base } duplication_sync_response - duplication_sync(const rpc_address &node, + duplication_sync(const rpc_address &addr, + const host_port &hp, std::map> confirm_list) { auto req = std::make_unique(); - req->node = node; + req->node = addr; + req->__set_hp_node(hp); req->confirm_list = confirm_list; duplication_sync_rpc rpc(std::move(req), RPC_CM_DUPLICATION_SYNC); @@ -180,8 +184,7 @@ class meta_duplication_service_test : public meta_test_base int last_dup = 0; for (int i = 0; i < 1000; i++) { - auto dup = dup_svc().new_dup_from_init( - remote_cluster_address, std::vector(), app); + auto dup = dup_svc().new_dup_from_init(remote_cluster_address, {}, app); ASSERT_GT(dup->id, 0); ASSERT_FALSE(dup->is_altering()); @@ -537,8 +540,10 @@ TEST_F(meta_duplication_service_test, remove_dup) TEST_F(meta_duplication_service_test, duplication_sync) { - std::vector server_nodes = ensure_enough_alive_nodes(3); - rpc_address node = server_nodes[0]; + auto resolver = std::make_shared(); + const auto &server_nodes = ensure_enough_alive_nodes(3); + const auto &node = server_nodes[0]; + const auto &addr = resolver->resolve_address(server_nodes[0]); std::string test_app = "test_app_0"; create_app(test_app); @@ -547,9 +552,10 @@ TEST_F(meta_duplication_service_test, duplication_sync) // generate all primaries on node[0] for (partition_configuration &pc : app->partitions) { pc.ballot = random32(1, 10000); - pc.primary = server_nodes[0]; - pc.secondaries.push_back(server_nodes[1]); - 
pc.secondaries.push_back(server_nodes[2]); + pc.primary = addr; + pc.__set_hp_primary(server_nodes[0]); + pc.hp_secondaries.push_back(server_nodes[1]); + pc.hp_secondaries.push_back(server_nodes[2]); } initialize_node_state(); @@ -574,7 +580,7 @@ TEST_F(meta_duplication_service_test, duplication_sync) ce.confirmed_decree = 7; confirm_list[gpid(app->app_id, 3)].push_back(ce); - duplication_sync_response resp = duplication_sync(node, confirm_list); + duplication_sync_response resp = duplication_sync(addr, node, confirm_list); ASSERT_EQ(resp.err, ERR_OK); ASSERT_EQ(resp.dup_map.size(), 1); ASSERT_EQ(resp.dup_map[app->app_id].size(), 1); @@ -605,7 +611,7 @@ TEST_F(meta_duplication_service_test, duplication_sync) ce.confirmed_decree = 5; confirm_list[gpid(app->app_id, 1)].push_back(ce); - duplication_sync_response resp = duplication_sync(node, confirm_list); + duplication_sync_response resp = duplication_sync(addr, node, confirm_list); ASSERT_EQ(resp.err, ERR_OK); ASSERT_EQ(resp.dup_map.size(), 1); ASSERT_TRUE(resp.dup_map[app->app_id].find(dupid + 1) == resp.dup_map[app->app_id].end()); @@ -619,7 +625,7 @@ TEST_F(meta_duplication_service_test, duplication_sync) ce.confirmed_decree = 5; confirm_list[gpid(app->app_id + 1, 1)].push_back(ce); - duplication_sync_response resp = duplication_sync(node, confirm_list); + duplication_sync_response resp = duplication_sync(addr, node, confirm_list); ASSERT_EQ(resp.err, ERR_OK); ASSERT_EQ(resp.dup_map.size(), 1); ASSERT_TRUE(resp.dup_map.find(app->app_id + 1) == resp.dup_map.end()); @@ -635,7 +641,7 @@ TEST_F(meta_duplication_service_test, duplication_sync) ce.confirmed_decree = 5; confirm_list[gpid(app->app_id, 1)].push_back(ce); - duplication_sync_response resp = duplication_sync(node, confirm_list); + duplication_sync_response resp = duplication_sync(addr, node, confirm_list); ASSERT_EQ(resp.err, ERR_OK); ASSERT_EQ(resp.dup_map.size(), 0); } @@ -772,13 +778,13 @@ TEST_F(meta_duplication_service_test, fail_mode) 
ASSERT_EQ(dup->status(), duplication_status::DS_PAUSE); // ensure dup_sync will synchronize fail_mode - std::vector server_nodes = generate_node_list(3); - rpc_address node = server_nodes[0]; + auto node = generate_node_list(3)[0]; for (partition_configuration &pc : app->partitions) { - pc.primary = server_nodes[0]; + pc.primary = node.second; + pc.__set_hp_primary(node.first); } initialize_node_state(); - duplication_sync_response sync_resp = duplication_sync(node, {}); + duplication_sync_response sync_resp = duplication_sync(node.second, node.first, {}); ASSERT_TRUE(sync_resp.dup_map[app->app_id][dup->id].__isset.fail_mode); ASSERT_EQ(sync_resp.dup_map[app->app_id][dup->id].fail_mode, duplication_fail_mode::FAIL_SKIP); diff --git a/src/meta/test/meta_partition_guardian_test.cpp b/src/meta/test/meta_partition_guardian_test.cpp index e4f62f1fb5..45ac62e4ee 100644 --- a/src/meta/test/meta_partition_guardian_test.cpp +++ b/src/meta/test/meta_partition_guardian_test.cpp @@ -55,6 +55,7 @@ #include "meta_test_base.h" #include "metadata_types.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -78,33 +79,41 @@ static void apply_update_request(/*in-out*/ configuration_update_request &update case config_type::CT_ASSIGN_PRIMARY: case config_type::CT_UPGRADE_TO_PRIMARY: pc.primary = update_req.node; + pc.__set_hp_primary(update_req.hp_node); replica_helper::remove_node(update_req.node, pc.secondaries); + replica_helper::remove_node(update_req.hp_node, pc.hp_secondaries); break; case config_type::CT_ADD_SECONDARY: case config_type::CT_ADD_SECONDARY_FOR_LB: pc.secondaries.push_back(update_req.node); + pc.hp_secondaries.push_back(update_req.hp_node); update_req.type = config_type::CT_UPGRADE_TO_SECONDARY; break; case config_type::CT_REMOVE: case config_type::CT_DOWNGRADE_TO_INACTIVE: - if (update_req.node == pc.primary) + if 
(update_req.hp_node == pc.hp_primary) { pc.primary.set_invalid(); - else + pc.hp_primary.reset(); + } else { replica_helper::remove_node(update_req.node, pc.secondaries); + replica_helper::remove_node(update_req.hp_node, pc.hp_secondaries); + } break; case config_type::CT_DOWNGRADE_TO_SECONDARY: pc.secondaries.push_back(pc.primary); + pc.hp_secondaries.push_back(pc.hp_primary); pc.primary.set_invalid(); + pc.hp_primary.reset(); break; default: break; } } -static auto default_filter = [](const dsn::rpc_address &target, dsn::message_ex *request) { +static auto default_filter = [](const dsn::host_port &target, dsn::message_ex *request) { dsn::message_ex *recv_request = create_corresponding_receive(request); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); @@ -140,7 +149,7 @@ class meta_partition_guardian_test : public meta_test_base class message_filter : public dsn::replication::meta_service { public: - typedef std::function filter; + typedef std::function filter; message_filter(meta_partition_guardian_test *app) : meta_service(), _app(app) {} void set_filter(const filter &f) { _filter = f; } virtual void reply_message(dsn::message_ex *request, dsn::message_ex *response) override @@ -148,7 +157,7 @@ class message_filter : public dsn::replication::meta_service destroy_message(response); } - virtual void send_message(const dsn::rpc_address &target, dsn::message_ex *request) override + virtual void send_message(const dsn::host_port &target, dsn::message_ex *request) override { // we expect this is a configuration_update_request proposal cur_ptr update_request = _filter(target, request); @@ -169,9 +178,10 @@ void meta_partition_guardian_test::cure_test() dsn::error_code ec; dsn::task_ptr t; std::shared_ptr svc(new message_filter(this)); - svc->_failure_detector.reset(new dsn::replication::meta_server_failure_detector(svc.get())); + svc->_failure_detector.reset( + new dsn::replication::meta_server_failure_detector(_ms->_dns_resolver, 
svc.get())); bool proposal_sent; - dsn::rpc_address last_addr; + dsn::host_port last_addr; ec = svc->remote_storage_initialize(); ASSERT_EQ(ec, dsn::ERR_OK); @@ -195,8 +205,12 @@ void meta_partition_guardian_test::cure_test() ASSERT_TRUE(state->spin_wait_staging(20)); svc->_started = true; - std::vector nodes; + std::vector> nodes; generate_node_list(nodes, 4, 4); + std::vector nodes_list; + for (const auto &p : nodes) { + nodes_list.emplace_back(p.first); + } dsn::partition_configuration &pc = app->partitions[0]; config_context &cc = *get_config_context(state->_all_apps, dsn::gpid(1, 0)); @@ -211,24 +225,26 @@ void meta_partition_guardian_test::cure_test() // initialize state->_nodes.clear(); pc.primary.set_invalid(); - pc.secondaries = {nodes[0], nodes[1]}; + pc.hp_primary.reset(); + pc.secondaries = {nodes[0].second, nodes[1].second}; + pc.__set_hp_secondaries({nodes[0].first, nodes[1].first}); pc.ballot = 1; state->initialize_node_state(); - svc->set_node_state(nodes, true); + svc->set_node_state(nodes_list, true); proposal_sent = false; // check partitions, then ignore the proposal - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_UPGRADE_TO_PRIMARY); - EXPECT_TRUE(is_secondary(pc, update_req->node)); - EXPECT_EQ(target, update_req->node); + EXPECT_TRUE(is_secondary(pc, update_req->hp_node)); + EXPECT_EQ(target, update_req->hp_node); - last_addr = update_req->node; + last_addr = update_req->hp_node; proposal_sent = true; return nullptr; }); @@ -241,15 +257,15 @@ void meta_partition_guardian_test::cure_test() PROPOSAL_FLAG_CHECK; // check partitions again - svc->set_filter([&](const dsn::rpc_address &target, 
dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(config_type::CT_UPGRADE_TO_PRIMARY, update_req->type); - EXPECT_EQ(update_req->node, last_addr); - EXPECT_EQ(target, update_req->node); + EXPECT_EQ(update_req->hp_node, last_addr); + EXPECT_EQ(target, update_req->hp_node); proposal_sent = true; apply_update_request(*update_req); @@ -264,32 +280,34 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); PROPOSAL_FLAG_CHECK; - CONDITION_CHECK([&] { return pc.primary == last_addr; }); + CONDITION_CHECK([&] { return pc.hp_primary == last_addr; }); std::this_thread::sleep_for(std::chrono::milliseconds(500)); std::cerr << "Case: upgrade secondary to primary, and the candidate died" << std::endl; // initialize state->_nodes.clear(); pc.primary.set_invalid(); - pc.secondaries = {nodes[0], nodes[1]}; + pc.hp_primary.reset(); + pc.secondaries = {nodes[0].second, nodes[1].second}; + pc.__set_hp_secondaries({nodes[0].first, nodes[1].first}); pc.ballot = 1; state->initialize_node_state(); - svc->set_node_state(nodes, true); + svc->set_node_state(nodes_list, true); proposal_sent = false; // check partitions, then inject a event that node[0] is dead - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_UPGRADE_TO_PRIMARY); - EXPECT_TRUE(is_secondary(pc, update_req->node)); - EXPECT_EQ(target, update_req->node); + EXPECT_TRUE(is_secondary(pc, 
update_req->hp_node)); + EXPECT_EQ(target, update_req->hp_node); proposal_sent = true; - last_addr = update_req->node; + last_addr = update_req->hp_node; svc->set_node_state({target}, false); return nullptr; }); @@ -302,15 +320,15 @@ void meta_partition_guardian_test::cure_test() PROPOSAL_FLAG_CHECK; // check partitions again - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_UPGRADE_TO_PRIMARY); - EXPECT_TRUE(is_secondary(pc, update_req->node)); - EXPECT_EQ(target, update_req->node); + EXPECT_TRUE(is_secondary(pc, update_req->hp_node)); + EXPECT_EQ(target, update_req->hp_node); EXPECT_NE(target, last_addr); proposal_sent = true; @@ -325,31 +343,33 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); PROPOSAL_FLAG_CHECK; - CONDITION_CHECK([&] { return !pc.primary.is_invalid() && pc.primary != last_addr; }); + CONDITION_CHECK([&] { return !pc.hp_primary.is_invalid() && pc.hp_primary != last_addr; }); std::this_thread::sleep_for(std::chrono::milliseconds(500)); std::cerr << "Case: add secondary, and the message lost" << std::endl; // initialize state->_nodes.clear(); - pc.primary = nodes[0]; - pc.secondaries = {nodes[1]}; + pc.primary = nodes[0].second; + pc.secondaries = {nodes[1].second}; + pc.__set_hp_primary(nodes[0].first); + pc.__set_hp_secondaries({nodes[1].first}); pc.ballot = 1; state->initialize_node_state(); - svc->set_node_state(nodes, true); + svc->set_node_state(nodes_list, true); proposal_sent = false; // check partitions, then ignore the proposal - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const 
dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_ADD_SECONDARY); - EXPECT_FALSE(is_secondary(pc, update_req->node)); - EXPECT_EQ(target, nodes[0]); + EXPECT_FALSE(is_secondary(pc, update_req->hp_node)); + EXPECT_EQ(target, nodes[0].first); - last_addr = update_req->node; + last_addr = update_req->hp_node; proposal_sent = true; return nullptr; }); @@ -362,15 +382,15 @@ void meta_partition_guardian_test::cure_test() PROPOSAL_FLAG_CHECK; // check partitions again - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_ADD_SECONDARY); - EXPECT_EQ(update_req->node, last_addr); - EXPECT_EQ(target, nodes[0]); + EXPECT_EQ(update_req->hp_node, last_addr); + EXPECT_EQ(target, nodes[0].first); proposal_sent = true; apply_update_request(*update_req); @@ -384,34 +404,38 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); PROPOSAL_FLAG_CHECK; - CONDITION_CHECK([&] { return pc.secondaries.size() == 2 && is_secondary(pc, last_addr); }); + CONDITION_CHECK([&] { return pc.hp_secondaries.size() == 2 && is_secondary(pc, last_addr); }); std::this_thread::sleep_for(std::chrono::milliseconds(500)); std::cerr << "Case: add secondary, but the primary is removing another" << std::endl; // initialize state->_nodes.clear(); - pc.primary = nodes[0]; - pc.secondaries = {nodes[1]}; + pc.primary = nodes[0].second; + pc.secondaries = {nodes[1].second}; + 
pc.__set_hp_primary(nodes[0].first); + pc.__set_hp_secondaries({nodes[1].first}); pc.ballot = 1; state->initialize_node_state(); - svc->set_node_state(nodes, true); + svc->set_node_state(nodes_list, true); proposal_sent = false; // check partitions, then inject another update_request - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_ADD_SECONDARY); - EXPECT_FALSE(is_secondary(pc, update_req->node)); - EXPECT_EQ(target, nodes[0]); + EXPECT_FALSE(is_secondary(pc, update_req->hp_node)); + EXPECT_EQ(target, nodes[0].first); update_req->config.ballot++; update_req->type = config_type::CT_DOWNGRADE_TO_INACTIVE; update_req->node = update_req->config.secondaries[0]; + update_req->hp_node = update_req->config.hp_secondaries[0]; update_req->config.secondaries.clear(); + update_req->config.hp_secondaries.clear(); proposal_sent = true; @@ -425,32 +449,34 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); PROPOSAL_FLAG_CHECK; - CONDITION_CHECK([&] { return pc.secondaries.size() == 2; }); + CONDITION_CHECK([&] { return pc.hp_secondaries.size() == 2; }); std::this_thread::sleep_for(std::chrono::milliseconds(500)); std::cerr << "Case: add secondary, and the added secondary is dead" << std::endl; // initialize state->_nodes.clear(); - pc.primary = nodes[0]; - pc.secondaries = {nodes[1]}; + pc.primary = nodes[0].second; + pc.secondaries = {nodes[1].second}; + pc.__set_hp_primary(nodes[0].first); + pc.__set_hp_secondaries({nodes[1].first}); pc.ballot = 1; state->initialize_node_state(); - svc->set_node_state(nodes, true); + svc->set_node_state(nodes_list, true); proposal_sent = false; // check 
partitions, then inject the nodes[2] dead - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_ADD_SECONDARY); - EXPECT_FALSE(is_secondary(pc, update_req->node)); - EXPECT_EQ(target, nodes[0]); + EXPECT_FALSE(is_secondary(pc, update_req->hp_node)); + EXPECT_EQ(target, nodes[0].first); - last_addr = update_req->node; - svc->set_node_state({update_req->node}, false); + last_addr = update_req->hp_node; + svc->set_node_state({update_req->hp_node}, false); proposal_sent = true; return nullptr; }); @@ -463,19 +489,19 @@ void meta_partition_guardian_test::cure_test() PROPOSAL_FLAG_CHECK; // check partitions again - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_ADD_SECONDARY); - EXPECT_NE(update_req->node, last_addr); - EXPECT_FALSE(is_secondary(pc, update_req->node)); - EXPECT_EQ(target, nodes[0]); + EXPECT_NE(update_req->hp_node, last_addr); + EXPECT_FALSE(is_secondary(pc, update_req->hp_node)); + EXPECT_EQ(target, nodes[0].first); proposal_sent = true; - last_addr = update_req->node; + last_addr = update_req->hp_node; apply_update_request(*update_req); svc->set_filter(default_filter); return update_req; @@ -487,32 +513,34 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); PROPOSAL_FLAG_CHECK; - CONDITION_CHECK([&] { return 
pc.secondaries.size() == 2 && is_secondary(pc, last_addr); }); + CONDITION_CHECK([&] { return pc.hp_secondaries.size() == 2 && is_secondary(pc, last_addr); }); std::this_thread::sleep_for(std::chrono::milliseconds(500)); std::cerr << "Case: add secondary, and the primary is dead" << std::endl; // initialize state->_nodes.clear(); - pc.primary = nodes[0]; - pc.secondaries = {nodes[1]}; + pc.primary = nodes[0].second; + pc.__set_hp_primary(nodes[0].first); + pc.secondaries = {nodes[1].second}; + pc.__set_hp_secondaries({nodes[1].first}); pc.ballot = 1; state->initialize_node_state(); - svc->set_node_state(nodes, true); + svc->set_node_state(nodes_list, true); proposal_sent = false; // check partitions, then ignore the proposal - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_ADD_SECONDARY); - EXPECT_FALSE(is_secondary(pc, update_req->node)); - EXPECT_EQ(target, pc.primary); + EXPECT_FALSE(is_secondary(pc, update_req->hp_node)); + EXPECT_EQ(target, pc.hp_primary); proposal_sent = true; - svc->set_node_state({pc.primary}, false); + svc->set_node_state({pc.hp_primary}, false); svc->set_filter(default_filter); return nullptr; }); @@ -523,26 +551,28 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); PROPOSAL_FLAG_CHECK; - CONDITION_CHECK([&] { return pc.primary == nodes[1]; }); + CONDITION_CHECK([&] { return pc.hp_primary == nodes[1].first; }); std::this_thread::sleep_for(std::chrono::milliseconds(500)); state->_nodes.clear(); pc.primary.set_invalid(); - pc.secondaries.clear(); - pc.last_drops = {nodes[0], nodes[1], nodes[2]}; + pc.hp_primary.reset(); + pc.hp_secondaries.clear(); + 
pc.last_drops = {nodes[0].second, nodes[1].second, nodes[2].second}; + pc.__set_hp_last_drops({nodes[0].first, nodes[1].first, nodes[2].first}); pc.ballot = 4; state->initialize_node_state(); - svc->set_node_state(nodes, true); + svc->set_node_state(nodes_list, true); - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_ASSIGN_PRIMARY); - EXPECT_EQ(update_req->node, nodes[2]); - EXPECT_EQ(target, nodes[2]); + EXPECT_EQ(update_req->hp_node, nodes[2].first); + EXPECT_EQ(target, nodes[2].first); proposal_sent = true; svc->set_filter(default_filter); @@ -551,11 +581,11 @@ void meta_partition_guardian_test::cure_test() }); std::cerr << "Case: recover from DDD state, nodes[1] isn't alive" << std::endl; - svc->set_node_state({nodes[1]}, false); + svc->set_node_state({nodes[1].first}, false); cc.dropped = { - dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, - dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, - dropped_replica{nodes[2], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[0].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[1].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[2].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, }; t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL, nullptr, @@ -563,13 +593,13 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); ASSERT_FALSE(proposal_sent); - CONDITION_CHECK([&] { return pc.primary.is_invalid(); }); + CONDITION_CHECK([&] { return pc.hp_primary.is_invalid(); }); 
std::this_thread::sleep_for(std::chrono::milliseconds(500)); std::cerr << "Case: recover from DDD state, nodes[2] is not in dropped" << std::endl; - svc->set_node_state({nodes[1]}, true); - cc.dropped = {dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, - dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}}; + svc->set_node_state({nodes[1].first}, true); + cc.dropped = {dropped_replica{nodes[0].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[1].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}}; t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL, nullptr, @@ -577,15 +607,15 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); ASSERT_FALSE(proposal_sent); - CONDITION_CHECK([&] { return pc.primary.is_invalid(); }); + CONDITION_CHECK([&] { return pc.hp_primary.is_invalid(); }); std::this_thread::sleep_for(std::chrono::milliseconds(200)); std::cerr << "Case: recover from DDD state, haven't collect nodes[2]'s info from replica, and " "nodes[2]'s info haven't updated" << std::endl; - cc.dropped = {dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, - dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, - dropped_replica{nodes[2], 500, -1, -1, -1}}; + cc.dropped = {dropped_replica{nodes[0].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[1].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[2].first, 500, -1, -1, -1}}; t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL, nullptr, @@ -593,21 +623,21 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); ASSERT_FALSE(proposal_sent); - CONDITION_CHECK([&] { return pc.primary.is_invalid(); }); + CONDITION_CHECK([&] { return pc.hp_primary.is_invalid(); }); std::this_thread::sleep_for(std::chrono::milliseconds(200)); std::cerr << "Case: recover from DDD state, haven't collect nodes[2]'s info from replica, 
and " "nodes[2]'s info have updated" << std::endl; - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_ASSIGN_PRIMARY); - EXPECT_EQ(update_req->node, nodes[1]); - EXPECT_EQ(target, nodes[1]); + EXPECT_EQ(update_req->hp_node, nodes[1].first); + EXPECT_EQ(target, nodes[1].first); proposal_sent = true; svc->set_filter(default_filter); @@ -615,11 +645,11 @@ void meta_partition_guardian_test::cure_test() return update_req; }); - cc.dropped = {dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, - dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, - dropped_replica{nodes[2], 500, -1, -1, -1}}; + cc.dropped = {dropped_replica{nodes[0].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[1].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[2].first, 500, -1, -1, -1}}; pc.last_committed_decree = 0; - get_node_state(state->_nodes, nodes[2], false)->set_replicas_collect_flag(true); + get_node_state(state->_nodes, nodes[2].first, false)->set_replicas_collect_flag(true); t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL, nullptr, std::bind(&server_state::check_all_partitions, state), @@ -627,21 +657,23 @@ void meta_partition_guardian_test::cure_test() t->wait(); PROPOSAL_FLAG_CHECK; - CONDITION_CHECK([&] { return pc.primary == nodes[1]; }); + CONDITION_CHECK([&] { return pc.hp_primary == nodes[1].first; }); std::this_thread::sleep_for(std::chrono::milliseconds(200)); std::cerr << "Case: recover from DDD, haven't collect nodes[1/2]'s info from replica, and " "nodes[1/2]'s info both have updated" << std::endl; - cc.dropped = 
{dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, - dropped_replica{nodes[1], 500, -1, -1, -1}, - dropped_replica{nodes[2], 500, -1, -1, -1}}; - get_node_state(state->_nodes, nodes[1], false)->set_replicas_collect_flag(true); - get_node_state(state->_nodes, nodes[2], false)->set_replicas_collect_flag(true); + cc.dropped = {dropped_replica{nodes[0].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[1].first, 500, -1, -1, -1}, + dropped_replica{nodes[2].first, 500, -1, -1, -1}}; + get_node_state(state->_nodes, nodes[1].first, false)->set_replicas_collect_flag(true); + get_node_state(state->_nodes, nodes[2].first, false)->set_replicas_collect_flag(true); pc.primary.set_invalid(); - pc.secondaries.clear(); - pc.last_drops = {nodes[0], nodes[1], nodes[2]}; + pc.hp_primary.reset(); + pc.hp_secondaries.clear(); + pc.last_drops = {nodes[0].second, nodes[1].second, nodes[2].second}; + pc.__set_hp_last_drops({nodes[0].first, nodes[1].first, nodes[2].first}); t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL, nullptr, @@ -649,15 +681,15 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); ASSERT_FALSE(proposal_sent); - CONDITION_CHECK([&] { return pc.primary.is_invalid(); }); + CONDITION_CHECK([&] { return pc.hp_primary.is_invalid(); }); std::this_thread::sleep_for(std::chrono::milliseconds(200)); std::cerr << "Case: recover from DDD state, larger ballot not match with larger decree" << std::endl; cc.dropped = { - dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, - dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 1, 0, 1}, - dropped_replica{nodes[2], dropped_replica::INVALID_TIMESTAMP, 0, 1, 1}, + dropped_replica{nodes[0].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[1].first, dropped_replica::INVALID_TIMESTAMP, 1, 0, 1}, + dropped_replica{nodes[2].first, dropped_replica::INVALID_TIMESTAMP, 0, 1, 1}, }; t = 
dsn::tasking::enqueue(LPC_META_STATE_NORMAL, @@ -666,14 +698,14 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); ASSERT_FALSE(proposal_sent); - CONDITION_CHECK([&] { return pc.primary.is_invalid(); }); + CONDITION_CHECK([&] { return pc.hp_primary.is_invalid(); }); std::this_thread::sleep_for(std::chrono::milliseconds(200)); std::cerr << "Case: recover from DDD state, committed decree less than meta's" << std::endl; cc.dropped = { - dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, - dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 1, 10, 15}, - dropped_replica{nodes[2], dropped_replica::INVALID_TIMESTAMP, 1, 15, 15}, + dropped_replica{nodes[0].first, dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}, + dropped_replica{nodes[1].first, dropped_replica::INVALID_TIMESTAMP, 1, 10, 15}, + dropped_replica{nodes[2].first, dropped_replica::INVALID_TIMESTAMP, 1, 15, 15}, }; pc.last_committed_decree = 30; t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL, @@ -682,26 +714,26 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); ASSERT_FALSE(proposal_sent); - CONDITION_CHECK([&] { return pc.primary.is_invalid(); }); + CONDITION_CHECK([&] { return pc.hp_primary.is_invalid(); }); std::this_thread::sleep_for(std::chrono::milliseconds(200)); std::cerr << "Case: recover from DDD state, select primary from config_context::dropped" << std::endl; cc.dropped = { - dropped_replica{nodes[0], 12344, -1, -1, -1}, - dropped_replica{nodes[2], dropped_replica::INVALID_TIMESTAMP, 4, 2, 4}, - dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 4, 3, 4}, + dropped_replica{nodes[0].first, 12344, -1, -1, -1}, + dropped_replica{nodes[2].first, dropped_replica::INVALID_TIMESTAMP, 4, 2, 4}, + dropped_replica{nodes[1].first, dropped_replica::INVALID_TIMESTAMP, 4, 3, 4}, }; pc.last_committed_decree = 2; - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + 
svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_ASSIGN_PRIMARY); - EXPECT_EQ(update_req->node, nodes[1]); - EXPECT_EQ(target, nodes[1]); + EXPECT_EQ(update_req->hp_node, nodes[1].first); + EXPECT_EQ(target, nodes[1].first); proposal_sent = true; svc->set_filter(default_filter); @@ -715,19 +747,19 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); PROPOSAL_FLAG_CHECK; - CONDITION_CHECK([&] { return pc.primary == nodes[1]; }); + CONDITION_CHECK([&] { return pc.hp_primary == nodes[1].first; }); std::this_thread::sleep_for(std::chrono::milliseconds(200)); std::cerr << "Case: recover from DDD state, only one primary" << std::endl; - svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr { + svc->set_filter([&](const dsn::host_port &target, dsn::message_ex *req) -> cur_ptr { dsn::message_ex *recv_request = create_corresponding_receive(req); cur_ptr update_req = std::make_shared(); ::dsn::unmarshall(recv_request, *update_req); destroy_message(recv_request); EXPECT_EQ(update_req->type, config_type::CT_ASSIGN_PRIMARY); - EXPECT_EQ(update_req->node, nodes[0]); - EXPECT_EQ(target, nodes[0]); + EXPECT_EQ(update_req->hp_node, nodes[0].first); + EXPECT_EQ(target, nodes[0].first); proposal_sent = true; svc->set_filter(default_filter); @@ -736,12 +768,14 @@ void meta_partition_guardian_test::cure_test() }); pc.primary.set_invalid(); - pc.secondaries.clear(); - pc.last_drops = {nodes[0]}; + pc.hp_primary.reset(); + pc.hp_secondaries.clear(); + pc.last_drops = {nodes[0].second}; + pc.__set_hp_last_drops({nodes[0].first}); state->_nodes.clear(); pc.ballot = 1; state->initialize_node_state(); - svc->set_node_state({nodes[0], nodes[1], nodes[2]}, true); + 
svc->set_node_state({nodes[0].first, nodes[1].first, nodes[2].first}, true); t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL, nullptr, @@ -749,7 +783,7 @@ void meta_partition_guardian_test::cure_test() server_state::sStateHash); t->wait(); PROPOSAL_FLAG_CHECK; - CONDITION_CHECK([&] { return pc.primary == nodes[0]; }); + CONDITION_CHECK([&] { return pc.hp_primary == nodes[0].first; }); } static void check_nodes_loads(node_mapper &nodes) @@ -770,8 +804,12 @@ static void check_nodes_loads(node_mapper &nodes) void meta_partition_guardian_test::cure() { - std::vector node_list; - generate_node_list(node_list, 20, 100); + std::vector> nodes_pairs; + std::vector nodes_list; + generate_node_list(nodes_pairs, 20, 100); + for (const auto &p : nodes_pairs) { + nodes_list.emplace_back(p.first); + } app_mapper app; node_mapper nodes; @@ -789,8 +827,8 @@ void meta_partition_guardian_test::cure() std::shared_ptr the_app = app_state::create(info); app.emplace(the_app->app_id, the_app); - for (const auto &address : node_list) { - get_node_state(nodes, address, true)->set_alive(true); + for (const auto &hp : nodes_list) { + get_node_state(nodes, hp, true)->set_alive(true); } bool all_partitions_healthy = false; @@ -811,6 +849,7 @@ void meta_partition_guardian_test::cure() fake_request.config = the_app->partitions[i]; fake_request.type = action.type; fake_request.node = action.node; + fake_request.__set_hp_node(action.hp_node); fake_request.host_node = action.node; guardian.reconfig({&app, &nodes}, fake_request); @@ -822,8 +861,8 @@ void meta_partition_guardian_test::cure() void meta_partition_guardian_test::from_proposal_test() { - std::vector node_list; - generate_node_list(node_list, 3, 3); + std::vector> nodes_list; + generate_node_list(nodes_list, 3, 3); app_mapper app; node_mapper nodes; @@ -842,8 +881,8 @@ void meta_partition_guardian_test::from_proposal_test() std::shared_ptr the_app = app_state::create(info); app.emplace(the_app->app_id, the_app); - for (const 
dsn::rpc_address &addr : node_list) { - get_node_state(nodes, addr, true)->set_alive(true); + for (const auto &p : nodes_list) { + get_node_state(nodes, p.first, true)->set_alive(true); } meta_view mv{&app, &nodes}; @@ -859,69 +898,109 @@ void meta_partition_guardian_test::from_proposal_test() ASSERT_EQ(config_type::CT_INVALID, cpa.type); std::cerr << "Case 2: test invalid proposal: invalid target" << std::endl; - cpa2 = - new_proposal_action(dsn::rpc_address(), node_list[0], config_type::CT_UPGRADE_TO_PRIMARY); + cpa2 = new_proposal_action(dsn::rpc_address(), + nodes_list[0].second, + dsn::host_port(), + nodes_list[0].first, + config_type::CT_UPGRADE_TO_PRIMARY); cc.lb_actions.assign_balancer_proposals({cpa2}); ASSERT_FALSE(guardian.from_proposals(mv, p, cpa)); ASSERT_EQ(config_type::CT_INVALID, cpa.type); std::cerr << "Case 3: test invalid proposal: invalid node" << std::endl; - cpa2 = - new_proposal_action(node_list[0], dsn::rpc_address(), config_type::CT_UPGRADE_TO_PRIMARY); + cpa2 = new_proposal_action(nodes_list[0].second, + dsn::rpc_address(), + nodes_list[0].first, + dsn::host_port(), + config_type::CT_UPGRADE_TO_PRIMARY); cc.lb_actions.assign_balancer_proposals({cpa2}); ASSERT_FALSE(guardian.from_proposals(mv, p, cpa)); ASSERT_EQ(config_type::CT_INVALID, cpa.type); std::cerr << "Case 4: test invalid proposal: dead target" << std::endl; - cpa2 = new_proposal_action(node_list[0], node_list[0], config_type::CT_UPGRADE_TO_PRIMARY); + cpa2 = new_proposal_action(nodes_list[0].second, + nodes_list[0].second, + nodes_list[0].first, + nodes_list[0].first, + config_type::CT_UPGRADE_TO_PRIMARY); cc.lb_actions.assign_balancer_proposals({cpa2}); - get_node_state(nodes, node_list[0], false)->set_alive(false); + get_node_state(nodes, nodes_list[0].first, false)->set_alive(false); ASSERT_FALSE(guardian.from_proposals(mv, p, cpa)); ASSERT_EQ(config_type::CT_INVALID, cpa.type); - get_node_state(nodes, node_list[0], false)->set_alive(true); + get_node_state(nodes, 
nodes_list[0].first, false)->set_alive(true); std::cerr << "Case 5: test invalid proposal: dead node" << std::endl; - cpa2 = new_proposal_action(node_list[0], node_list[1], config_type::CT_ADD_SECONDARY); + cpa2 = new_proposal_action(nodes_list[0].second, + nodes_list[1].second, + nodes_list[0].first, + nodes_list[1].first, + config_type::CT_ADD_SECONDARY); cc.lb_actions.assign_balancer_proposals({cpa2}); - get_node_state(nodes, node_list[1], false)->set_alive(false); + get_node_state(nodes, nodes_list[1].first, false)->set_alive(false); ASSERT_FALSE(guardian.from_proposals(mv, p, cpa)); ASSERT_EQ(config_type::CT_INVALID, cpa.type); - get_node_state(nodes, node_list[1], false)->set_alive(true); + get_node_state(nodes, nodes_list[1].first, false)->set_alive(true); std::cerr << "Case 6: test invalid proposal: already have priamry but assign" << std::endl; - cpa2 = new_proposal_action(node_list[0], node_list[0], config_type::CT_ASSIGN_PRIMARY); + cpa2 = new_proposal_action(nodes_list[0].second, + nodes_list[0].second, + nodes_list[0].first, + nodes_list[0].first, + config_type::CT_ASSIGN_PRIMARY); cc.lb_actions.assign_balancer_proposals({cpa2}); - pc.primary = node_list[1]; + pc.primary = nodes_list[1].second; + pc.__set_hp_primary(nodes_list[1].first); ASSERT_FALSE(guardian.from_proposals(mv, p, cpa)); ASSERT_EQ(config_type::CT_INVALID, cpa.type); std::cerr << "Case 7: test invalid proposal: upgrade non-secondary" << std::endl; - cpa2 = new_proposal_action(node_list[0], node_list[0], config_type::CT_UPGRADE_TO_PRIMARY); + cpa2 = new_proposal_action(nodes_list[0].second, + nodes_list[0].second, + nodes_list[0].first, + nodes_list[0].first, + config_type::CT_UPGRADE_TO_PRIMARY); cc.lb_actions.assign_balancer_proposals({cpa2}); pc.primary.set_invalid(); + pc.hp_primary.reset(); ASSERT_FALSE(guardian.from_proposals(mv, p, cpa)); ASSERT_EQ(config_type::CT_INVALID, cpa.type); std::cerr << "Case 8: test invalid proposal: add exist secondary" << std::endl; - cpa2 = 
new_proposal_action(node_list[0], node_list[1], config_type::CT_ADD_SECONDARY); + cpa2 = new_proposal_action(nodes_list[0].second, + nodes_list[1].second, + nodes_list[0].first, + nodes_list[1].first, + config_type::CT_ADD_SECONDARY); cc.lb_actions.assign_balancer_proposals({cpa2}); - pc.primary = node_list[0]; - pc.secondaries = {node_list[1]}; + pc.primary = nodes_list[1].second; + pc.__set_hp_primary(nodes_list[1].first); + pc.secondaries = {nodes_list[1].second}; + pc.__set_hp_secondaries({nodes_list[1].first}); ASSERT_FALSE(guardian.from_proposals(mv, p, cpa)); ASSERT_EQ(config_type::CT_INVALID, cpa.type); std::cerr << "Case 9: test invalid proposal: downgrade non member" << std::endl; - cpa2 = new_proposal_action(node_list[0], node_list[1], config_type::CT_REMOVE); + cpa2 = new_proposal_action(nodes_list[0].second, + nodes_list[1].second, + nodes_list[0].first, + nodes_list[1].first, + config_type::CT_REMOVE); cc.lb_actions.assign_balancer_proposals({cpa2}); - pc.primary = node_list[0]; - pc.secondaries.clear(); + pc.primary = nodes_list[0].second; + pc.__set_hp_primary(nodes_list[0].first); + pc.hp_secondaries.clear(); ASSERT_FALSE(guardian.from_proposals(mv, p, cpa)); ASSERT_EQ(config_type::CT_INVALID, cpa.type); std::cerr << "Case 10: test abnormal learning detect" << std::endl; - cpa2 = new_proposal_action(node_list[0], node_list[1], config_type::CT_ADD_SECONDARY); - pc.primary = node_list[0]; - pc.secondaries.clear(); + cpa2 = new_proposal_action(nodes_list[0].second, + nodes_list[1].second, + nodes_list[0].first, + nodes_list[1].first, + config_type::CT_ADD_SECONDARY); + pc.primary = nodes_list[0].second; + pc.__set_hp_primary(nodes_list[0].first); + pc.hp_secondaries.clear(); cc.lb_actions.assign_balancer_proposals({cpa2}); replica_info i; @@ -932,12 +1011,12 @@ void meta_partition_guardian_test::from_proposal_test() i.last_committed_decree = 10; i.last_prepared_decree = 10; - collect_replica(mv, node_list[1], i); + collect_replica(mv, 
nodes_list[1].first, i); ASSERT_TRUE(guardian.from_proposals(mv, p, cpa)); ASSERT_EQ(config_type::CT_ADD_SECONDARY, cpa.type); i.status = partition_status::PS_ERROR; - collect_replica(mv, node_list[1], i); + collect_replica(mv, nodes_list[1].first, i); ASSERT_FALSE(guardian.from_proposals(mv, p, cpa)); ASSERT_EQ(config_type::CT_INVALID, cpa.type); } diff --git a/src/meta/test/meta_service_test.cpp b/src/meta/test/meta_service_test.cpp index 44fc1ea628..df485c1142 100644 --- a/src/meta/test/meta_service_test.cpp +++ b/src/meta/test/meta_service_test.cpp @@ -29,6 +29,7 @@ #include "runtime/rpc/network.sim.h" #include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_holder.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "utils/autoref_ptr.h" @@ -44,17 +45,17 @@ class meta_service_test : public meta_test_base void check_status_failure() { fail::setup(); - fail::cfg("meta_server_failure_detector_get_leader", "return(false#1.2.3.4:10086)"); + fail::cfg("meta_server_failure_detector_get_leader", "return(false#localhost:10086)"); /** can't forward to others */ RPC_MOCKING(app_env_rpc) { - rpc_address leader; + host_port leader; auto rpc = create_fake_rpc(); rpc.dsn_request()->header->context.u.is_forward_supported = false; ASSERT_FALSE(_ms->check_status_and_authz(rpc, &leader)); ASSERT_EQ(ERR_FORWARD_TO_OTHERS, rpc.response().err); - ASSERT_EQ(leader.to_std_string(), "1.2.3.4:10086"); + ASSERT_EQ(leader.to_string(), "localhost:10086"); ASSERT_EQ(app_env_rpc::forward_mail_box().size(), 0); } @@ -65,7 +66,7 @@ class meta_service_test : public meta_test_base ASSERT_FALSE(_ms->check_status_and_authz(rpc)); ASSERT_EQ(app_env_rpc::forward_mail_box().size(), 1); ASSERT_EQ(app_env_rpc::forward_mail_box()[0].remote_address().to_std_string(), - "1.2.3.4:10086"); + "127.0.0.1:10086"); } fail::teardown(); @@ -78,7 +79,7 @@ class meta_service_test : public meta_test_base RPC_MOCKING(app_env_rpc) { - 
rpc_address leader; + host_port leader; auto rpc = create_fake_rpc(); ASSERT_TRUE(_ms->check_status_and_authz(rpc, &leader)); ASSERT_EQ(app_env_rpc::forward_mail_box().size(), 0); diff --git a/src/meta/test/meta_split_service_test.cpp b/src/meta/test/meta_split_service_test.cpp index 12cb81e203..13c7e4d152 100644 --- a/src/meta/test/meta_split_service_test.cpp +++ b/src/meta/test/meta_split_service_test.cpp @@ -60,6 +60,7 @@ #include "metadata_types.h" #include "partition_split_types.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/blob.h" #include "utils/error_code.h" #include "utils/fmt_logging.h" @@ -148,7 +149,7 @@ class meta_split_service_test : public meta_test_base request->app.app_id = app->app_id; request->parent_config = parent_config; request->child_config = child_config; - request->primary_address = NODE; + request->hp_primary = NODE; register_child_rpc rpc(std::move(request), RPC_CM_REGISTER_CHILD_REPLICA); split_svc().register_child_on_meta(rpc); @@ -306,7 +307,8 @@ class meta_split_service_test : public meta_test_base create_splitting_app_on_remote_stroage(state->_apps_root); state->initialize_data_structure(); - _ms->_failure_detector.reset(new meta_server_failure_detector(_ms.get())); + _ms->_failure_detector.reset( + new meta_server_failure_detector(_ms->_dns_resolver, _ms.get())); _ss = _ms->_state; } @@ -377,7 +379,8 @@ class meta_split_service_test : public meta_test_base const int32_t PARENT_BALLOT = 3; const int32_t PARENT_INDEX = 0; const int32_t CHILD_INDEX = 4; - const rpc_address NODE = rpc_address("127.0.0.1", 10086); + const host_port NODE = host_port("localhost", 10086); + const rpc_address NODE_ADDR = rpc_address("127.0.0.1", 10086); std::shared_ptr app; }; @@ -505,7 +508,8 @@ TEST_F(meta_split_service_test, on_config_sync_test) info1.pid = pid1; info2.pid = pid2; configuration_query_by_node_request req; - req.node = NODE; + req.node = NODE_ADDR; + req.__set_hp_node(NODE); 
req.__isset.stored_replicas = true; req.stored_replicas.emplace_back(info1); req.stored_replicas.emplace_back(info2); diff --git a/src/meta/test/meta_test_base.cpp b/src/meta/test/meta_test_base.cpp index 720efcfee9..9ecaccc82a 100644 --- a/src/meta/test/meta_test_base.cpp +++ b/src/meta/test/meta_test_base.cpp @@ -38,7 +38,7 @@ #include "meta/server_state.h" #include "meta/test/misc/misc.h" #include "meta_service_test_app.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/task/task_tracker.h" #include "utils/error_code.h" @@ -60,7 +60,7 @@ meta_test_base::~meta_test_base() {} void meta_test_base::SetUp() { _ms = std::make_unique(); - _ms->_failure_detector.reset(new meta_server_failure_detector(_ms.get())); + _ms->_failure_detector.reset(new meta_server_failure_detector(_ms->_dns_resolver, _ms.get())); _ms->_balancer.reset(utils::factory_store::create( FLAGS_server_load_balancer_type, PROVIDER_TYPE_MAIN, _ms.get())); _ms->_partition_guardian.reset(utils::factory_store::create( @@ -121,9 +121,9 @@ void meta_test_base::set_node_live_percentage_threshold_for_update(uint64_t perc _ms->_node_live_percentage_threshold_for_update = percentage_threshold; } -std::vector meta_test_base::get_alive_nodes() const +std::vector meta_test_base::get_alive_nodes() const { - std::vector nodes; + std::vector nodes; zauto_read_lock l(_ss->_lock); @@ -136,13 +136,13 @@ std::vector meta_test_base::get_alive_nodes() const return nodes; } -std::vector meta_test_base::ensure_enough_alive_nodes(int min_node_count) +std::vector meta_test_base::ensure_enough_alive_nodes(int min_node_count) { if (min_node_count < 1) { - return std::vector(); + return std::vector(); } - std::vector nodes(get_alive_nodes()); + std::vector nodes(get_alive_nodes()); if (!nodes.empty()) { auto node_count = static_cast(nodes.size()); CHECK_GE_MSG(node_count, @@ -161,12 +161,15 @@ std::vector 
meta_test_base::ensure_enough_alive_nodes(int min_node_ return nodes; } - nodes = generate_node_list(min_node_count); + auto node_pairs = generate_node_list(min_node_count); + for (const auto &p : node_pairs) { + nodes.emplace_back(p.first); + } _ms->set_node_state(nodes, true); while (true) { { - std::vector alive_nodes(get_alive_nodes()); + std::vector alive_nodes(get_alive_nodes()); if (static_cast(alive_nodes.size()) >= min_node_count) { break; } @@ -242,7 +245,7 @@ meta_test_base::update_app_envs(const std::string &app_name, return rpc.response(); } -void meta_test_base::mock_node_state(const rpc_address &addr, const node_state &node) +void meta_test_base::mock_node_state(const host_port &addr, const node_state &node) { _ss->_nodes[addr] = node; } diff --git a/src/meta/test/meta_test_base.h b/src/meta/test/meta_test_base.h index 9422e215c0..8285709ffb 100644 --- a/src/meta/test/meta_test_base.h +++ b/src/meta/test/meta_test_base.h @@ -25,7 +25,7 @@ #include "meta/meta_service.h" // IWYU pragma: keep namespace dsn { -class rpc_address; +class host_port; namespace replication { @@ -55,7 +55,7 @@ class meta_test_base : public testing::Test void set_node_live_percentage_threshold_for_update(uint64_t percentage_threshold); - std::vector ensure_enough_alive_nodes(int min_node_count); + std::vector ensure_enough_alive_nodes(int min_node_count); // create an app for test with specified name and specified partition count void create_app(const std::string &name, uint32_t partition_count); @@ -69,7 +69,7 @@ class meta_test_base : public testing::Test const std::vector &env_keys, const std::vector &env_vals); - void mock_node_state(const rpc_address &addr, const node_state &node); + void mock_node_state(const host_port &addr, const node_state &node); std::shared_ptr find_app(const std::string &name); @@ -84,7 +84,7 @@ class meta_test_base : public testing::Test std::string _app_root; private: - std::vector get_alive_nodes() const; + std::vector get_alive_nodes() const; }; 
} // namespace replication diff --git a/src/meta/test/misc/misc.cpp b/src/meta/test/misc/misc.cpp index 500c7aaf19..95f3ce6cb7 100644 --- a/src/meta/test/misc/misc.cpp +++ b/src/meta/test/misc/misc.cpp @@ -47,6 +47,9 @@ #include "duplication_types.h" #include "meta_admin_types.h" #include "metadata_types.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/fmt_logging.h" #include "utils/rand.h" @@ -58,12 +61,16 @@ uint32_t random32(uint32_t min, uint32_t max) return res + min; } -void generate_node_list(std::vector &output_list, int min_count, int max_count) +void generate_node_list(std::vector> &output_list, + int min_count, + int max_count) { int count = random32(min_count, max_count); output_list.resize(count); - for (int i = 0; i < count; ++i) - output_list[i].assign_ipv4("127.0.0.1", i + 1); + for (int i = 0; i < count; ++i) { + output_list[i] = std::make_pair(dsn::host_port("localhost", i + 1), + dsn::rpc_address("127.0.0.1", i + 1)); + } } void verbose_apps(const app_mapper &input_apps) @@ -73,10 +80,10 @@ void verbose_apps(const app_mapper &input_apps) const std::shared_ptr &app = apps.second; std::cout << apps.first << " " << app->partition_count << std::endl; for (int i = 0; i < app->partition_count; ++i) { - std::cout << app->partitions[i].secondaries.size() + 1 << " " - << app->partitions[i].primary; - for (int j = 0; j < app->partitions[i].secondaries.size(); ++j) { - std::cout << " " << app->partitions[i].secondaries[j]; + std::cout << app->partitions[i].hp_secondaries.size() + 1 << " " + << app->partitions[i].hp_primary; + for (int j = 0; j < app->partitions[i].hp_secondaries.size(); ++j) { + std::cout << " " << app->partitions[i].hp_secondaries[j]; } std::cout << std::endl; } @@ -86,22 +93,22 @@ void verbose_apps(const app_mapper &input_apps) void generate_node_mapper( /*out*/ node_mapper &output_nodes, const app_mapper &input_apps, - const std::vector 
&input_node_list) + const std::vector &input_node_list) { output_nodes.clear(); - for (auto &addr : input_node_list) { - get_node_state(output_nodes, addr, true)->set_alive(true); + for (auto &hp : input_node_list) { + get_node_state(output_nodes, hp, true)->set_alive(true); } for (auto &kv : input_apps) { const std::shared_ptr &app = kv.second; for (const dsn::partition_configuration &pc : app->partitions) { node_state *ns; - if (!pc.primary.is_invalid()) { - ns = get_node_state(output_nodes, pc.primary, true); + if (!pc.hp_primary.is_invalid()) { + ns = get_node_state(output_nodes, pc.hp_primary, true); ns->put_partition(pc.pid, true); } - for (const dsn::rpc_address &sec : pc.secondaries) { + for (const dsn::host_port &sec : pc.hp_secondaries) { CHECK(!sec.is_invalid(), ""); ns = get_node_state(output_nodes, sec, true); ns->put_partition(pc.pid, false); @@ -111,8 +118,9 @@ void generate_node_mapper( } void generate_app(/*out*/ std::shared_ptr &app, - const std::vector &node_list) + const std::vector &node_list) { + auto resolver = std::make_shared(); for (dsn::partition_configuration &pc : app->partitions) { pc.ballot = random32(1, 10000); std::vector indices(3, 0); @@ -121,16 +129,20 @@ void generate_app(/*out*/ std::shared_ptr &app, indices[2] = random32(indices[1] + 1, node_list.size() - 1); int p = random32(0, 2); - pc.primary = node_list[indices[p]]; - pc.secondaries.clear(); - for (unsigned int i = 0; i != indices.size(); ++i) - if (i != p) - pc.secondaries.push_back(node_list[indices[i]]); + pc.__set_hp_primary(node_list[indices[p]]); + pc.__set_hp_secondaries({}); + pc.primary = resolver->resolve_address(node_list[indices[p]]); + for (unsigned int i = 0; i != indices.size(); ++i) { + if (i != p) { + pc.secondaries.push_back(resolver->resolve_address(node_list[indices[i]])); + pc.hp_secondaries.push_back(node_list[indices[i]]); + } + } - CHECK(!pc.primary.is_invalid(), ""); - CHECK(!is_secondary(pc, pc.primary), ""); - CHECK_EQ(pc.secondaries.size(), 2); 
- CHECK_NE(pc.secondaries[0], pc.secondaries[1]); + CHECK(!pc.hp_primary.is_invalid(), ""); + CHECK(!is_secondary(pc, pc.hp_primary), ""); + CHECK_EQ(pc.hp_secondaries.size(), 2); + CHECK_NE(pc.hp_secondaries[0], pc.hp_secondaries[1]); } } @@ -145,18 +157,18 @@ void generate_app_serving_replica_info(/*out*/ std::shared_ptr &node_list, + const std::vector &node_list, int apps_count, int disks_per_node, std::pair partitions_range, @@ -202,10 +214,10 @@ void generate_node_fs_manager(const app_mapper &apps, for (const auto &kv : nodes) { const node_state &ns = kv.second; - if (nfm.find(ns.addr()) == nfm.end()) { - nfm.emplace(ns.addr(), std::make_shared()); + if (nfm.find(ns.host_port()) == nfm.end()) { + nfm.emplace(ns.host_port(), std::make_shared()); } - fs_manager &manager = *(nfm.find(ns.addr())->second); + fs_manager &manager = *(nfm.find(ns.host_port())->second); manager.initialize(data_dirs, tags); ns.for_each_partition([&](const dsn::gpid &pid) { const config_context &cc = *get_config_context(apps, pid); @@ -213,10 +225,10 @@ void generate_node_fs_manager(const app_mapper &apps, 256, "%s%s/%d.%d.test", prefix, - cc.find_from_serving(ns.addr())->disk_tag.c_str(), + cc.find_from_serving(ns.host_port())->disk_tag.c_str(), pid.get_app_id(), pid.get_partition_index()); - LOG_DEBUG("concat pid_dir({}) of node({})", pid_dir, ns.addr()); + LOG_DEBUG("concat pid_dir({}) of node({})", pid_dir, ns.host_port()); manager.add_replica(pid, pid_dir); return true; }); @@ -232,9 +244,13 @@ void track_disk_info_check_and_apply(const dsn::replication::configuration_propo config_context *cc = get_config_context(apps, pid); CHECK_NOTNULL(cc, ""); - fs_manager *target_manager = get_fs_manager(manager, act.target); + dsn::host_port hp_target, hp_node; + GET_HOST_PORT(act, target, hp_target); + GET_HOST_PORT(act, node, hp_node); + + fs_manager *target_manager = get_fs_manager(manager, hp_target); CHECK_NOTNULL(target_manager, ""); - fs_manager *node_manager = get_fs_manager(manager, 
act.node); + fs_manager *node_manager = get_fs_manager(manager, hp_node); CHECK_NOTNULL(node_manager, ""); std::string dir; @@ -244,7 +260,7 @@ void track_disk_info_check_and_apply(const dsn::replication::configuration_propo auto selected = target_manager->find_best_dir_for_new_replica(pid); CHECK_NOTNULL(selected, ""); selected->holding_replicas[pid.get_app_id()].emplace(pid); - cc->collect_serving_replica(act.target, ri); + cc->collect_serving_replica(hp_target, ri); break; } case config_type::CT_ADD_SECONDARY: @@ -252,7 +268,7 @@ void track_disk_info_check_and_apply(const dsn::replication::configuration_propo auto selected = node_manager->find_best_dir_for_new_replica(pid); CHECK_NOTNULL(selected, ""); selected->holding_replicas[pid.get_app_id()].emplace(pid); - cc->collect_serving_replica(act.node, ri); + cc->collect_serving_replica(hp_node, ri); break; } case config_type::CT_DOWNGRADE_TO_SECONDARY: @@ -262,7 +278,7 @@ void track_disk_info_check_and_apply(const dsn::replication::configuration_propo case config_type::CT_REMOVE: case config_type::CT_DOWNGRADE_TO_INACTIVE: node_manager->remove_replica(pid); - cc->remove_from_serving(act.node); + cc->remove_from_serving(hp_node); break; default: @@ -289,24 +305,33 @@ void proposal_action_check_and_apply(const configuration_proposal_action &act, track_disk_info_check_and_apply(act, pid, apps, nodes, *manager); } + dsn::host_port hp_target, hp_node; + GET_HOST_PORT(act, target, hp_target); + GET_HOST_PORT(act, node, hp_node); + switch (act.type) { case config_type::CT_ASSIGN_PRIMARY: CHECK_EQ(act.node, act.target); + CHECK(pc.hp_primary.is_invalid(), ""); CHECK(pc.primary.is_invalid(), ""); + CHECK(pc.hp_secondaries.empty(), ""); CHECK(pc.secondaries.empty(), ""); pc.primary = act.node; - ns = &nodes[act.node]; + pc.__set_hp_primary(hp_node); + ns = &nodes[hp_node]; CHECK_EQ(ns->served_as(pc.pid), partition_status::PS_INACTIVE); ns->put_partition(pc.pid, true); break; case config_type::CT_ADD_SECONDARY: + 
CHECK_EQ(hp_target, pc.hp_primary); CHECK_EQ(act.target, pc.primary); - CHECK(!is_member(pc, act.node), ""); + CHECK(!is_member(pc, hp_node), ""); + pc.hp_secondaries.push_back(hp_node); pc.secondaries.push_back(act.node); - ns = &nodes[act.node]; + ns = &nodes[hp_node]; CHECK_EQ(ns->served_as(pc.pid), partition_status::PS_INACTIVE); ns->put_partition(pc.pid, false); @@ -314,33 +339,47 @@ void proposal_action_check_and_apply(const configuration_proposal_action &act, case config_type::CT_DOWNGRADE_TO_SECONDARY: CHECK_EQ(act.node, act.target); + CHECK_EQ(hp_node, hp_target); CHECK_EQ(act.node, pc.primary); - CHECK(nodes.find(act.node) != nodes.end(), ""); - CHECK(!is_secondary(pc, pc.primary), ""); - nodes[act.node].remove_partition(pc.pid, true); + CHECK_EQ(hp_node, pc.hp_primary); + CHECK(nodes.find(hp_node) != nodes.end(), ""); + CHECK(!is_secondary(pc, pc.hp_primary), ""); + nodes[hp_node].remove_partition(pc.pid, true); pc.secondaries.push_back(pc.primary); + pc.hp_secondaries.push_back(pc.hp_primary); pc.primary.set_invalid(); + pc.__set_hp_primary(dsn::host_port()); break; case config_type::CT_UPGRADE_TO_PRIMARY: + CHECK(pc.hp_primary.is_invalid(), ""); CHECK(pc.primary.is_invalid(), ""); + CHECK_EQ(hp_node, hp_target); CHECK_EQ(act.node, act.target); - CHECK(is_secondary(pc, act.node), ""); - CHECK(nodes.find(act.node) != nodes.end(), ""); + CHECK(is_secondary(pc, hp_node), ""); + CHECK(nodes.find(hp_node) != nodes.end(), ""); - ns = &nodes[act.node]; + ns = &nodes[hp_node]; + pc.hp_primary = hp_node; pc.primary = act.node; + CHECK(replica_helper::remove_node(hp_node, pc.hp_secondaries), ""); CHECK(replica_helper::remove_node(act.node, pc.secondaries), ""); ns->put_partition(pc.pid, true); break; case config_type::CT_ADD_SECONDARY_FOR_LB: + CHECK_EQ(hp_target, pc.hp_primary); CHECK_EQ(act.target, pc.primary); - CHECK(!is_member(pc, act.node), ""); + CHECK(!is_member(pc, hp_node), ""); + CHECK(!act.hp_node.is_invalid(), ""); CHECK(!act.node.is_invalid(), ""); 
+ if (!pc.__isset.hp_secondaries) { + pc.__set_hp_secondaries({}); + } + pc.hp_secondaries.push_back(hp_node); pc.secondaries.push_back(act.node); - ns = &nodes[act.node]; + ns = &nodes[hp_node]; ns->put_partition(pc.pid, false); CHECK_EQ(ns->served_as(pc.pid), partition_status::PS_SECONDARY); break; @@ -348,13 +387,16 @@ void proposal_action_check_and_apply(const configuration_proposal_action &act, // in balancer, remove primary is not allowed case config_type::CT_REMOVE: case config_type::CT_DOWNGRADE_TO_INACTIVE: + CHECK(!pc.hp_primary.is_invalid(), ""); CHECK(!pc.primary.is_invalid(), ""); + CHECK_EQ(pc.hp_primary, hp_target); CHECK_EQ(pc.primary, act.target); - CHECK(is_secondary(pc, act.node), ""); - CHECK(nodes.find(act.node) != nodes.end(), ""); + CHECK(is_secondary(pc, hp_node), ""); + CHECK(nodes.find(hp_node) != nodes.end(), ""); + CHECK(replica_helper::remove_node(hp_node, pc.hp_secondaries), ""); CHECK(replica_helper::remove_node(act.node, pc.secondaries), ""); - ns = &nodes[act.node]; + ns = &nodes[hp_node]; CHECK_EQ(ns->served_as(pc.pid), partition_status::PS_SECONDARY); ns->remove_partition(pc.pid, false); break; @@ -381,19 +423,21 @@ void migration_check_and_apply(app_mapper &apps, dsn::partition_configuration &pc = the_app->partitions[proposal->gpid.get_partition_index()]; - CHECK(!pc.primary.is_invalid(), ""); - CHECK_EQ(pc.secondaries.size(), 2); - for (auto &addr : pc.secondaries) { - CHECK(!addr.is_invalid(), ""); + CHECK(!pc.hp_primary.is_invalid(), ""); + CHECK_EQ(pc.hp_secondaries.size(), 2); + for (auto &host_port : pc.hp_secondaries) { + CHECK(!host_port.is_invalid(), ""); } - CHECK(!is_secondary(pc, pc.primary), ""); + CHECK(!is_secondary(pc, pc.hp_primary), ""); for (unsigned int j = 0; j < proposal->action_list.size(); ++j) { configuration_proposal_action &act = proposal->action_list[j]; - LOG_DEBUG("the {}th round of action, type: {}, node: {}, target: {}", + LOG_DEBUG("the {}th round of action, type: {}, node: {}({}), target: 
{}({})", j, dsn::enum_to_string(act.type), + act.hp_node, act.node, + act.hp_target, act.target); proposal_action_check_and_apply(act, proposal->gpid, apps, nodes, manager); } diff --git a/src/meta/test/misc/misc.h b/src/meta/test/misc/misc.h index 8fef86fe6f..e4eb59be0b 100644 --- a/src/meta/test/misc/misc.h +++ b/src/meta/test/misc/misc.h @@ -37,19 +37,21 @@ #include "meta/meta_data.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" namespace dsn { class gpid; + namespace replication { class configuration_proposal_action; class fs_manager; } // namespace replication } // namespace dsn -typedef std::map> nodes_fs_manager; +typedef std::map> nodes_fs_manager; inline dsn::replication::fs_manager *get_fs_manager(nodes_fs_manager &nfm, - const dsn::rpc_address &node) + const dsn::host_port &node) { auto iter = nfm.find(node); if (nfm.end() == iter) @@ -62,17 +64,21 @@ uint32_t random32(uint32_t min, uint32_t max); // Generates a random number [min_count, max_count] of node addresses // each node is given a random port value in range of [min_count, max_count] -void generate_node_list(/*out*/ std::vector &output_list, - int min_count, - int max_count); +void generate_node_list( + /*out*/ std::vector> &output_list, + int min_count, + int max_count); // Generates `size` of node addresses, each with port value in range [start_port, start_port + size] -inline std::vector generate_node_list(size_t size, int start_port = 12321) +inline std::vector> +generate_node_list(size_t size, int start_port = 12321) { - std::vector result; + std::vector> result; result.resize(size); - for (int i = 0; i < size; ++i) - result[i].assign_ipv4("127.0.0.1", static_cast(start_port + i + 1)); + for (int i = 0; i < size; ++i) { + result[i].first = dsn::host_port("localhost", static_cast(start_port + i + 1)); + result[i].second = dsn::rpc_address("127.0.0.1", static_cast(start_port + i + 1)); + } return result; } @@ -81,12 +87,12 @@ inline std::vector 
generate_node_list(size_t size, int start_p // REQUIRES: node_list.size() >= 3 void generate_app( /*out*/ std::shared_ptr &app, - const std::vector &node_list); + const std::vector &node_list); void generate_node_mapper( /*out*/ dsn::replication::node_mapper &output_nodes, const dsn::replication::app_mapper &input_apps, - const std::vector &input_node_list); + const std::vector &input_node_list); void generate_app_serving_replica_info(/*out*/ std::shared_ptr &app, int total_disks); @@ -97,7 +103,7 @@ void generate_node_fs_manager(const dsn::replication::app_mapper &apps, int total_disks); void generate_apps(/*out*/ dsn::replication::app_mapper &apps, - const std::vector &node_list, + const std::vector &node_list, int apps_count, int disks_per_node, std::pair partitions_range, diff --git a/src/meta/test/state_sync_test.cpp b/src/meta/test/state_sync_test.cpp index ccd83ee744..3b595c5cd8 100644 --- a/src/meta/test/state_sync_test.cpp +++ b/src/meta/test/state_sync_test.cpp @@ -47,6 +47,7 @@ #include "meta_admin_types.h" #include "meta_service_test_app.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "utils/autoref_ptr.h" #include "utils/error_code.h" @@ -61,13 +62,15 @@ class meta_options; DSN_DECLARE_string(cluster_root); DSN_DECLARE_string(meta_state_service_type); -static void random_assign_partition_config(std::shared_ptr &app, - const std::vector &server_list, - int max_replica_count) +static void random_assign_partition_config( + std::shared_ptr &app, + std::vector> &server_list, + int max_replica_count) { auto get_server = [&server_list](int indice) { - if (indice % 2 != 0) - return dsn::rpc_address(); + if (indice % 2 != 0) { + return std::make_pair(dsn::host_port(), dsn::rpc_address()); + } return server_list[indice / 2]; }; @@ -79,13 +82,21 @@ static void random_assign_partition_config(std::shared_ptr &app, indices.push_back(random32(start, max_servers)); start = indices.back() + 1; } - 
pc.primary = get_server(indices[0]); + auto server = get_server(indices[0]); + pc.primary = server.second; + pc.__set_hp_primary(server.first); + if (!pc.__isset.hp_secondaries) { + pc.__set_hp_secondaries({}); + } for (int i = 1; i < indices.size(); ++i) { - dsn::rpc_address addr = get_server(indices[i]); - if (!addr.is_invalid()) - pc.secondaries.push_back(addr); + auto s = get_server(indices[i]); + if (!s.first.is_invalid()) { + pc.secondaries.push_back(s.second); + pc.hp_secondaries.push_back(s.first); + } } - pc.last_drops = {server_list.back()}; + pc.__set_hp_last_drops({server_list.back().first}); + pc.last_drops = {server_list.back().second}; } } @@ -120,7 +131,7 @@ void meta_service_test_app::state_sync_test() { int apps_count = 15; int drop_ratio = 5; - std::vector server_list; + std::vector> server_list; std::vector drop_set; generate_node_list(server_list, 10, 10); @@ -187,7 +198,7 @@ void meta_service_test_app::state_sync_test() for (int j = 0; j < app->partition_count; ++j) { config_context &cc = app->helpers->contexts[j]; ASSERT_EQ(1, cc.dropped.size()); - ASSERT_NE(cc.dropped.end(), cc.find_from_dropped(server_list.back())); + ASSERT_NE(cc.dropped.end(), cc.find_from_dropped(server_list.back().first)); } } ec = ss2->dump_from_remote_storage("meta_state.dump1", false); @@ -381,10 +392,14 @@ void meta_service_test_app::construct_apps_test() std::shared_ptr svc(new meta_service()); - std::vector nodes; + std::vector> nodes; std::string hint_message; generate_node_list(nodes, 1, 1); - svc->_state->construct_apps({resp}, nodes, hint_message); + std::vector hps; + for (const auto &p : nodes) { + hps.emplace_back(p.first); + } + svc->_state->construct_apps({resp}, hps, hint_message); meta_view mv = svc->_state->get_meta_view(); const app_mapper &mapper = *(mv.apps); diff --git a/src/meta/test/update_configuration_test.cpp b/src/meta/test/update_configuration_test.cpp index 2c23d231c2..fde1e1e5d3 100644 --- a/src/meta/test/update_configuration_test.cpp +++ 
b/src/meta/test/update_configuration_test.cpp @@ -56,6 +56,7 @@ #include "metadata_types.h" #include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_holder.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -86,7 +87,7 @@ class fake_sender_meta_service : public dsn::replication::meta_service { destroy_message(response); } - virtual void send_message(const dsn::rpc_address &target, dsn::message_ex *request) override + virtual void send_message(const dsn::host_port &target, dsn::message_ex *request) override { // we expect this is a configuration_update_request proposal dsn::message_ex *recv_request = create_corresponding_receive(request); @@ -105,26 +106,34 @@ class fake_sender_meta_service : public dsn::replication::meta_service case config_type::CT_ASSIGN_PRIMARY: case config_type::CT_UPGRADE_TO_PRIMARY: pc.primary = update_req->node; + pc.__set_hp_primary(update_req->hp_node); replica_helper::remove_node(update_req->node, pc.secondaries); + replica_helper::remove_node(update_req->hp_node, pc.hp_secondaries); break; case config_type::CT_ADD_SECONDARY: case config_type::CT_ADD_SECONDARY_FOR_LB: pc.secondaries.push_back(update_req->node); + pc.hp_secondaries.push_back(update_req->hp_node); update_req->type = config_type::CT_UPGRADE_TO_SECONDARY; break; case config_type::CT_REMOVE: case config_type::CT_DOWNGRADE_TO_INACTIVE: - if (update_req->node == pc.primary) + if (update_req->hp_node == pc.hp_primary) { pc.primary.set_invalid(); - else + pc.hp_primary.reset(); + } else { replica_helper::remove_node(update_req->node, pc.secondaries); + replica_helper::remove_node(update_req->hp_node, pc.hp_secondaries); + } break; case config_type::CT_DOWNGRADE_TO_SECONDARY: pc.secondaries.push_back(pc.primary); pc.primary.set_invalid(); + pc.hp_secondaries.push_back(pc.hp_primary); + pc.hp_primary.reset(); break; default: break; @@ -137,7 +146,7 @@ class 
fake_sender_meta_service : public dsn::replication::meta_service class null_meta_service : public dsn::replication::meta_service { public: - void send_message(const dsn::rpc_address &target, dsn::message_ex *request) + void send_message(const dsn::host_port &target, dsn::message_ex *request) { LOG_INFO("send request to {}", target); request->add_ref(); @@ -154,7 +163,7 @@ class dummy_partition_guardian : public partition_guardian { action.type = config_type::CT_INVALID; const dsn::partition_configuration &pc = *get_config(*view.apps, gpid); - if (!pc.primary.is_invalid() && pc.secondaries.size() == 2) + if (!pc.hp_primary.is_invalid() && pc.hp_secondaries.size() == 2) return pc_status::healthy; return pc_status::ill; } @@ -217,7 +226,8 @@ void meta_service_test_app::update_configuration_test() { dsn::error_code ec; std::shared_ptr svc(new fake_sender_meta_service(this)); - svc->_failure_detector.reset(new dsn::replication::meta_server_failure_detector(svc.get())); + svc->_failure_detector.reset( + new dsn::replication::meta_server_failure_detector(svc->_dns_resolver, svc.get())); ec = svc->remote_storage_initialize(); ASSERT_EQ(ec, dsn::ERR_OK); svc->_partition_guardian.reset(new partition_guardian(svc.get())); @@ -238,52 +248,54 @@ void meta_service_test_app::update_configuration_test() ss->_all_apps.emplace(1, app); - std::vector nodes; + std::vector> nodes; generate_node_list(nodes, 4, 4); dsn::partition_configuration &pc0 = app->partitions[0]; - pc0.primary = nodes[0]; - pc0.secondaries.push_back(nodes[1]); - pc0.secondaries.push_back(nodes[2]); + pc0.primary = nodes[0].second; + pc0.__set_hp_primary(nodes[0].first); + pc0.secondaries = {nodes[1].second, nodes[2].second}; + pc0.__set_hp_secondaries({nodes[1].first, nodes[2].first}); pc0.ballot = 3; dsn::partition_configuration &pc1 = app->partitions[1]; - pc1.primary = nodes[1]; - pc1.secondaries.push_back(nodes[0]); - pc1.secondaries.push_back(nodes[2]); + pc1.primary = nodes[1].second; + 
pc1.__set_hp_primary(nodes[1].first); + pc1.secondaries = {nodes[0].second, nodes[2].second}; + pc1.__set_hp_secondaries({nodes[0].first, nodes[2].first}); pc1.ballot = 3; ss->sync_apps_to_remote_storage(); ASSERT_TRUE(ss->spin_wait_staging(30)); ss->initialize_node_state(); - svc->set_node_state({nodes[0], nodes[1], nodes[2]}, true); + svc->set_node_state({nodes[0].first, nodes[1].first, nodes[2].first}, true); svc->_started = true; // test remove primary state_validator validator1 = [pc0](const app_mapper &apps) { const dsn::partition_configuration *pc = get_config(apps, pc0.pid); - return pc->ballot == pc0.ballot + 2 && pc->secondaries.size() == 1 && - std::find(pc0.secondaries.begin(), pc0.secondaries.end(), pc->primary) != - pc0.secondaries.end(); + return pc->ballot == pc0.ballot + 2 && pc->hp_secondaries.size() == 1 && + std::find(pc0.hp_secondaries.begin(), pc0.hp_secondaries.end(), pc->hp_primary) != + pc0.hp_secondaries.end(); }; // test kickoff secondary - dsn::rpc_address addr = nodes[0]; - state_validator validator2 = [pc1, addr](const app_mapper &apps) { + auto hp = nodes[0].first; + state_validator validator2 = [pc1, hp](const app_mapper &apps) { const dsn::partition_configuration *pc = get_config(apps, pc1.pid); - return pc->ballot == pc1.ballot + 1 && pc->secondaries.size() == 1 && - pc->secondaries.front() != addr; + return pc->ballot == pc1.ballot + 1 && pc->hp_secondaries.size() == 1 && + pc->hp_secondaries.front() != hp; }; - svc->set_node_state({nodes[0]}, false); + svc->set_node_state({nodes[0].first}, false); ASSERT_TRUE(wait_state(ss, validator1, 30)); ASSERT_TRUE(wait_state(ss, validator2, 30)); // test add secondary - svc->set_node_state({nodes[3]}, true); + svc->set_node_state({nodes[3].first}, true); state_validator validator3 = [pc0](const app_mapper &apps) { const dsn::partition_configuration *pc = get_config(apps, pc0.pid); - return pc->ballot == pc0.ballot + 1 && pc->secondaries.size() == 2; + return pc->ballot == pc0.ballot + 1 && 
pc->hp_secondaries.size() == 2; }; // the default delay for add node is 5 miniutes ASSERT_FALSE(wait_state(ss, validator3, 10)); @@ -297,7 +309,8 @@ void meta_service_test_app::adjust_dropped_size() { dsn::error_code ec; std::shared_ptr svc(new null_meta_service()); - svc->_failure_detector.reset(new dsn::replication::meta_server_failure_detector(svc.get())); + svc->_failure_detector.reset( + new dsn::replication::meta_server_failure_detector(svc->_dns_resolver, svc.get())); ec = svc->remote_storage_initialize(); ASSERT_EQ(ec, dsn::ERR_OK); svc->_partition_guardian.reset(new partition_guardian(svc.get())); @@ -318,32 +331,41 @@ void meta_service_test_app::adjust_dropped_size() ss->_all_apps.emplace(1, app); - std::vector nodes; + std::vector> nodes; generate_node_list(nodes, 10, 10); // first, the replica is healthy, and there are 2 dropped dsn::partition_configuration &pc = app->partitions[0]; - pc.primary = nodes[0]; - pc.secondaries = {nodes[1], nodes[2]}; + pc.primary = nodes[0].second; + pc.__set_hp_primary(nodes[0].first); + pc.secondaries = {nodes[1].second, nodes[2].second}; + pc.__set_hp_secondaries({nodes[1].first, nodes[2].first}); pc.ballot = 10; config_context &cc = *get_config_context(ss->_all_apps, pc.pid); cc.dropped = { - dropped_replica{nodes[3], dropped_replica::INVALID_TIMESTAMP, 7, 11, 14}, - dropped_replica{nodes[4], 20, invalid_ballot, invalid_decree, invalid_decree}, + dropped_replica{nodes[3].first, dropped_replica::INVALID_TIMESTAMP, 7, 11, 14}, + dropped_replica{nodes[4].first, 20, invalid_ballot, invalid_decree, invalid_decree}, }; ss->sync_apps_to_remote_storage(); - generate_node_mapper(ss->_nodes, ss->_all_apps, nodes); + + std::vector hps; + for (const auto &p : nodes) { + hps.emplace_back(p.first); + } + generate_node_mapper(ss->_nodes, ss->_all_apps, hps); // then we receive a request for upgrade a node to secondary std::shared_ptr req = std::make_shared(); req->config = pc; req->config.ballot++; - 
req->config.secondaries.push_back(nodes[5]); + req->config.secondaries.push_back(nodes[5].second); + req->config.__set_hp_secondaries({nodes[5].first}); req->info = info; - req->node = nodes[5]; + req->node = nodes[5].second; + req->__set_hp_node(nodes[5].first); req->type = config_type::CT_UPGRADE_TO_SECONDARY; call_update_configuration(svc.get(), req); @@ -352,7 +374,8 @@ void meta_service_test_app::adjust_dropped_size() // then receive a config_sync request fro nodes[4], which has less data than node[3] std::shared_ptr req2 = std::make_shared(); - req2->__set_node(nodes[4]); + req2->node = nodes[4].second; + req2->__set_hp_node(nodes[4].first); replica_info rep_info; rep_info.pid = pc.pid; @@ -372,7 +395,7 @@ void meta_service_test_app::adjust_dropped_size() dropped_replica &d = cc.dropped[0]; if (d.time != dropped_replica::INVALID_TIMESTAMP) return false; - if (d.node != nodes[4]) + if (d.node != nodes[4].first) return false; if (d.last_committed_decree != rep_info.last_committed_decree) return false; @@ -402,23 +425,28 @@ void meta_service_test_app::apply_balancer_test() ec = meta_svc->remote_storage_initialize(); ASSERT_EQ(dsn::ERR_OK, ec); - meta_svc->_failure_detector.reset( - new dsn::replication::meta_server_failure_detector(meta_svc.get())); + meta_svc->_failure_detector.reset(new dsn::replication::meta_server_failure_detector( + meta_svc->_dns_resolver, meta_svc.get())); meta_svc->_partition_guardian.reset(new partition_guardian(meta_svc.get())); meta_svc->_balancer.reset(new greedy_load_balancer(meta_svc.get())); // initialize data structure - std::vector node_list; + std::vector> node_list; generate_node_list(node_list, 5, 10); + std::vector hps; + for (const auto &p : node_list) { + hps.emplace_back(p.first); + } + server_state *ss = meta_svc->_state.get(); - generate_apps(ss->_all_apps, node_list, 5, 5, std::pair(2, 5), false); + generate_apps(ss->_all_apps, hps, 5, 5, std::pair(2, 5), false); app_mapper backed_app; node_mapper backed_nodes; 
clone_app_mapper(backed_app, ss->_all_apps); - generate_node_mapper(backed_nodes, backed_app, node_list); + generate_node_mapper(backed_nodes, backed_app, hps); // before initialize, we need to mark apps to AS_CREATING: for (auto &kv : ss->_all_apps) { @@ -430,7 +458,7 @@ void meta_service_test_app::apply_balancer_test() ss->initialize_node_state(); meta_svc->_started = true; - meta_svc->set_node_state(node_list, true); + meta_svc->set_node_state(hps, true); app_mapper_compare(backed_app, ss->_all_apps); // run balancer @@ -476,11 +504,11 @@ void meta_service_test_app::cannot_run_balancer_test() FLAGS_node_live_percentage_threshold_for_update = 0; svc->_state->initialize(svc.get(), "/"); - svc->_failure_detector.reset(new meta_server_failure_detector(svc.get())); + svc->_failure_detector.reset(new meta_server_failure_detector(svc->_dns_resolver, svc.get())); svc->_balancer.reset(new dummy_balancer(svc.get())); svc->_partition_guardian.reset(new dummy_partition_guardian(svc.get())); - std::vector nodes; + std::vector> nodes; generate_node_list(nodes, 10, 10); dsn::app_info info; @@ -499,12 +527,19 @@ void meta_service_test_app::cannot_run_balancer_test() svc->_state->_table_metric_entities.create_entity(info.app_id, info.partition_count); dsn::partition_configuration &pc = the_app->partitions[0]; - pc.primary = nodes[0]; - pc.secondaries = {nodes[1], nodes[2]}; + pc.primary = nodes[0].second; + pc.__set_hp_primary(nodes[0].first); + pc.secondaries = {nodes[1].second, nodes[2].second}; + pc.__set_hp_secondaries({nodes[1].first, nodes[2].first}); + + std::vector hps; + for (const auto &p : nodes) { + hps.emplace_back(p.first); + } #define REGENERATE_NODE_MAPPER \ svc->_state->_nodes.clear(); \ - generate_node_mapper(svc->_state->_nodes, svc->_state->_all_apps, nodes) + generate_node_mapper(svc->_state->_nodes, svc->_state->_all_apps, hps) REGENERATE_NODE_MAPPER; // stage are freezed @@ -518,14 +553,16 @@ void meta_service_test_app::cannot_run_balancer_test() // all 
the partitions are not healthy svc->_function_level.store(meta_function_level::fl_lively); pc.primary.set_invalid(); + pc.hp_primary.reset(); REGENERATE_NODE_MAPPER; ASSERT_FALSE(svc->_state->check_all_partitions()); // some dropped node still exists in nodes - pc.primary = nodes[0]; + pc.primary = nodes[0].second; + pc.__set_hp_primary(nodes[0].first); REGENERATE_NODE_MAPPER; - get_node_state(svc->_state->_nodes, pc.primary, true)->set_alive(false); + get_node_state(svc->_state->_nodes, pc.hp_primary, true)->set_alive(false); ASSERT_FALSE(svc->_state->check_all_partitions()); // some apps are staging diff --git a/src/nfs/nfs.thrift b/src/nfs/nfs.thrift index 3f0f96bbb9..706e8c6b1e 100644 --- a/src/nfs/nfs.thrift +++ b/src/nfs/nfs.thrift @@ -40,6 +40,7 @@ struct copy_request 8: bool overwrite; 9: optional string source_disk_tag; 10: optional dsn.gpid pid; + 11: optional dsn.host_port hp_source; } struct copy_response @@ -60,6 +61,7 @@ struct get_file_size_request 6: optional string source_disk_tag; 7: optional string dest_disk_tag; 8: optional dsn.gpid pid; + 9: optional dsn.host_port hp_source; } struct get_file_size_response diff --git a/src/nfs/nfs_client_impl.cpp b/src/nfs/nfs_client_impl.cpp index b6d52e9570..c5744c0147 100644 --- a/src/nfs/nfs_client_impl.cpp +++ b/src/nfs/nfs_client_impl.cpp @@ -32,6 +32,8 @@ #include "absl/strings/string_view.h" #include "nfs/nfs_code_definition.h" #include "nfs/nfs_node.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/blob.h" #include "utils/command_manager.h" #include "utils/filesystem.h" @@ -112,7 +114,7 @@ DSN_DEFINE_int32(nfs, "rpc timeout in milliseconds for nfs copy, " "0 means use default timeout of rpc engine"); -nfs_client_impl::nfs_client_impl() +nfs_client_impl::nfs_client_impl(const std::shared_ptr &resolver) : _concurrent_copy_request_count(0), _concurrent_local_write_count(0), _buffered_local_write_count(0), @@ -121,7 +123,8 @@ 
nfs_client_impl::nfs_client_impl() METRIC_VAR_INIT_server(nfs_client_copy_bytes), METRIC_VAR_INIT_server(nfs_client_copy_failed_requests), METRIC_VAR_INIT_server(nfs_client_write_bytes), - METRIC_VAR_INIT_server(nfs_client_failed_writes) + METRIC_VAR_INIT_server(nfs_client_failed_writes), + _dns_resolver(resolver) { _copy_token_buckets = std::make_unique(); @@ -135,7 +138,7 @@ void nfs_client_impl::begin_remote_copy(std::shared_ptr &rc { user_request_ptr req(new user_request()); req->high_priority = rci->high_priority; - req->file_size_req.source = rci->source; + req->file_size_req.source = _dns_resolver->resolve_address(rci->source); req->file_size_req.dst_dir = rci->dest_dir; req->file_size_req.file_list = rci->files; req->file_size_req.source_dir = rci->source_dir; @@ -143,6 +146,7 @@ void nfs_client_impl::begin_remote_copy(std::shared_ptr &rc req->file_size_req.__set_source_disk_tag(rci->source_disk_tag); req->file_size_req.__set_dest_disk_tag(rci->dest_disk_tag); req->file_size_req.__set_pid(rci->pid); + req->file_size_req.__set_hp_source(rci->source); req->nfs_task = nfs_task; req->is_finished = false; @@ -151,7 +155,7 @@ void nfs_client_impl::begin_remote_copy(std::shared_ptr &rc end_get_file_size(err, std::move(resp), req); }, std::chrono::milliseconds(FLAGS_rpc_timeout_ms), - req->file_size_req.source); + _dns_resolver->resolve_address(req->file_size_req.hp_source)); } void nfs_client_impl::end_get_file_size(::dsn::error_code err, @@ -159,7 +163,8 @@ void nfs_client_impl::end_get_file_size(::dsn::error_code err, const user_request_ptr &ureq) { if (err != ::dsn::ERR_OK) { - LOG_ERROR("[nfs_service] remote get file size failed, source = {}, dir = {}, err = {}", + LOG_ERROR("[nfs_service] remote get file size failed, source = {}({}), dir = {}, err = {}", + ureq->file_size_req.hp_source, ureq->file_size_req.source, ureq->file_size_req.source_dir, err); @@ -169,7 +174,8 @@ void nfs_client_impl::end_get_file_size(::dsn::error_code err, err = 
dsn::error_code(resp.error); if (err != ::dsn::ERR_OK) { - LOG_ERROR("[nfs_service] remote get file size failed, source = {}, dir = {}, err = {}", + LOG_ERROR("[nfs_service] remote get file size failed, source = {}({}), dir = {}, err = {}", + ureq->file_size_req.hp_source, ureq->file_size_req.source, ureq->file_size_req.source_dir, err); @@ -300,6 +306,7 @@ void nfs_client_impl::continue_copy() copy_req.is_last = req->is_last; copy_req.__set_source_disk_tag(ureq->file_size_req.source_disk_tag); copy_req.__set_pid(ureq->file_size_req.pid); + copy_req.__set_hp_source(ureq->file_size_req.hp_source); req->remote_copy_task = async_nfs_copy(copy_req, [=](error_code err, copy_response &&resp) { @@ -313,7 +320,8 @@ void nfs_client_impl::continue_copy() } }, std::chrono::milliseconds(FLAGS_rpc_timeout_ms), - req->file_ctx->user_req->file_size_req.source); + _dns_resolver->resolve_address( + req->file_ctx->user_req->file_size_req.hp_source)); } else { --ureq->concurrent_copy_count; --_concurrent_copy_request_count; @@ -346,14 +354,17 @@ void nfs_client_impl::end_copy(::dsn::error_code err, METRIC_VAR_INCREMENT(nfs_client_copy_failed_requests); if (!fc->user_req->is_finished) { + host_port hp = fc->user_req->file_size_req.hp_source; if (reqc->retry_count > 0) { - LOG_WARNING("[nfs_service] remote copy failed, source = {}, dir = {}, file = {}, " - "err = {}, retry_count = {}", - fc->user_req->file_size_req.source, - fc->user_req->file_size_req.source_dir, - fc->file_name, - err, - reqc->retry_count); + LOG_WARNING( + "[nfs_service] remote copy failed, source = {}({}), dir = {}, file = {}, " + "err = {}, retry_count = {}", + hp, + fc->user_req->file_size_req.source, + fc->user_req->file_size_req.source_dir, + fc->file_name, + err, + reqc->retry_count); // retry copy reqc->retry_count--; @@ -365,8 +376,9 @@ void nfs_client_impl::end_copy(::dsn::error_code err, else _copy_requests_low.push_retry(reqc); } else { - LOG_ERROR("[nfs_service] remote copy failed, source = {}, dir = {}, 
file = {}, " + LOG_ERROR("[nfs_service] remote copy failed, source = {}({}), dir = {}, file = {}, " "err = {}, retry_count = {}", + hp, fc->user_req->file_size_req.source, fc->user_req->file_size_req.source_dir, fc->file_name, diff --git a/src/nfs/nfs_client_impl.h b/src/nfs/nfs_client_impl.h index 183ac38a97..b9a225bf4b 100644 --- a/src/nfs/nfs_client_impl.h +++ b/src/nfs/nfs_client_impl.h @@ -55,9 +55,11 @@ namespace dsn { class command_deregister; class disk_file; + namespace utils { class token_buckets; } // namespace utils +class dns_resolver; struct remote_copy_request; namespace service { @@ -268,7 +270,7 @@ class nfs_client_impl }; public: - nfs_client_impl(); + nfs_client_impl(const std::shared_ptr &resolver); virtual ~nfs_client_impl(); // copy file request entry @@ -319,6 +321,8 @@ class nfs_client_impl std::unique_ptr _nfs_max_copy_rate_megabytes_cmd; dsn::task_tracker _tracker; + + std::shared_ptr _dns_resolver; }; } // namespace service } // namespace dsn diff --git a/src/nfs/nfs_node.cpp b/src/nfs/nfs_node.cpp index 0103b9a35e..c9325178bf 100644 --- a/src/nfs/nfs_node.cpp +++ b/src/nfs/nfs_node.cpp @@ -33,14 +33,15 @@ #include "utils/autoref_ptr.h" namespace dsn { +class dns_resolver; class task_tracker; -std::unique_ptr nfs_node::create() +std::unique_ptr nfs_node::create(const std::shared_ptr &resolver) { - return std::make_unique(); + return std::make_unique(resolver); } -aio_task_ptr nfs_node::copy_remote_directory(const rpc_address &remote, +aio_task_ptr nfs_node::copy_remote_directory(const host_port &remote, const std::string &source_disk_tag, const std::string &source_dir, const std::string &dest_disk_tag, @@ -68,7 +69,7 @@ aio_task_ptr nfs_node::copy_remote_directory(const rpc_address &remote, hash); } -aio_task_ptr nfs_node::copy_remote_files(const rpc_address &remote, +aio_task_ptr nfs_node::copy_remote_files(const host_port &remote, const std::string &source_disk_tag, const std::string &source_dir, const std::vector &files, diff --git 
a/src/nfs/nfs_node.h b/src/nfs/nfs_node.h index 635562c669..fd735e5c01 100644 --- a/src/nfs/nfs_node.h +++ b/src/nfs/nfs_node.h @@ -33,24 +33,26 @@ #include "aio/aio_task.h" #include "common/gpid.h" #include "runtime/api_task.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task_code.h" #include "utils/error_code.h" namespace dsn { class task_tracker; + namespace service { class copy_request; class copy_response; class get_file_size_request; class get_file_size_response; } // namespace service +class dns_resolver; template class rpc_replier; struct remote_copy_request { - dsn::rpc_address source; + dsn::host_port source; std::string source_disk_tag; std::string source_dir; std::vector files; @@ -64,10 +66,10 @@ struct remote_copy_request class nfs_node { public: - static std::unique_ptr create(); + static std::unique_ptr create(const std::shared_ptr &resolver); public: - aio_task_ptr copy_remote_directory(const rpc_address &remote, + aio_task_ptr copy_remote_directory(const host_port &remote, const std::string &source_disk_tag, const std::string &source_dir, const std::string &dest_disk_tag, @@ -79,7 +81,7 @@ class nfs_node task_tracker *tracker, aio_handler &&callback, int hash = 0); - aio_task_ptr copy_remote_files(const rpc_address &remote, + aio_task_ptr copy_remote_files(const host_port &remote, const std::string &source_disk_tag, const std::string &source_dir, const std::vector &files, // empty for all diff --git a/src/nfs/nfs_node_simple.cpp b/src/nfs/nfs_node_simple.cpp index 1b51b379f7..dfa5b78158 100644 --- a/src/nfs/nfs_node_simple.cpp +++ b/src/nfs/nfs_node_simple.cpp @@ -34,6 +34,7 @@ namespace dsn { class aio_task; +class dns_resolver; template class rpc_replier; @@ -43,7 +44,8 @@ class copy_response; class get_file_size_request; class get_file_size_response; -nfs_node_simple::nfs_node_simple() : nfs_node() +nfs_node_simple::nfs_node_simple(const std::shared_ptr &resolver) + : nfs_node(), 
_dns_resolver(resolver) { _server = nullptr; _client = nullptr; @@ -60,7 +62,7 @@ error_code nfs_node_simple::start() { _server = new nfs_service_impl(); - _client = new nfs_client_impl(); + _client = new nfs_client_impl(_dns_resolver); return ERR_OK; } diff --git a/src/nfs/nfs_node_simple.h b/src/nfs/nfs_node_simple.h index d34c75348f..6f5d7eee69 100644 --- a/src/nfs/nfs_node_simple.h +++ b/src/nfs/nfs_node_simple.h @@ -33,6 +33,7 @@ namespace dsn { class aio_task; +class dns_resolver; template class rpc_replier; @@ -48,7 +49,7 @@ class nfs_service_impl; class nfs_node_simple : public nfs_node { public: - nfs_node_simple(); + nfs_node_simple(const std::shared_ptr &resolver); virtual ~nfs_node_simple(); @@ -70,6 +71,8 @@ class nfs_node_simple : public nfs_node private: nfs_service_impl *_server; nfs_client_impl *_client; + + std::shared_ptr _dns_resolver; }; } // namespace service } // namespace dsn diff --git a/src/nfs/test/main.cpp b/src/nfs/test/main.cpp index c665016243..f10c9977e7 100644 --- a/src/nfs/test/main.cpp +++ b/src/nfs/test/main.cpp @@ -38,7 +38,8 @@ #include "gtest/gtest.h" #include "nfs/nfs_node.h" #include "runtime/app_model.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task_code.h" #include "runtime/tool_api.h" #include "test_util/test_util.h" @@ -68,7 +69,8 @@ INSTANTIATE_TEST_SUITE_P(, nfs_test, ::testing::Values(false, true)); TEST_P(nfs_test, basic) { - auto nfs = dsn::nfs_node::create(); + auto resolver = std::make_shared(); + auto nfs = dsn::nfs_node::create(resolver); nfs->start(); nfs->register_async_rpc_handler_for_test(); dsn::gpid fake_pid = gpid(1, 0); @@ -109,7 +111,7 @@ TEST_P(nfs_test, basic) ASSERT_TRUE(dst_filenames.empty()); aio_result r; - dsn::aio_task_ptr t = nfs->copy_remote_files(dsn::rpc_address("localhost", 20101), + dsn::aio_task_ptr t = nfs->copy_remote_files(dsn::host_port("localhost", 20101), "default", ".", kSrcFilenames, 
@@ -151,7 +153,7 @@ TEST_P(nfs_test, basic) // copy files to the destination directory, files will be overwritten. { aio_result r; - dsn::aio_task_ptr t = nfs->copy_remote_files(dsn::rpc_address("localhost", 20101), + dsn::aio_task_ptr t = nfs->copy_remote_files(dsn::host_port("localhost", 20101), "default", ".", kSrcFilenames, @@ -203,7 +205,7 @@ TEST_P(nfs_test, basic) ASSERT_FALSE(utils::filesystem::directory_exists(kNewDstDir)); aio_result r; - dsn::aio_task_ptr t = nfs->copy_remote_directory(dsn::rpc_address("localhost", 20101), + dsn::aio_task_ptr t = nfs->copy_remote_directory(dsn::host_port("localhost", 20101), "default", kDstDir, "default", diff --git a/src/redis_protocol/proxy_lib/proxy_layer.cpp b/src/redis_protocol/proxy_lib/proxy_layer.cpp index fc415f21d1..59329a4032 100644 --- a/src/redis_protocol/proxy_lib/proxy_layer.cpp +++ b/src/redis_protocol/proxy_lib/proxy_layer.cpp @@ -22,6 +22,7 @@ #include "proxy_layer.h" #include "runtime/rpc/network.h" +#include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_message.h" #include "runtime/task/task_spec.h" #include "utils/autoref_ptr.h" @@ -61,7 +62,7 @@ proxy_stub::proxy_stub(const proxy_session::factory &f, void proxy_stub::on_rpc_request(dsn::message_ex *request) { - ::dsn::rpc_address source = request->header->from_address; + ::dsn::host_port source = ::dsn::host_port(request->header->from_address); std::shared_ptr session; { ::dsn::zauto_read_lock l(_lock); @@ -86,21 +87,21 @@ void proxy_stub::on_rpc_request(dsn::message_ex *request) void proxy_stub::on_recv_remove_session_request(dsn::message_ex *request) { - ::dsn::rpc_address source = request->header->from_address; + ::dsn::host_port source = ::dsn::host_port(request->header->from_address); remove_session(source); } -void proxy_stub::remove_session(dsn::rpc_address remote_address) +void proxy_stub::remove_session(dsn::host_port remote_host_port) { std::shared_ptr session; { ::dsn::zauto_write_lock l(_lock); - auto iter = 
_sessions.find(remote_address); + auto iter = _sessions.find(remote_host_port); if (iter == _sessions.end()) { - LOG_WARNING("{} has been removed from proxy stub", remote_address); + LOG_WARNING("{} has been removed from proxy stub", remote_host_port); return; } - LOG_INFO("remove {} from proxy stub", remote_address); + LOG_INFO("remove {} from proxy stub", remote_host_port); session = std::move(iter->second); _sessions.erase(iter); } @@ -113,8 +114,9 @@ proxy_session::proxy_session(proxy_stub *op, dsn::message_ex *first_msg) CHECK_NOTNULL(first_msg, "null msg when create session"); _backup_one_request->add_ref(); - _remote_address = _backup_one_request->header->from_address; - CHECK_EQ_MSG(_remote_address.type(), HOST_TYPE_IPV4, "invalid rpc_address type"); + _session_remote_host_port = ::dsn::host_port(_backup_one_request->header->from_address); + _session_remote_host_port_str = _session_remote_host_port.to_string(); + CHECK_EQ_MSG(_session_remote_host_port.type(), HOST_TYPE_IPV4, "invalid host_port type"); } proxy_session::~proxy_session() @@ -135,7 +137,7 @@ void proxy_session::on_recv_request(dsn::message_ex *msg) // "parse" with a lock. 
a subclass may implement a lock inside parse if necessary if (!parse(msg)) { LOG_ERROR_PREFIX("got invalid message, try to remove the proxy session from proxy stub"); - _stub->remove_session(_remote_address); + _stub->remove_session(_session_remote_host_port); LOG_ERROR_PREFIX("close the proxy session"); ((dsn::message_ex *)_backup_one_request)->io_session->close(); diff --git a/src/redis_protocol/proxy_lib/proxy_layer.h b/src/redis_protocol/proxy_lib/proxy_layer.h index 8f8a7022a9..538a7680e7 100644 --- a/src/redis_protocol/proxy_lib/proxy_layer.h +++ b/src/redis_protocol/proxy_lib/proxy_layer.h @@ -25,7 +25,7 @@ #include #include -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/serverlet.h" #include "runtime/task/task_code.h" #include "utils/threadpool_code.h" @@ -70,7 +70,7 @@ class proxy_session : public std::enable_shared_from_this virtual bool parse(dsn::message_ex *msg) = 0; dsn::message_ex *create_response(); - const char *log_prefix() const { return _remote_address.to_string(); } + const char *log_prefix() const { return _session_remote_host_port_str.c_str(); } protected: proxy_stub *_stub; @@ -79,8 +79,9 @@ class proxy_session : public std::enable_shared_from_this // when get message from raw parser, request & response of "dsn::message_ex*" are not in couple. // we need to backup one request to create a response struct. 
dsn::message_ex *_backup_one_request; - // the client address for which this session served - dsn::rpc_address _remote_address; + // the client for which this session served + dsn::host_port _session_remote_host_port; + std::string _session_remote_host_port_str; }; class proxy_stub : public ::dsn::serverlet @@ -106,16 +107,16 @@ class proxy_stub : public ::dsn::serverlet this->unregister_rpc_handler(RPC_CALL_RAW_MESSAGE); this->unregister_rpc_handler(RPC_CALL_RAW_SESSION_DISCONNECT); } - void remove_session(dsn::rpc_address remote_address); + void remove_session(dsn::host_port remote_address); private: void on_rpc_request(dsn::message_ex *request); void on_recv_remove_session_request(dsn::message_ex *); ::dsn::zrwlock_nr _lock; - std::unordered_map<::dsn::rpc_address, std::shared_ptr> _sessions; + std::unordered_map<::dsn::host_port, std::shared_ptr> _sessions; proxy_session::factory _factory; - ::dsn::rpc_address _uri_address; + ::dsn::host_port _uri_address; std::string _cluster; std::string _app; std::string _geo_app; diff --git a/src/redis_protocol/proxy_lib/redis_parser.cpp b/src/redis_protocol/proxy_lib/redis_parser.cpp index baf9aed8b0..37e47fc06c 100644 --- a/src/redis_protocol/proxy_lib/redis_parser.cpp +++ b/src/redis_protocol/proxy_lib/redis_parser.cpp @@ -32,12 +32,14 @@ #include #include +#include "absl/strings/string_view.h" #include "base/pegasus_const.h" #include "common/replication_other_types.h" #include "pegasus/client.h" #include "rrdb/rrdb_types.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/serialization.h" #include "utils/api_utilities.h" #include "utils/binary_writer.h" @@ -45,7 +47,6 @@ #include "utils/fmt_logging.h" #include "utils/ports.h" #include "utils/string_conv.h" -#include "absl/strings/string_view.h" #include "utils/strings.h" #include "utils/utils.h" @@ -92,14 +93,16 @@ 
redis_parser::redis_parser(proxy_stub *op, dsn::message_ex *first_msg) _total_length(0), _current_buffer(nullptr), _current_buffer_length(0), - _current_cursor(0) + _current_cursor(0), + _dns_resolver(new dsn::dns_resolver()) { ::dsn::apps::rrdb_client *r; if (op) { - std::vector meta_list; + std::vector meta_list; dsn::replication::replica_helper::load_meta_servers( meta_list, PEGASUS_CLUSTER_SECTION_NAME.c_str(), op->get_cluster()); - r = new ::dsn::apps::rrdb_client(op->get_cluster(), meta_list, op->get_app()); + r = new ::dsn::apps::rrdb_client( + op->get_cluster(), meta_list, op->get_app(), _dns_resolver); if (!dsn::utils::is_empty(op->get_geo_app())) { _geo_client = std::make_unique( "config.ini", op->get_cluster(), op->get_app(), op->get_geo_app()); diff --git a/src/redis_protocol/proxy_lib/redis_parser.h b/src/redis_protocol/proxy_lib/redis_parser.h index f66bce0002..4c3eae429a 100644 --- a/src/redis_protocol/proxy_lib/redis_parser.h +++ b/src/redis_protocol/proxy_lib/redis_parser.h @@ -40,6 +40,7 @@ namespace dsn { class binary_writer; +class dns_resolver; namespace apps { class rrdb_client; @@ -164,6 +165,8 @@ class redis_parser : public proxy_session std::unique_ptr<::dsn::apps::rrdb_client> client; std::unique_ptr _geo_client; + std::shared_ptr _dns_resolver; + protected: // function for data stream void append_message(dsn::message_ex *msg); diff --git a/src/replica/bulk_load/replica_bulk_loader.cpp b/src/replica/bulk_load/replica_bulk_loader.cpp index ba32e41061..0ab3d8b563 100644 --- a/src/replica/bulk_load/replica_bulk_loader.cpp +++ b/src/replica/bulk_load/replica_bulk_loader.cpp @@ -36,8 +36,10 @@ #include "replica/replica_stub.h" #include "replica/replication_app_base.h" #include "replica_bulk_loader.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_holder.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/async_calls.h" #include "utils/autoref_ptr.h" #include 
"utils/chrono_literals.h" @@ -183,13 +185,15 @@ void replica_bulk_loader::broadcast_group_bulk_load(const bulk_load_request &met LOG_INFO_PREFIX("start to broadcast group bulk load"); - for (const auto &addr : _replica->_primary_states.membership.secondaries) { - if (addr == _stub->_primary_address) + for (const auto &hp : _replica->_primary_states.membership.hp_secondaries) { + if (hp == _stub->primary_host_port()) continue; auto request = std::make_unique(); request->app_name = _replica->_app_info.app_name; - request->target_address = addr; + auto addr = _stub->get_dns_resolver()->resolve_address(hp); + request->target = addr; + request->__set_hp_target(hp); _replica->_primary_states.get_replica_config(partition_status::PS_SECONDARY, request->config); request->cluster_name = meta_req.cluster_name; @@ -197,14 +201,14 @@ void replica_bulk_loader::broadcast_group_bulk_load(const bulk_load_request &met request->meta_bulk_load_status = meta_req.meta_bulk_load_status; request->remote_root_path = meta_req.remote_root_path; - LOG_INFO_PREFIX("send group_bulk_load_request to {}", addr); + LOG_INFO_PREFIX("send group_bulk_load_request to {}({})", hp, addr); group_bulk_load_rpc rpc( std::move(request), RPC_GROUP_BULK_LOAD, 0_ms, 0, get_gpid().thread_hash()); auto callback_task = rpc.call(addr, tracker(), [this, rpc](error_code err) mutable { on_group_bulk_load_reply(err, rpc.request(), rpc.response()); }); - _replica->_primary_states.group_bulk_load_pending_replies[addr] = callback_task; + _replica->_primary_states.group_bulk_load_pending_replies[hp] = callback_task; } } @@ -240,8 +244,9 @@ void replica_bulk_loader::on_group_bulk_load(const group_bulk_load_request &requ return; } - LOG_INFO_PREFIX("receive group_bulk_load request, primary address = {}, ballot = {}, " + LOG_INFO_PREFIX("receive group_bulk_load request, primary address = {}({}), ballot = {}, " "meta bulk_load_status = {}, local bulk_load_status = {}", + request.config.hp_primary, request.config.primary, 
request.config.ballot, enum_to_string(request.meta_bulk_load_status), @@ -275,34 +280,39 @@ void replica_bulk_loader::on_group_bulk_load_reply(error_code err, return; } - _replica->_primary_states.group_bulk_load_pending_replies.erase(req.target_address); + _replica->_primary_states.group_bulk_load_pending_replies.erase(req.hp_target); if (err != ERR_OK) { - LOG_ERROR_PREFIX( - "failed to receive group_bulk_load_reply from {}, error = {}", req.target_address, err); - _replica->_primary_states.reset_node_bulk_load_states(req.target_address); + LOG_ERROR_PREFIX("failed to receive group_bulk_load_reply from {}({}), error = {}", + req.hp_target, + req.target, + err); + _replica->_primary_states.reset_node_bulk_load_states(req.hp_target); return; } if (resp.err != ERR_OK) { - LOG_ERROR_PREFIX("receive group_bulk_load response from {} failed, error = {}", - req.target_address, + LOG_ERROR_PREFIX("receive group_bulk_load response from {}({}) failed, error = {}", + req.hp_target, + req.target, resp.err); - _replica->_primary_states.reset_node_bulk_load_states(req.target_address); + _replica->_primary_states.reset_node_bulk_load_states(req.hp_target); return; } if (req.config.ballot != get_ballot()) { - LOG_ERROR_PREFIX("recevied wrong group_bulk_load response from {}, request ballot = {}, " - "current ballot = {}", - req.target_address, - req.config.ballot, - get_ballot()); - _replica->_primary_states.reset_node_bulk_load_states(req.target_address); + LOG_ERROR_PREFIX( + "recevied wrong group_bulk_load response from {}({}), request ballot = {}, " + "current ballot = {}", + req.hp_target, + req.target, + req.config.ballot, + get_ballot()); + _replica->_primary_states.reset_node_bulk_load_states(req.hp_target); return; } - _replica->_primary_states.secondary_bulk_load_states[req.target_address] = resp.bulk_load_state; + _replica->_primary_states.secondary_bulk_load_states[req.hp_target] = resp.bulk_load_state; } // ThreadPool: THREAD_POOL_REPLICATION @@ -427,7 +437,7 @@ 
error_code replica_bulk_loader::start_download(const std::string &remote_dir, if (_stub->_bulk_load_downloading_count.load() >= _stub->_max_concurrent_bulk_load_downloading_count) { LOG_WARNING_PREFIX("node[{}] already has {} replica downloading, wait for next round", - _stub->_primary_address_str, + _stub->_primary_host_port_cache, _stub->_bulk_load_downloading_count.load()); return ERR_BUSY; } @@ -446,7 +456,7 @@ error_code replica_bulk_loader::start_download(const std::string &remote_dir, _status = bulk_load_status::BLS_DOWNLOADING; ++_stub->_bulk_load_downloading_count; LOG_INFO_PREFIX("node[{}] has {} replica executing downloading", - _stub->_primary_address_str, + _stub->_primary_host_port_cache, _stub->_bulk_load_downloading_count.load()); _bulk_load_start_time_ms = dsn_now_ms(); METRIC_VAR_INCREMENT(bulk_load_downloading_count); @@ -658,7 +668,7 @@ void replica_bulk_loader::try_decrease_bulk_load_download_count() --_stub->_bulk_load_downloading_count; _is_downloading.store(false); LOG_INFO_PREFIX("node[{}] has {} replica executing downloading", - _stub->_primary_address_str, + _stub->_primary_host_port_cache, _stub->_bulk_load_downloading_count.load()); } @@ -733,8 +743,8 @@ void replica_bulk_loader::handle_bulk_load_finish(bulk_load_status::type new_sta } if (status() == partition_status::PS_PRIMARY) { - for (const auto &target_address : _replica->_primary_states.membership.secondaries) { - _replica->_primary_states.reset_node_bulk_load_states(target_address); + for (const auto &target_hp : _replica->_primary_states.membership.hp_secondaries) { + _replica->_primary_states.reset_node_bulk_load_states(target_hp); } } @@ -921,24 +931,27 @@ void replica_bulk_loader::report_group_download_progress(/*out*/ bulk_load_respo primary_state.__set_download_status(_download_status.load()); } response.group_bulk_load_state[_replica->_primary_states.membership.primary] = primary_state; - LOG_INFO_PREFIX("primary = {}, download progress = {}%, status = {}", + 
response.hp_group_bulk_load_state[_replica->_primary_states.membership.hp_primary] = + primary_state; + LOG_INFO_PREFIX("primary = {}({}), download progress = {}%, status = {}", + _replica->_primary_states.membership.hp_primary, _replica->_primary_states.membership.primary, primary_state.download_progress, primary_state.download_status); int32_t total_progress = primary_state.download_progress; - for (const auto &target_address : _replica->_primary_states.membership.secondaries) { + for (const auto &target_hp : _replica->_primary_states.membership.hp_secondaries) { const auto &secondary_state = - _replica->_primary_states.secondary_bulk_load_states[target_address]; + _replica->_primary_states.secondary_bulk_load_states[target_hp]; int32_t s_progress = secondary_state.__isset.download_progress ? secondary_state.download_progress : 0; error_code s_status = secondary_state.__isset.download_status ? secondary_state.download_status : ERR_OK; - LOG_INFO_PREFIX("secondary = {}, download progress = {}%, status={}", - target_address, - s_progress, - s_status); - response.group_bulk_load_state[target_address] = secondary_state; + LOG_INFO_PREFIX( + "secondary = {}, download progress = {}%, status={}", target_hp, s_progress, s_status); + response.group_bulk_load_state[_stub->get_dns_resolver()->resolve_address(target_hp)] = + secondary_state; + response.hp_group_bulk_load_state[target_hp] = secondary_state; total_progress += s_progress; } @@ -961,23 +974,28 @@ void replica_bulk_loader::report_group_ingestion_status(/*out*/ bulk_load_respon partition_bulk_load_state primary_state; primary_state.__set_ingest_status(_replica->_app->get_ingestion_status()); response.group_bulk_load_state[_replica->_primary_states.membership.primary] = primary_state; - LOG_INFO_PREFIX("primary = {}, ingestion status = {}", + response.hp_group_bulk_load_state[_replica->_primary_states.membership.hp_primary] = + primary_state; + LOG_INFO_PREFIX("primary = {}({}), ingestion status = {}", + 
_replica->_primary_states.membership.hp_primary, _replica->_primary_states.membership.primary, enum_to_string(primary_state.ingest_status)); bool is_group_ingestion_finish = (primary_state.ingest_status == ingestion_status::IS_SUCCEED) && - (_replica->_primary_states.membership.secondaries.size() + 1 == + (_replica->_primary_states.membership.hp_secondaries.size() + 1 == _replica->_primary_states.membership.max_replica_count); - for (const auto &target_address : _replica->_primary_states.membership.secondaries) { + for (const auto &target_hp : _replica->_primary_states.membership.hp_secondaries) { const auto &secondary_state = - _replica->_primary_states.secondary_bulk_load_states[target_address]; + _replica->_primary_states.secondary_bulk_load_states[target_hp]; ingestion_status::type ingest_status = secondary_state.__isset.ingest_status ? secondary_state.ingest_status : ingestion_status::IS_INVALID; LOG_INFO_PREFIX( - "secondary = {}, ingestion status={}", target_address, enum_to_string(ingest_status)); - response.group_bulk_load_state[target_address] = secondary_state; + "secondary = {}, ingestion status={}", target_hp, enum_to_string(ingest_status)); + response.group_bulk_load_state[_stub->get_dns_resolver()->resolve_address(target_hp)] = + secondary_state; + response.hp_group_bulk_load_state[target_hp] = secondary_state; is_group_ingestion_finish &= (ingest_status == ingestion_status::IS_SUCCEED); } response.__set_is_group_ingestion_finished(is_group_ingestion_finish); @@ -1004,21 +1022,25 @@ void replica_bulk_loader::report_group_cleaned_up(bulk_load_response &response) partition_bulk_load_state primary_state; primary_state.__set_is_cleaned_up(is_cleaned_up()); response.group_bulk_load_state[_replica->_primary_states.membership.primary] = primary_state; + response.hp_group_bulk_load_state[_replica->_primary_states.membership.hp_primary] = + primary_state; LOG_INFO_PREFIX("primary = {}, bulk load states cleaned_up = {}", 
_replica->_primary_states.membership.primary, primary_state.is_cleaned_up); bool group_flag = (primary_state.is_cleaned_up) && - (_replica->_primary_states.membership.secondaries.size() + 1 == + (_replica->_primary_states.membership.hp_secondaries.size() + 1 == _replica->_primary_states.membership.max_replica_count); - for (const auto &target_address : _replica->_primary_states.membership.secondaries) { + for (const auto &target_hp : _replica->_primary_states.membership.hp_secondaries) { const auto &secondary_state = - _replica->_primary_states.secondary_bulk_load_states[target_address]; + _replica->_primary_states.secondary_bulk_load_states[target_hp]; bool is_cleaned_up = secondary_state.__isset.is_cleaned_up ? secondary_state.is_cleaned_up : false; LOG_INFO_PREFIX( - "secondary = {}, bulk load states cleaned_up = {}", target_address, is_cleaned_up); - response.group_bulk_load_state[target_address] = secondary_state; + "secondary = {}, bulk load states cleaned_up = {}", target_hp, is_cleaned_up); + response.group_bulk_load_state[_stub->get_dns_resolver()->resolve_address(target_hp)] = + secondary_state; + response.hp_group_bulk_load_state[target_hp] = secondary_state; group_flag &= is_cleaned_up; } LOG_INFO_PREFIX("group bulk load states cleaned_up = {}", group_flag); @@ -1039,19 +1061,24 @@ void replica_bulk_loader::report_group_is_paused(bulk_load_response &response) partition_bulk_load_state primary_state; primary_state.__set_is_paused(_status == bulk_load_status::BLS_PAUSED); response.group_bulk_load_state[_replica->_primary_states.membership.primary] = primary_state; - LOG_INFO_PREFIX("primary = {}, bulk_load is_paused = {}", + response.hp_group_bulk_load_state[_replica->_primary_states.membership.hp_primary] = + primary_state; + LOG_INFO_PREFIX("primary = {}({}), bulk_load is_paused = {}", + _replica->_primary_states.membership.hp_primary, _replica->_primary_states.membership.primary, primary_state.is_paused); - bool group_is_paused = - 
primary_state.is_paused && (_replica->_primary_states.membership.secondaries.size() + 1 == - _replica->_primary_states.membership.max_replica_count); - for (const auto &target_address : _replica->_primary_states.membership.secondaries) { + bool group_is_paused = primary_state.is_paused && + (_replica->_primary_states.membership.hp_secondaries.size() + 1 == + _replica->_primary_states.membership.max_replica_count); + for (const auto &target_hp : _replica->_primary_states.membership.hp_secondaries) { partition_bulk_load_state secondary_state = - _replica->_primary_states.secondary_bulk_load_states[target_address]; + _replica->_primary_states.secondary_bulk_load_states[target_hp]; bool is_paused = secondary_state.__isset.is_paused ? secondary_state.is_paused : false; - LOG_INFO_PREFIX("secondary = {}, bulk_load is_paused = {}", target_address, is_paused); - response.group_bulk_load_state[target_address] = secondary_state; + LOG_INFO_PREFIX("secondary = {}, bulk_load is_paused = {}", target_hp, is_paused); + response.group_bulk_load_state[_stub->get_dns_resolver()->resolve_address(target_hp)] = + secondary_state; + response.hp_group_bulk_load_state[target_hp] = secondary_state; group_is_paused &= is_paused; } LOG_INFO_PREFIX("group bulk load is_paused = {}", group_is_paused); diff --git a/src/replica/bulk_load/test/replica_bulk_loader_test.cpp b/src/replica/bulk_load/test/replica_bulk_loader_test.cpp index 94a80bfb8c..c07f101a6d 100644 --- a/src/replica/bulk_load/test/replica_bulk_loader_test.cpp +++ b/src/replica/bulk_load/test/replica_bulk_loader_test.cpp @@ -28,6 +28,7 @@ #include "replica/test/mock_utils.h" #include "replica/test/replica_test_base.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task_tracker.h" #include "test_util/test_util.h" #include "utils/fail_point.h" @@ -165,8 +166,8 @@ class replica_bulk_loader_test : public replica_test_base mock_group_progress(status, 10, 50, 50); 
partition_bulk_load_state state; state.__set_is_paused(true); - _replica->set_secondary_bulk_load_state(SECONDARY, state); - _replica->set_secondary_bulk_load_state(SECONDARY2, state); + _replica->set_secondary_bulk_load_state(SECONDARY_HP, state); + _replica->set_secondary_bulk_load_state(SECONDARY_HP2, state); bulk_load_response response; _bulk_loader->report_group_is_paused(response); @@ -219,7 +220,8 @@ class replica_bulk_loader_test : public replica_test_base _group_req.meta_bulk_load_status = status; _group_req.config.status = partition_status::PS_SECONDARY; _group_req.config.ballot = b; - _group_req.target_address = SECONDARY; + _group_req.target = SECONDARY; + _group_req.__set_hp_target(SECONDARY_HP); } void mock_replica_config(partition_status::type status) @@ -228,6 +230,7 @@ class replica_bulk_loader_test : public replica_test_base rconfig.ballot = BALLOT; rconfig.pid = PID; rconfig.primary = PRIMARY; + rconfig.__set_hp_primary(PRIMARY_HP); rconfig.status = status; _replica->set_replica_config(rconfig); } @@ -242,6 +245,10 @@ class replica_bulk_loader_test : public replica_test_base config.primary = PRIMARY; config.secondaries.emplace_back(SECONDARY); config.secondaries.emplace_back(SECONDARY2); + config.__set_hp_primary(PRIMARY_HP); + config.__set_hp_secondaries({}); + config.hp_secondaries.emplace_back(SECONDARY_HP); + config.hp_secondaries.emplace_back(SECONDARY_HP2); _replica->set_primary_partition_configuration(config); } @@ -309,8 +316,8 @@ class replica_bulk_loader_test : public replica_test_base state1.__set_download_progress(secondary_progress1); state2.__set_download_status(ERR_OK); state2.__set_download_progress(secondary_progress2); - _replica->set_secondary_bulk_load_state(SECONDARY, state1); - _replica->set_secondary_bulk_load_state(SECONDARY2, state2); + _replica->set_secondary_bulk_load_state(SECONDARY_HP, state1); + _replica->set_secondary_bulk_load_state(SECONDARY_HP2, state2); } void mock_group_progress(bulk_load_status::type p_status, 
@@ -346,8 +353,8 @@ class replica_bulk_loader_test : public replica_test_base partition_bulk_load_state state1, state2; state1.__set_ingest_status(status1); state2.__set_ingest_status(status2); - _replica->set_secondary_bulk_load_state(SECONDARY, state1); - _replica->set_secondary_bulk_load_state(SECONDARY2, state2); + _replica->set_secondary_bulk_load_state(SECONDARY_HP, state1); + _replica->set_secondary_bulk_load_state(SECONDARY_HP2, state2); } void mock_group_ingestion_states(ingestion_status::type s1_status, @@ -372,8 +379,8 @@ class replica_bulk_loader_test : public replica_test_base partition_bulk_load_state state1, state2; state1.__set_is_cleaned_up(s1_cleaned_up); state2.__set_is_cleaned_up(s2_cleaned_up); - _replica->set_secondary_bulk_load_state(SECONDARY, state1); - _replica->set_secondary_bulk_load_state(SECONDARY2, state2); + _replica->set_secondary_bulk_load_state(SECONDARY_HP, state1); + _replica->set_secondary_bulk_load_state(SECONDARY_HP2, state2); } // helper functions @@ -382,7 +389,8 @@ class replica_bulk_loader_test : public replica_test_base int32_t get_download_progress() { return _bulk_loader->_download_progress.load(); } bool is_secondary_bulk_load_state_reset() { - const partition_bulk_load_state &state = _replica->get_secondary_bulk_load_state(SECONDARY); + const partition_bulk_load_state &state = + _replica->get_secondary_bulk_load_state(SECONDARY_HP); bool is_download_state_reset = (state.__isset.download_progress && state.__isset.download_status && state.download_progress == 0 && state.download_status == ERR_OK); @@ -410,9 +418,12 @@ class replica_bulk_loader_test : public replica_test_base std::string ROOT_PATH = "bulk_load_root"; gpid PID = gpid(1, 0); ballot BALLOT = 3; - rpc_address PRIMARY = rpc_address("127.0.0.2", 34801); - rpc_address SECONDARY = rpc_address("127.0.0.3", 34801); - rpc_address SECONDARY2 = rpc_address("127.0.0.4", 34801); + rpc_address PRIMARY = rpc_address("127.0.0.1", 34801); + rpc_address SECONDARY = 
rpc_address("127.0.0.1", 34801); + rpc_address SECONDARY2 = rpc_address("127.0.0.1", 34801); + host_port PRIMARY_HP = host_port("localhost", 34801); + host_port SECONDARY_HP = host_port("localhost", 34801); + host_port SECONDARY_HP2 = host_port("localhost", 34801); int32_t MAX_DOWNLOADING_COUNT = 5; std::string LOCAL_DIR = bulk_load_constant::BULK_LOAD_LOCAL_ROOT_DIR; std::string METADATA = bulk_load_constant::BULK_LOAD_METADATA; diff --git a/src/replica/duplication/duplication_sync_timer.cpp b/src/replica/duplication/duplication_sync_timer.cpp index 0682bd4317..8b9adce15e 100644 --- a/src/replica/duplication/duplication_sync_timer.cpp +++ b/src/replica/duplication/duplication_sync_timer.cpp @@ -71,6 +71,7 @@ void duplication_sync_timer::run() auto req = std::make_unique(); req->node = _stub->primary_address(); + req->__set_hp_node(_stub->primary_host_port()); // collects confirm points from all primaries on this server for (const replica_ptr &r : get_all_primaries()) { diff --git a/src/replica/duplication/replica_follower.cpp b/src/replica/duplication/replica_follower.cpp index 10a09c783a..6776aa188c 100644 --- a/src/replica/duplication/replica_follower.cpp +++ b/src/replica/duplication/replica_follower.cpp @@ -32,7 +32,9 @@ #include "nfs/nfs_node.h" #include "replica/replica.h" #include "replica/replica_stub.h" -#include "runtime/rpc/group_address.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/group_host_port.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -73,8 +75,8 @@ void replica_follower::init_master_info() dsn::utils::split_args(meta_list_str.c_str(), metas, ','); CHECK(!metas.empty(), "master cluster meta list is invalid!"); for (const auto &meta : metas) { - dsn::rpc_address node; - CHECK(node.from_string_ipv4(meta.c_str()), "{} is invalid meta address", meta); + dsn::host_port node; + CHECK(node.from_string(meta), "{} is 
invalid meta host_port", meta); _master_meta_list.emplace_back(std::move(node)); } } @@ -105,9 +107,9 @@ error_code replica_follower::duplicate_checkpoint() // ThreadPool: THREAD_POOL_DEFAULT void replica_follower::async_duplicate_checkpoint_from_master_replica() { - rpc_address meta_servers; + host_port meta_servers; meta_servers.assign_group(_master_cluster_name.c_str()); - meta_servers.group_address()->add_list(_master_meta_list); + meta_servers.group_host_port()->add_list(_master_meta_list); query_cfg_request meta_config_request; meta_config_request.app_name = _master_app_name; @@ -118,18 +120,21 @@ void replica_follower::async_duplicate_checkpoint_from_master_replica() dsn::message_ex *msg = dsn::message_ex::create_request( RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX, 0, get_gpid().thread_hash()); dsn::marshall(msg, meta_config_request); - rpc::call(meta_servers, msg, &_tracker, [&](error_code err, query_cfg_response &&resp) mutable { - FAIL_POINT_INJECT_F("duplicate_checkpoint_ok", [&](absl::string_view s) -> void { - _tracker.set_tasks_success(); - return; - }); + rpc::call(_replica->get_dns_resolver()->resolve_address(meta_servers), + msg, + &_tracker, + [&](error_code err, query_cfg_response &&resp) mutable { + FAIL_POINT_INJECT_F("duplicate_checkpoint_ok", [&](absl::string_view s) -> void { + _tracker.set_tasks_success(); + return; + }); - FAIL_POINT_INJECT_F("duplicate_checkpoint_failed", - [&](absl::string_view s) -> void { return; }); - if (update_master_replica_config(err, std::move(resp)) == ERR_OK) { - copy_master_replica_checkpoint(); - } - }); + FAIL_POINT_INJECT_F("duplicate_checkpoint_failed", + [&](absl::string_view s) -> void { return; }); + if (update_master_replica_config(err, std::move(resp)) == ERR_OK) { + copy_master_replica_checkpoint(); + } + }); } // ThreadPool: THREAD_POOL_DEFAULT @@ -165,7 +170,7 @@ error_code replica_follower::update_master_replica_config(error_code err, query_ return ERR_INCONSISTENT_STATE; } - if 
(dsn_unlikely(resp.partitions[0].primary == rpc_address::s_invalid_address)) { + if (dsn_unlikely(resp.partitions[0].hp_primary == host_port::s_invalid_host_port)) { LOG_ERROR_PREFIX("master[{}] partition address is invalid", master_replica_name()); return ERR_INVALID_STATE; } @@ -175,6 +180,7 @@ error_code replica_follower::update_master_replica_config(error_code err, query_ LOG_INFO_PREFIX( "query master[{}] config successfully and update local config: remote={}, gpid={}", master_replica_name(), + _master_replica_config.hp_primary, _master_replica_config.primary, _master_replica_config.pid); return ERR_OK; @@ -217,13 +223,15 @@ error_code replica_follower::nfs_copy_checkpoint(error_code err, learn_response return ERR_FILE_OPERATION_FAILED; } - nfs_copy_remote_files( - resp.address, resp.replica_disk_tag, resp.base_local_dir, resp.state.files, dest); + host_port hp; + GET_HOST_PORT(resp, address, hp); + + nfs_copy_remote_files(hp, resp.replica_disk_tag, resp.base_local_dir, resp.state.files, dest); return ERR_OK; } // ThreadPool: THREAD_POOL_DEFAULT -void replica_follower::nfs_copy_remote_files(const rpc_address &remote_node, +void replica_follower::nfs_copy_remote_files(const host_port &remote_node, const std::string &remote_disk, const std::string &remote_dir, std::vector &file_list, diff --git a/src/replica/duplication/replica_follower.h b/src/replica/duplication/replica_follower.h index 8a8842fccf..83dcb446ac 100644 --- a/src/replica/duplication/replica_follower.h +++ b/src/replica/duplication/replica_follower.h @@ -27,6 +27,7 @@ #include "dsn.layer2_types.h" #include "replica/replica_base.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task_tracker.h" #include "utils/error_code.h" #include "utils/zlocks.h" @@ -47,7 +48,7 @@ class replica_follower : replica_base const std::string &get_master_app_name() const { return _master_app_name; }; - const std::vector &get_master_meta_list() const { return 
_master_meta_list; }; + const std::vector &get_master_meta_list() const { return _master_meta_list; }; const bool is_need_duplicate() const { return need_duplicate; } @@ -59,7 +60,7 @@ class replica_follower : replica_base std::string _master_cluster_name; std::string _master_app_name; - std::vector _master_meta_list; + std::vector _master_meta_list; partition_configuration _master_replica_config; bool need_duplicate{false}; @@ -69,7 +70,7 @@ class replica_follower : replica_base error_code update_master_replica_config(error_code err, query_cfg_response &&resp); void copy_master_replica_checkpoint(); error_code nfs_copy_checkpoint(error_code err, learn_response &&resp); - void nfs_copy_remote_files(const rpc_address &remote_node, + void nfs_copy_remote_files(const host_port &remote_node, const std::string &remote_disk, const std::string &remote_dir, std::vector &file_list, @@ -78,9 +79,10 @@ class replica_follower : replica_base std::string master_replica_name() { std::string app_info = fmt::format("{}.{}", _master_cluster_name, _master_app_name); - if (_master_replica_config.primary != rpc_address::s_invalid_address) { - return fmt::format("{}({}|{})", + if (_master_replica_config.hp_primary != host_port::s_invalid_host_port) { + return fmt::format("{}({}({})|{})", app_info, + _master_replica_config.hp_primary.to_string(), _master_replica_config.primary.to_string(), _master_replica_config.pid.to_string()); } diff --git a/src/replica/duplication/test/replica_follower_test.cpp b/src/replica/duplication/test/replica_follower_test.cpp index e952f115b3..e18d39a917 100644 --- a/src/replica/duplication/test/replica_follower_test.cpp +++ b/src/replica/duplication/test/replica_follower_test.cpp @@ -31,7 +31,9 @@ #include "nfs/nfs_node.h" #include "replica/duplication/replica_follower.h" #include "replica/test/mock_utils.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include 
"runtime/task/task_tracker.h" #include "utils/autoref_ptr.h" #include "utils/error_code.h" @@ -104,7 +106,8 @@ class replica_follower_test : public duplication_test_base void init_nfs() { - stub->_nfs = nfs_node::create(); + auto resolver = std::make_shared(); + stub->_nfs = nfs_node::create(resolver); stub->_nfs->start(); } @@ -119,7 +122,7 @@ TEST_P(replica_follower_test, test_init_master_info) { _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterClusterKey, "master"); _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterMetasKey, - "127.0.0.1:34801,127.0.0.2:34801,127.0.0.3:34802"); + "127.0.0.1:34801,127.0.0.1:34802,127.0.0.1:34803"); update_mock_replica(_app_info); auto follower = _mock_replica->get_replica_follower(); @@ -127,7 +130,7 @@ TEST_P(replica_follower_test, test_init_master_info) ASSERT_EQ(follower->get_master_cluster_name(), "master"); ASSERT_TRUE(follower->is_need_duplicate()); ASSERT_TRUE(_mock_replica->is_duplication_follower()); - std::vector test_ip{"127.0.0.1:34801", "127.0.0.2:34801", "127.0.0.3:34802"}; + std::vector test_ip{"127.0.0.1:34801", "127.0.0.1:34802", "127.0.0.1:34803"}; for (int i = 0; i < follower->get_master_meta_list().size(); i++) { ASSERT_EQ(std::string(follower->get_master_meta_list()[i].to_string()), test_ip[i]); } @@ -143,7 +146,7 @@ TEST_P(replica_follower_test, test_duplicate_checkpoint) { _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterClusterKey, "master"); _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterMetasKey, - "127.0.0.1:34801,127.0.0.2:34801,127.0.0.3:34802"); + "127.0.0.1:34801,127.0.0.1:34802,127.0.0.1:34803"); update_mock_replica(_app_info); auto follower = _mock_replica->get_replica_follower(); @@ -163,7 +166,7 @@ TEST_P(replica_follower_test, test_async_duplicate_checkpoint_from_master_replic { _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterClusterKey, "master"); 
_app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterMetasKey, - "127.0.0.1:34801,127.0.0.2:34801,127.0.0.3:34802"); + "127.0.0.1:34801,127.0.0.1:34802,127.0.0.1:34803"); update_mock_replica(_app_info); auto follower = _mock_replica->get_replica_follower(); @@ -185,17 +188,19 @@ TEST_P(replica_follower_test, test_update_master_replica_config) { _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterClusterKey, "master"); _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterMetasKey, - "127.0.0.1:34801,127.0.0.2:34801,127.0.0.3:34802"); + "127.0.0.1:34801,127.0.0.1:34802,127.0.0.1:34803"); update_mock_replica(_app_info); auto follower = _mock_replica->get_replica_follower(); query_cfg_response resp; ASSERT_EQ(update_master_replica_config(follower, resp), ERR_INCONSISTENT_STATE); ASSERT_EQ(master_replica_config(follower).primary, rpc_address::s_invalid_address); + ASSERT_EQ(master_replica_config(follower).hp_primary, host_port::s_invalid_host_port); resp.partition_count = 100; ASSERT_EQ(update_master_replica_config(follower, resp), ERR_INCONSISTENT_STATE); ASSERT_EQ(master_replica_config(follower).primary, rpc_address::s_invalid_address); + ASSERT_EQ(master_replica_config(follower).hp_primary, host_port::s_invalid_host_port); resp.partition_count = _app_info.partition_count; partition_configuration p; @@ -203,28 +208,35 @@ TEST_P(replica_follower_test, test_update_master_replica_config) resp.partitions.emplace_back(p); ASSERT_EQ(update_master_replica_config(follower, resp), ERR_INVALID_DATA); ASSERT_EQ(master_replica_config(follower).primary, rpc_address::s_invalid_address); + ASSERT_EQ(master_replica_config(follower).hp_primary, host_port::s_invalid_host_port); resp.partitions.clear(); p.pid = gpid(2, 100); resp.partitions.emplace_back(p); ASSERT_EQ(update_master_replica_config(follower, resp), ERR_INCONSISTENT_STATE); ASSERT_EQ(master_replica_config(follower).primary, rpc_address::s_invalid_address); + 
ASSERT_EQ(master_replica_config(follower).hp_primary, host_port::s_invalid_host_port); resp.partitions.clear(); p.primary = rpc_address::s_invalid_address; + p.__set_hp_primary(host_port::s_invalid_host_port); p.pid = gpid(2, 1); resp.partitions.emplace_back(p); ASSERT_EQ(update_master_replica_config(follower, resp), ERR_INVALID_STATE); ASSERT_EQ(master_replica_config(follower).primary, rpc_address::s_invalid_address); + ASSERT_EQ(master_replica_config(follower).hp_primary, host_port::s_invalid_host_port); resp.partitions.clear(); p.pid = gpid(2, 1); p.primary = rpc_address("127.0.0.1", 34801); - p.secondaries.emplace_back(rpc_address("127.0.0.2", 34801)); - p.secondaries.emplace_back(rpc_address("127.0.0.3", 34801)); + p.__set_hp_primary(host_port("localhost", 34801)); + p.__set_hp_secondaries({}); + p.hp_secondaries.emplace_back(host_port("localhost", 34802)); + p.hp_secondaries.emplace_back(host_port("localhost", 34803)); resp.partitions.emplace_back(p); ASSERT_EQ(update_master_replica_config(follower, resp), ERR_OK); ASSERT_EQ(master_replica_config(follower).primary, p.primary); + ASSERT_EQ(master_replica_config(follower).hp_primary, p.hp_primary); ASSERT_EQ(master_replica_config(follower).pid, p.pid); } @@ -232,7 +244,7 @@ TEST_P(replica_follower_test, test_nfs_copy_checkpoint) { _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterClusterKey, "master"); _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterMetasKey, - "127.0.0.1:34801,127.0.0.2:34801,127.0.0.3:34802"); + "127.0.0.1:34801,127.0.0.1:34802,127.0.0.1:34803"); update_mock_replica(_app_info); init_nfs(); auto follower = _mock_replica->get_replica_follower(); @@ -241,6 +253,7 @@ TEST_P(replica_follower_test, test_nfs_copy_checkpoint) auto resp = learn_response(); resp.address = rpc_address("127.0.0.1", 34801); + resp.__set_hp_address(host_port("localhost", 34801)); std::string dest = utils::filesystem::path_combine( _mock_replica->dir(), 
duplication_constants::kDuplicationCheckpointRootDir); diff --git a/src/replica/replica.cpp b/src/replica/replica.cpp index 1286d74526..f23bb9f9e3 100644 --- a/src/replica/replica.cpp +++ b/src/replica/replica.cpp @@ -273,7 +273,7 @@ replica::replica(replica_stub *stub, bool need_restore, bool is_duplication_follower) : serverlet("replica"), - replica_base(gpid, fmt::format("{}@{}", gpid, stub->_primary_address_str), app.app_name), + replica_base(gpid, fmt::format("{}@{}", gpid, stub->_primary_host_port_cache), app.app_name), _app_info(app), _primary_states(gpid, FLAGS_staleness_for_commit, FLAGS_batch_write_disabled), _potential_secondary_states(this), @@ -319,7 +319,8 @@ replica::replica(replica_stub *stub, METRIC_VAR_INIT_replica(backup_cancelled_count), METRIC_VAR_INIT_replica(backup_file_upload_failed_count), METRIC_VAR_INIT_replica(backup_file_upload_successful_count), - METRIC_VAR_INIT_replica(backup_file_upload_total_bytes) + METRIC_VAR_INIT_replica(backup_file_upload_total_bytes), + _dns_resolver(stub->get_dns_resolver()) { CHECK(!_app_info.app_type.empty(), ""); CHECK_NOTNULL(stub, ""); diff --git a/src/replica/replica.h b/src/replica/replica.h index e552a33b0b..7e609671f8 100644 --- a/src/replica/replica.h +++ b/src/replica/replica.h @@ -67,8 +67,9 @@ class rocksdb_wrapper_test; } // namespace pegasus namespace dsn { +class dns_resolver; class gpid; -class rpc_address; +class host_port; namespace dist { namespace block_service { @@ -289,6 +290,8 @@ class replica : public serverlet, public ref_counter, public replica_ba METRIC_DEFINE_INCREMENT(backup_file_upload_successful_count) METRIC_DEFINE_INCREMENT_BY(backup_file_upload_total_bytes) + std::shared_ptr get_dns_resolver() { return _dns_resolver; } + static const std::string kAppInfo; protected: @@ -321,7 +324,7 @@ class replica : public serverlet, public ref_counter, public replica_ba // See more about it in `replica_bulk_loader.cpp` void init_prepare(mutation_ptr &mu, bool reconciliation, bool 
pop_all_committed_mutations = false); - void send_prepare_message(::dsn::rpc_address addr, + void send_prepare_message(::dsn::host_port addr, partition_status::type status, const mutation_ptr &mu, int timeout_milliseconds, @@ -347,7 +350,7 @@ class replica : public serverlet, public ref_counter, public replica_ba learn_response &&resp); void on_learn_remote_state_completed(error_code err); void handle_learning_error(error_code err, bool is_local_error); - error_code handle_learning_succeeded_on_primary(::dsn::rpc_address node, + error_code handle_learning_succeeded_on_primary(::dsn::host_port node, uint64_t learn_signature); void notify_learn_completion(); error_code apply_learned_state_from_private_log(learn_state &state); @@ -376,7 +379,7 @@ class replica : public serverlet, public ref_counter, public replica_ba // failure handling void handle_local_failure(error_code error); void handle_remote_failure(partition_status::type status, - ::dsn::rpc_address node, + ::dsn::host_port node, error_code error, const std::string &caused_by); @@ -384,12 +387,12 @@ class replica : public serverlet, public ref_counter, public replica_ba // reconfiguration void assign_primary(configuration_update_request &proposal); void add_potential_secondary(configuration_update_request &proposal); - void upgrade_to_secondary_on_primary(::dsn::rpc_address node); + void upgrade_to_secondary_on_primary(::dsn::host_port node); void downgrade_to_secondary_on_primary(configuration_update_request &proposal); void downgrade_to_inactive_on_primary(configuration_update_request &proposal); void remove(configuration_update_request &proposal); void update_configuration_on_meta_server(config_type::type type, - ::dsn::rpc_address node, + ::dsn::host_port node, partition_configuration &newConfig); void on_update_configuration_on_meta_server_reply(error_code err, @@ -698,6 +701,8 @@ class replica : public serverlet, public ref_counter, public replica_ba bool _allow_ingest_behind{false}; // Indicate where 
the storage engine data is corrupted and unrecoverable. bool _data_corrupted{false}; + + std::shared_ptr _dns_resolver; }; typedef dsn::ref_ptr replica_ptr; } // namespace replication diff --git a/src/replica/replica_2pc.cpp b/src/replica/replica_2pc.cpp index 0e335d49b1..df18139690 100644 --- a/src/replica/replica_2pc.cpp +++ b/src/replica/replica_2pc.cpp @@ -56,7 +56,9 @@ #include "replica/replication_app_base.h" #include "replica_stub.h" #include "runtime/api_layer1.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/rpc_stream.h" #include "runtime/rpc/serialization.h" @@ -198,7 +200,7 @@ void replica::on_client_write(dsn::message_ex *request, bool ignore_throttling) LOG_INFO_PREFIX("receive bulk load ingestion request"); // bulk load ingestion request requires that all secondaries should be alive - if (static_cast(_primary_states.membership.secondaries.size()) + 1 < + if (static_cast(_primary_states.membership.hp_secondaries.size()) + 1 < _primary_states.membership.max_replica_count) { response_client_write(request, ERR_NOT_ENOUGH_MEMBER); return; @@ -207,7 +209,7 @@ void replica::on_client_write(dsn::message_ex *request, bool ignore_throttling) _bulk_load_ingestion_start_time_ms = dsn_now_ms(); } - if (static_cast(_primary_states.membership.secondaries.size()) + 1 < + if (static_cast(_primary_states.membership.hp_secondaries.size()) + 1 < _options->app_mutation_2pc_min_replica_count(_app_info.max_replica_count)) { response_client_write(request, ERR_NOT_ENOUGH_MEMBER); return; @@ -267,7 +269,7 @@ void replica::init_prepare(mutation_ptr &mu, bool reconciliation, bool pop_all_c break; } LOG_INFO_PREFIX("try to prepare bulk load mutation({})", mu->name()); - if (static_cast(_primary_states.membership.secondaries.size()) + 1 < + if (static_cast(_primary_states.membership.hp_secondaries.size()) + 1 < 
_primary_states.membership.max_replica_count) { err = ERR_NOT_ENOUGH_MEMBER; break; @@ -280,7 +282,7 @@ void replica::init_prepare(mutation_ptr &mu, bool reconciliation, bool pop_all_c // stop prepare if there are too few replicas unless it's a reconciliation // for reconciliation, we should ensure every prepared mutation to be committed // please refer to PacificA paper - if (static_cast(_primary_states.membership.secondaries.size()) + 1 < + if (static_cast(_primary_states.membership.hp_secondaries.size()) + 1 < _options->app_mutation_2pc_min_replica_count(_app_info.max_replica_count) && !reconciliation) { err = ERR_NOT_ENOUGH_MEMBER; @@ -297,11 +299,10 @@ void replica::init_prepare(mutation_ptr &mu, bool reconciliation, bool pop_all_c // remote prepare mu->set_prepare_ts(); - mu->set_left_secondary_ack_count((unsigned int)_primary_states.membership.secondaries.size()); - for (auto it = _primary_states.membership.secondaries.begin(); - it != _primary_states.membership.secondaries.end(); - ++it) { - send_prepare_message(*it, + mu->set_left_secondary_ack_count( + (unsigned int)_primary_states.membership.hp_secondaries.size()); + for (const auto &secondary : _primary_states.membership.hp_secondaries) { + send_prepare_message(secondary, partition_status::PS_SECONDARY, mu, FLAGS_prepare_timeout_ms_for_secondaries, @@ -356,15 +357,15 @@ void replica::init_prepare(mutation_ptr &mu, bool reconciliation, bool pop_all_c return; } -void replica::send_prepare_message(::dsn::rpc_address addr, +void replica::send_prepare_message(::dsn::host_port hp, partition_status::type status, const mutation_ptr &mu, int timeout_milliseconds, bool pop_all_committed_mutations, int64_t learn_signature) { - mu->_tracer->add_sub_tracer(addr.to_string()); - ADD_POINT(mu->_tracer->sub_tracer(addr.to_string())); + mu->_tracer->add_sub_tracer(hp.to_string()); + ADD_POINT(mu->_tracer->sub_tracer(hp.to_string())); dsn::message_ex *msg = dsn::message_ex::create_request( RPC_PREPARE, 
timeout_milliseconds, get_gpid().thread_hash()); @@ -382,8 +383,8 @@ void replica::send_prepare_message(::dsn::rpc_address addr, mu->write_to(writer, msg); } - mu->remote_tasks()[addr] = - rpc::call(addr, + mu->remote_tasks()[hp] = + rpc::call(_dns_resolver->resolve_address(hp), msg, &_tracker, [=](error_code err, dsn::message_ex *request, dsn::message_ex *reply) { @@ -393,7 +394,7 @@ void replica::send_prepare_message(::dsn::rpc_address addr, LOG_DEBUG_PREFIX("mutation {} send_prepare_message to {} as {}", mu->name(), - addr, + hp, enum_to_string(rconfig.status)); } @@ -623,7 +624,7 @@ void replica::on_prepare_reply(std::pair p CHECK_EQ_MSG(mu->data.header.ballot, get_ballot(), "{}: invalid mutation ballot", mu->name()); - ::dsn::rpc_address node = request->to_address; + ::dsn::host_port node = request->to_host_port; partition_status::type st = _primary_states.get_node_status(node); // handle reply @@ -636,7 +637,7 @@ void replica::on_prepare_reply(std::pair p ::dsn::unmarshall(reply, resp); } - auto send_prepare_tracer = mu->_tracer->sub_tracer(request->to_address.to_string()); + auto send_prepare_tracer = mu->_tracer->sub_tracer(request->to_host_port.to_string()); APPEND_EXTERN_POINT(send_prepare_tracer, resp.receive_timestamp, "remote_receive"); APPEND_EXTERN_POINT(send_prepare_tracer, resp.response_timestamp, "remote_reply"); ADD_CUSTOM_POINT(send_prepare_tracer, resp.err.to_string()); diff --git a/src/replica/replica_check.cpp b/src/replica/replica_check.cpp index ae12c2bff6..c3f4ea78d0 100644 --- a/src/replica/replica_check.cpp +++ b/src/replica/replica_check.cpp @@ -47,7 +47,9 @@ #include "replica/replication_app_base.h" #include "replica_stub.h" #include "runtime/api_layer1.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/async_calls.h" #include "runtime/task/task.h" #include "split/replica_split_manager.h" @@ -115,14 +117,16 @@ void 
replica::broadcast_group_check() } for (auto it = _primary_states.statuses.begin(); it != _primary_states.statuses.end(); ++it) { - if (it->first == _stub->_primary_address) + if (it->first == _stub->primary_host_port()) continue; - ::dsn::rpc_address addr = it->first; + auto hp = it->first; + auto addr = _dns_resolver->resolve_address(hp); std::shared_ptr request(new group_check_request); request->app = _app_info; request->node = addr; + request->__set_hp_node(hp); _primary_states.get_replica_config(it->second, request->config); request->last_committed_decree = last_committed_decree(); request->__set_confirmed_decree(_duplication_mgr->min_confirmed_decree()); @@ -136,12 +140,12 @@ void replica::broadcast_group_check() } if (request->config.status == partition_status::PS_POTENTIAL_SECONDARY) { - auto it = _primary_states.learners.find(addr); - CHECK(it != _primary_states.learners.end(), "learner {} is missing", addr); + auto it = _primary_states.learners.find(hp); + CHECK(it != _primary_states.learners.end(), "learner {} is missing", hp); request->config.learner_signature = it->second.signature; } - LOG_INFO_PREFIX("send group check to {} with state {}", addr, enum_to_string(it->second)); + LOG_INFO_PREFIX("send group check to {} with state {}", hp, enum_to_string(it->second)); dsn::task_ptr callback_task = rpc::call(addr, @@ -155,7 +159,7 @@ void replica::broadcast_group_check() std::chrono::milliseconds(0), get_gpid().thread_hash()); - _primary_states.group_check_pending_replies[addr] = callback_task; + _primary_states.group_check_pending_replies[hp] = callback_task; } // send empty prepare when necessary @@ -172,8 +176,9 @@ void replica::on_group_check(const group_check_request &request, { _checker.only_one_thread_access(); - LOG_INFO_PREFIX("process group check, primary = {}, ballot = {}, status = {}, " + LOG_INFO_PREFIX("process group check, primary = {}({}), ballot = {}, status = {}, " "last_committed_decree = {}, confirmed_decree = {}", + 
request.config.hp_primary, request.config.primary, request.config.ballot, enum_to_string(request.config.status), @@ -217,7 +222,8 @@ void replica::on_group_check(const group_check_request &request, } response.pid = get_gpid(); - response.node = _stub->_primary_address; + response.node = _stub->primary_address(); + response.__set_hp_node(_stub->primary_host_port()); response.err = ERR_OK; if (status() == partition_status::PS_ERROR) { response.err = ERR_INVALID_STATE; @@ -236,27 +242,29 @@ void replica::on_group_check_reply(error_code err, { _checker.only_one_thread_access(); + host_port hp_node; + GET_HOST_PORT(*req, node, hp_node); if (partition_status::PS_PRIMARY != status() || req->config.ballot < get_ballot()) { return; } - auto r = _primary_states.group_check_pending_replies.erase(req->node); - CHECK_EQ_MSG(r, 1, "invalid node address, address = {}", req->node); + auto r = _primary_states.group_check_pending_replies.erase(hp_node); + CHECK_EQ_MSG(r, 1, "invalid node address, address = {}({})", hp_node, req->node); if (err != ERR_OK || resp->err != ERR_OK) { if (ERR_OK == err) { err = resp->err; } - handle_remote_failure(req->config.status, req->node, err, "group check"); + handle_remote_failure(req->config.status, hp_node, err, "group check"); METRIC_VAR_INCREMENT(group_check_failed_requests); } else { if (resp->learner_status_ == learner_status::LearningSucceeded && req->config.status == partition_status::PS_POTENTIAL_SECONDARY) { - handle_learning_succeeded_on_primary(req->node, resp->learner_signature); + handle_learning_succeeded_on_primary(hp_node, resp->learner_signature); } _split_mgr->primary_parent_handle_stop_split(req, resp); if (req->config.status == partition_status::PS_SECONDARY) { - _primary_states.secondary_disk_status[req->node] = resp->disk_status; + _primary_states.secondary_disk_status[hp_node] = resp->disk_status; } } } diff --git a/src/replica/replica_chkpt.cpp b/src/replica/replica_chkpt.cpp index d255573dd9..cf50edb078 100644 --- 
a/src/replica/replica_chkpt.cpp +++ b/src/replica/replica_chkpt.cpp @@ -256,7 +256,8 @@ void replica::on_query_last_checkpoint(/*out*/ learn_response &response) // for example: base_local_dir = "./data" + "checkpoint.1024" = "./data/checkpoint.1024" response.base_local_dir = utils::filesystem::path_combine( _app->data_dir(), checkpoint_folder(response.state.to_decree_included)); - response.address = _stub->_primary_address; + response.address = _stub->primary_address(); + response.__set_hp_address(_stub->primary_host_port()); for (auto &file : response.state.files) { // response.state.files contain file absolute path, for example: // "./data/checkpoint.1024/1.sst" use `substr` to get the file name: 1.sst diff --git a/src/replica/replica_config.cpp b/src/replica/replica_config.cpp index bf2e07f503..e41078b649 100644 --- a/src/replica/replica_config.cpp +++ b/src/replica/replica_config.cpp @@ -58,7 +58,9 @@ #include "replica/replication_app_base.h" #include "replica_stub.h" #include "runtime/api_layer1.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "security/access_controller.h" @@ -143,7 +145,7 @@ void replica::on_config_proposal(configuration_update_request &proposal) void replica::assign_primary(configuration_update_request &proposal) { - CHECK_EQ(proposal.node, _stub->_primary_address); + CHECK_EQ(proposal.hp_node, _stub->primary_host_port()); if (status() == partition_status::PS_PRIMARY) { LOG_WARNING_PREFIX("invalid assgin primary proposal as the node is in {}", @@ -162,10 +164,12 @@ void replica::assign_primary(configuration_update_request &proposal) return; } - proposal.config.primary = _stub->_primary_address; - replica_helper::remove_node(_stub->_primary_address, proposal.config.secondaries); + proposal.config.primary = _stub->primary_address(); + 
proposal.config.__set_hp_primary(_stub->primary_host_port()); + replica_helper::remove_node(_stub->primary_address(), proposal.config.secondaries); + replica_helper::remove_node(_stub->primary_host_port(), proposal.config.hp_secondaries); - update_configuration_on_meta_server(proposal.type, proposal.node, proposal.config); + update_configuration_on_meta_server(proposal.type, proposal.hp_node, proposal.config); } // run on primary to send ADD_LEARNER request to candidate replica server @@ -179,20 +183,20 @@ void replica::add_potential_secondary(configuration_update_request &proposal) CHECK_EQ(proposal.config.ballot, get_ballot()); CHECK_EQ(proposal.config.pid, _primary_states.membership.pid); - CHECK_EQ(proposal.config.primary, _primary_states.membership.primary); - CHECK(proposal.config.secondaries == _primary_states.membership.secondaries, ""); - CHECK(!_primary_states.check_exist(proposal.node, partition_status::PS_PRIMARY), + CHECK_EQ(proposal.config.hp_primary, _primary_states.membership.hp_primary); + CHECK(proposal.config.hp_secondaries == _primary_states.membership.hp_secondaries, ""); + CHECK(!_primary_states.check_exist(proposal.hp_node, partition_status::PS_PRIMARY), "node = {}", - proposal.node); - CHECK(!_primary_states.check_exist(proposal.node, partition_status::PS_SECONDARY), + proposal.hp_node); + CHECK(!_primary_states.check_exist(proposal.hp_node, partition_status::PS_SECONDARY), "node = {}", - proposal.node); + proposal.hp_node); int potential_secondaries_count = - _primary_states.membership.secondaries.size() + _primary_states.learners.size(); + _primary_states.membership.hp_secondaries.size() + _primary_states.learners.size(); if (potential_secondaries_count >= _primary_states.membership.max_replica_count - 1) { if (proposal.type == config_type::CT_ADD_SECONDARY) { - if (_primary_states.learners.find(proposal.node) == _primary_states.learners.end()) { + if (_primary_states.learners.find(proposal.hp_node) == _primary_states.learners.end()) { 
LOG_INFO_PREFIX( "already have enough secondaries or potential secondaries, ignore new " "potential secondary proposal"); @@ -204,7 +208,8 @@ void replica::add_potential_secondary(configuration_update_request &proposal) "secondary proposal"); return; } else { - LOG_INFO_PREFIX("add a new secondary({}) for future load balancer", proposal.node); + LOG_INFO_PREFIX("add a new secondary({}) for future load balancer", + proposal.hp_node); } } else { CHECK(false, "invalid config_type, type = {}", enum_to_string(proposal.type)); @@ -215,38 +220,44 @@ void replica::add_potential_secondary(configuration_update_request &proposal) state.prepare_start_decree = invalid_decree; state.timeout_task = nullptr; // TODO: add timer for learner task - auto it = _primary_states.learners.find(proposal.node); + auto it = _primary_states.learners.find(proposal.hp_node); if (it != _primary_states.learners.end()) { state.signature = it->second.signature; } else { state.signature = ++_primary_states.next_learning_version; - _primary_states.learners[proposal.node] = state; - _primary_states.statuses[proposal.node] = partition_status::PS_POTENTIAL_SECONDARY; + _primary_states.learners[proposal.hp_node] = state; + _primary_states.statuses[host_port(proposal.hp_node)] = + partition_status::PS_POTENTIAL_SECONDARY; } group_check_request request; request.app = _app_info; request.node = proposal.node; + request.__set_hp_node(proposal.hp_node); _primary_states.get_replica_config( partition_status::PS_POTENTIAL_SECONDARY, request.config, state.signature); request.last_committed_decree = last_committed_decree(); LOG_INFO_PREFIX("call one way {} to start learning with signature [{:#018x}]", - proposal.node, + proposal.hp_node, state.signature); rpc::call_one_way_typed( proposal.node, RPC_LEARN_ADD_LEARNER, request, get_gpid().thread_hash()); } -void replica::upgrade_to_secondary_on_primary(::dsn::rpc_address node) +void replica::upgrade_to_secondary_on_primary(::dsn::host_port node) { 
LOG_INFO_PREFIX("upgrade potential secondary {} to secondary", node); partition_configuration newConfig = _primary_states.membership; // add secondary - newConfig.secondaries.push_back(node); + if (!newConfig.__isset.hp_secondaries) { + newConfig.__set_hp_secondaries({}); + } + newConfig.hp_secondaries.push_back(node); + newConfig.secondaries.push_back(_stub->get_dns_resolver()->resolve_address(node)); update_configuration_on_meta_server(config_type::CT_UPGRADE_TO_SECONDARY, node, newConfig); } @@ -257,15 +268,20 @@ void replica::downgrade_to_secondary_on_primary(configuration_update_request &pr return; CHECK_EQ(proposal.config.pid, _primary_states.membership.pid); - CHECK_EQ(proposal.config.primary, _primary_states.membership.primary); - CHECK(proposal.config.secondaries == _primary_states.membership.secondaries, ""); - CHECK_EQ(proposal.node, proposal.config.primary); + CHECK_EQ(proposal.config.hp_primary, _primary_states.membership.hp_primary); + CHECK(proposal.config.hp_secondaries == _primary_states.membership.hp_secondaries, ""); + CHECK_EQ(proposal.hp_node, proposal.config.hp_primary); proposal.config.primary.set_invalid(); + proposal.config.__set_hp_primary(host_port()); proposal.config.secondaries.push_back(proposal.node); + if (!proposal.config.__isset.hp_secondaries) { + proposal.config.__set_hp_secondaries({}); + } + proposal.config.hp_secondaries.push_back(proposal.hp_node); update_configuration_on_meta_server( - config_type::CT_DOWNGRADE_TO_SECONDARY, proposal.node, proposal.config); + config_type::CT_DOWNGRADE_TO_SECONDARY, proposal.hp_node, proposal.config); } void replica::downgrade_to_inactive_on_primary(configuration_update_request &proposal) @@ -274,19 +290,21 @@ void replica::downgrade_to_inactive_on_primary(configuration_update_request &pro return; CHECK_EQ(proposal.config.pid, _primary_states.membership.pid); - CHECK_EQ(proposal.config.primary, _primary_states.membership.primary); - CHECK(proposal.config.secondaries == 
_primary_states.membership.secondaries, ""); + CHECK_EQ(proposal.config.hp_primary, _primary_states.membership.hp_primary); + CHECK(proposal.config.hp_secondaries == _primary_states.membership.hp_secondaries, ""); - if (proposal.node == proposal.config.primary) { + if (proposal.hp_node == proposal.config.hp_primary) { proposal.config.primary.set_invalid(); + proposal.config.hp_primary.reset(); } else { - CHECK(replica_helper::remove_node(proposal.node, proposal.config.secondaries), + CHECK(replica_helper::remove_node(proposal.node, proposal.config.secondaries) && + replica_helper::remove_node(proposal.hp_node, proposal.config.hp_secondaries), "remove node failed, node = {}", proposal.node); } update_configuration_on_meta_server( - config_type::CT_DOWNGRADE_TO_INACTIVE, proposal.node, proposal.config); + config_type::CT_DOWNGRADE_TO_INACTIVE, proposal.hp_node, proposal.config); } void replica::remove(configuration_update_request &proposal) @@ -295,18 +313,20 @@ void replica::remove(configuration_update_request &proposal) return; CHECK_EQ(proposal.config.pid, _primary_states.membership.pid); - CHECK_EQ(proposal.config.primary, _primary_states.membership.primary); - CHECK(proposal.config.secondaries == _primary_states.membership.secondaries, ""); + CHECK_EQ(proposal.config.hp_primary, _primary_states.membership.hp_primary); + CHECK(proposal.config.hp_secondaries == _primary_states.membership.hp_secondaries, ""); - auto st = _primary_states.get_node_status(proposal.node); + auto st = _primary_states.get_node_status(host_port(proposal.node)); switch (st) { case partition_status::PS_PRIMARY: - CHECK_EQ(proposal.config.primary, proposal.node); + CHECK_EQ(proposal.config.hp_primary, proposal.hp_node); proposal.config.primary.set_invalid(); + proposal.config.hp_primary.reset(); break; case partition_status::PS_SECONDARY: { - CHECK(replica_helper::remove_node(proposal.node, proposal.config.secondaries), + CHECK(replica_helper::remove_node(proposal.node, 
proposal.config.secondaries) && + replica_helper::remove_node(proposal.hp_node, proposal.config.hp_secondaries), "remove_node failed, node = {}", proposal.node); } break; @@ -316,7 +336,7 @@ void replica::remove(configuration_update_request &proposal) break; } - update_configuration_on_meta_server(config_type::CT_REMOVE, proposal.node, proposal.config); + update_configuration_on_meta_server(config_type::CT_REMOVE, proposal.hp_node, proposal.config); } // from primary @@ -348,7 +368,7 @@ void replica::on_remove(const replica_configuration &request) } void replica::update_configuration_on_meta_server(config_type::type type, - ::dsn::rpc_address node, + ::dsn::host_port node, partition_configuration &newConfig) { // type should never be `CT_REGISTER_CHILD` @@ -362,7 +382,7 @@ void replica::update_configuration_on_meta_server(config_type::type type, CHECK(status() == partition_status::PS_INACTIVE && _inactive_is_transient && _is_initializing, ""); - CHECK_EQ(newConfig.primary, node); + CHECK_EQ(newConfig.hp_primary, node); } else if (type != config_type::CT_ASSIGN_PRIMARY && type != config_type::CT_UPGRADE_TO_PRIMARY) { CHECK_EQ(status(), partition_status::PS_PRIMARY); @@ -383,7 +403,8 @@ void replica::update_configuration_on_meta_server(config_type::type type, request->config = newConfig; request->config.ballot++; request->type = type; - request->node = node; + request->node = _stub->get_dns_resolver()->resolve_address(node); + request->__set_hp_node(node); ::dsn::marshall(msg, *request); @@ -397,7 +418,8 @@ void replica::update_configuration_on_meta_server(config_type::type type, enum_to_string(request->type), request->node); - rpc_address target(_stub->_failure_detector->get_servers()); + rpc_address target( + _stub->get_dns_resolver()->resolve_address(_stub->_failure_detector->get_servers())); _primary_states.reconfiguration_task = rpc::call(target, msg, @@ -438,7 +460,8 @@ void replica::on_update_configuration_on_meta_server_reply( LPC_DELAY_UPDATE_CONFIG, 
&_tracker, [ this, request, req2 = std::move(req) ]() { - rpc_address target(_stub->_failure_detector->get_servers()); + rpc_address target(_stub->get_dns_resolver()->resolve_address( + _stub->_failure_detector->get_servers())); rpc_response_task_ptr t = rpc::create_rpc_response_task( request, &_tracker, @@ -474,8 +497,8 @@ void replica::on_update_configuration_on_meta_server_reply( // post-update work items? if (resp.err == ERR_OK) { CHECK_EQ(req->config.pid, resp.config.pid); - CHECK_EQ(req->config.primary, resp.config.primary); - CHECK(req->config.secondaries == resp.config.secondaries, ""); + CHECK_EQ(req->config.hp_primary, resp.config.hp_primary); + CHECK(req->config.hp_secondaries == resp.config.hp_secondaries, ""); switch (req->type) { case config_type::CT_UPGRADE_TO_PRIMARY: @@ -489,9 +512,9 @@ void replica::on_update_configuration_on_meta_server_reply( case config_type::CT_UPGRADE_TO_SECONDARY: break; case config_type::CT_REMOVE: - if (req->node != _stub->_primary_address) { + if (req->hp_node != _stub->primary_host_port()) { replica_configuration rconfig; - replica_helper::get_replica_config(resp.config, req->node, rconfig); + replica_helper::get_replica_config(resp.config, req->hp_node, rconfig); rpc::call_one_way_typed( req->node, RPC_REMOVE_REPLICA, rconfig, get_gpid().thread_hash()); } @@ -616,11 +639,11 @@ bool replica::update_configuration(const partition_configuration &config) CHECK_GE(config.ballot, get_ballot()); replica_configuration rconfig; - replica_helper::get_replica_config(config, _stub->_primary_address, rconfig); + replica_helper::get_replica_config(config, _stub->primary_host_port(), rconfig); if (rconfig.status == partition_status::PS_PRIMARY && (rconfig.ballot > get_ballot() || status() != partition_status::PS_PRIMARY)) { - _primary_states.reset_membership(config, config.primary != _stub->_primary_address); + _primary_states.reset_membership(config, config.hp_primary != _stub->primary_host_port()); } if (config.ballot > get_ballot() 
|| @@ -1007,7 +1030,7 @@ bool replica::update_local_configuration(const replica_configuration &config, init_prepare(next, false); } - if (_primary_states.membership.secondaries.size() + 1 < + if (_primary_states.membership.hp_secondaries.size() + 1 < _options->app_mutation_2pc_min_replica_count(_app_info.max_replica_count)) { std::vector queued; _primary_states.write_queue.clear(queued); @@ -1057,10 +1080,10 @@ void replica::on_config_sync(const app_info &info, } else { if (_is_initializing) { // in initializing, when replica still primary, need to inc ballot - if (config.primary == _stub->_primary_address && + if (config.hp_primary == _stub->primary_host_port() && status() == partition_status::PS_INACTIVE && _inactive_is_transient) { update_configuration_on_meta_server(config_type::CT_PRIMARY_FORCE_UPDATE_BALLOT, - config.primary, + config.hp_primary, const_cast(config)); return; } @@ -1070,9 +1093,10 @@ void replica::on_config_sync(const app_info &info, update_configuration(config); if (status() == partition_status::PS_INACTIVE && !_inactive_is_transient) { - if (config.primary == _stub->_primary_address // dead primary + if (config.hp_primary == _stub->primary_host_port() // dead primary || - config.primary.is_invalid() // primary is dead (otherwise let primary remove this) + config.hp_primary + .is_invalid() // primary is dead (otherwise let primary remove this) ) { LOG_INFO_PREFIX("downgrade myself as inactive is not transient, remote_config({})", boost::lexical_cast(config)); diff --git a/src/replica/replica_context.cpp b/src/replica/replica_context.cpp index f557463168..07490bc50d 100644 --- a/src/replica/replica_context.cpp +++ b/src/replica/replica_context.cpp @@ -34,6 +34,7 @@ #include "replica.h" #include "replica_context.h" #include "replica_stub.h" +#include "runtime/rpc/rpc_address.h" #include "utils/error_code.h" namespace dsn { @@ -104,11 +105,11 @@ void primary_context::reset_membership(const partition_configuration &config, bo membership = config; 
- if (membership.primary.is_invalid() == false) { - statuses[membership.primary] = partition_status::PS_PRIMARY; + if (membership.hp_primary.is_invalid() == false) { + statuses[membership.hp_primary] = partition_status::PS_PRIMARY; } - for (auto it = config.secondaries.begin(); it != config.secondaries.end(); ++it) { + for (auto it = config.hp_secondaries.begin(); it != config.hp_secondaries.end(); ++it) { statuses[*it] = partition_status::PS_SECONDARY; learners.erase(*it); } @@ -124,19 +125,21 @@ void primary_context::get_replica_config(partition_status::type st, { config.pid = membership.pid; config.primary = membership.primary; + config.__set_hp_primary(membership.hp_primary); config.ballot = membership.ballot; config.status = st; config.learner_signature = learner_signature; } -bool primary_context::check_exist(::dsn::rpc_address node, partition_status::type st) +bool primary_context::check_exist(::dsn::host_port node, partition_status::type st) { switch (st) { case partition_status::PS_PRIMARY: - return membership.primary == node; + return membership.hp_primary == node; case partition_status::PS_SECONDARY: - return std::find(membership.secondaries.begin(), membership.secondaries.end(), node) != - membership.secondaries.end(); + return std::find(membership.hp_secondaries.begin(), + membership.hp_secondaries.end(), + node) != membership.hp_secondaries.end(); case partition_status::PS_POTENTIAL_SECONDARY: return learners.find(node) != learners.end(); default: @@ -145,7 +148,7 @@ bool primary_context::check_exist(::dsn::rpc_address node, partition_status::typ } } -void primary_context::reset_node_bulk_load_states(const rpc_address &node) +void primary_context::reset_node_bulk_load_states(const host_port &node) { secondary_bulk_load_states[node].__set_download_progress(0); secondary_bulk_load_states[node].__set_download_status(ERR_OK); diff --git a/src/replica/replica_context.h b/src/replica/replica_context.h index 32da41d38b..79b32b8314 100644 --- 
a/src/replica/replica_context.h +++ b/src/replica/replica_context.h @@ -41,7 +41,7 @@ #include "metadata_types.h" #include "mutation.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "utils/autoref_ptr.h" #include "utils/fmt_logging.h" @@ -59,7 +59,7 @@ struct remote_learner_state std::string last_learn_log_file; }; -typedef std::unordered_map<::dsn::rpc_address, remote_learner_state> learner_map; +typedef std::unordered_map<::dsn::host_port, remote_learner_state> learner_map; #define CLEANUP_TASK(task_, force) \ { \ @@ -104,13 +104,13 @@ class primary_context void get_replica_config(partition_status::type status, /*out*/ replica_configuration &config, uint64_t learner_signature = invalid_signature); - bool check_exist(::dsn::rpc_address node, partition_status::type status); - partition_status::type get_node_status(::dsn::rpc_address addr) const; + bool check_exist(::dsn::host_port node, partition_status::type status); + partition_status::type get_node_status(::dsn::host_port hp) const; void do_cleanup_pending_mutations(bool clean_pending_mutations = true); // reset bulk load states in secondary_bulk_load_states by node address - void reset_node_bulk_load_states(const rpc_address &node); + void reset_node_bulk_load_states(const host_port &node); void cleanup_bulk_load_states(); @@ -150,7 +150,7 @@ class primary_context // Used for partition split // child addresses who has been caught up with its parent - std::unordered_set caught_up_children; + std::unordered_set caught_up_children; // Used for partition split // whether parent's write request should be sent to child synchronously @@ -170,7 +170,7 @@ class primary_context // Used partition split // secondary replica address who has paused or canceled split - std::unordered_set split_stopped_secondary; + std::unordered_set split_stopped_secondary; // Used for partition split // primary parent query child on 
meta_server task @@ -181,13 +181,13 @@ class primary_context // group bulk_load response tasks of RPC_GROUP_BULK_LOAD for each secondary replica node_tasks group_bulk_load_pending_replies; // bulk_load_state of secondary replicas - std::unordered_map secondary_bulk_load_states; + std::unordered_map secondary_bulk_load_states; // if primary send an empty prepare after ingestion succeed to gurantee secondary commit its // ingestion request bool ingestion_is_empty_prepare_sent{false}; - // secondary rpc_address -> secondary disk_status - std::unordered_map secondary_disk_status; + // secondary host_port -> secondary disk_status + std::unordered_map secondary_disk_status; }; // Context of the secondary replica. @@ -295,9 +295,9 @@ class partition_split_context //---------------inline impl---------------------------------------------------------------- -inline partition_status::type primary_context::get_node_status(::dsn::rpc_address addr) const +inline partition_status::type primary_context::get_node_status(::dsn::host_port hp) const { - auto it = statuses.find(addr); + auto it = statuses.find(hp); return it != statuses.end() ? 
it->second : partition_status::PS_INACTIVE; } } // namespace replication diff --git a/src/replica/replica_failover.cpp b/src/replica/replica_failover.cpp index 90f88c0985..ca1bf7b344 100644 --- a/src/replica/replica_failover.cpp +++ b/src/replica/replica_failover.cpp @@ -25,6 +25,7 @@ */ #include +#include #include #include "common/fs_manager.h" @@ -36,7 +37,9 @@ #include "replica.h" #include "replica/replica_context.h" #include "replica_stub.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/error_code.h" #include "utils/fmt_logging.h" @@ -63,7 +66,7 @@ void replica::handle_local_failure(error_code error) } void replica::handle_remote_failure(partition_status::type st, - ::dsn::rpc_address node, + ::dsn::host_port node, error_code error, const std::string &caused_by) { @@ -74,7 +77,7 @@ void replica::handle_remote_failure(partition_status::type st, node); CHECK_EQ(status(), partition_status::PS_PRIMARY); - CHECK_NE(node, _stub->_primary_address); + CHECK_NE(node, _stub->primary_host_port()); switch (st) { case partition_status::PS_SECONDARY: @@ -84,7 +87,8 @@ void replica::handle_remote_failure(partition_status::type st, enum_to_string(st)); { configuration_update_request request; - request.node = node; + request.node = _stub->get_dns_resolver()->resolve_address(node); + request.__set_hp_node(node); request.type = config_type::CT_DOWNGRADE_TO_INACTIVE; request.config = _primary_states.membership; downgrade_to_inactive_on_primary(request); @@ -95,7 +99,7 @@ void replica::handle_remote_failure(partition_status::type st, // potential secondary failure does not lead to ballot change // therefore, it is possible to have multiple exec here _primary_states.learners.erase(node); - _primary_states.statuses.erase(node); + _primary_states.statuses.erase(host_port(node)); } break; case partition_status::PS_INACTIVE: case partition_status::PS_ERROR: diff --git a/src/replica/replica_learn.cpp 
b/src/replica/replica_learn.cpp index 1fc4a7ada8..72bd062455 100644 --- a/src/replica/replica_learn.cpp +++ b/src/replica/replica_learn.cpp @@ -58,6 +58,7 @@ #include "replica_stub.h" #include "runtime/api_layer1.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -213,10 +214,11 @@ void replica::init_learn(uint64_t signature) if (_app->last_committed_decree() == 0 && _stub->_learn_app_concurrent_count.load() >= FLAGS_learn_app_max_concurrent_count) { LOG_WARNING_PREFIX( - "init_learn[{:#018x}]: learnee = {}, learn_duration = {} ms, need to learn app " + "init_learn[{:#018x}]: learnee = {}({}), learn_duration = {} ms, need to learn app " "because app_committed_decree = 0, but learn_app_concurrent_count({}) >= " "FLAGS_learn_app_max_concurrent_count({}), skip", _potential_secondary_states.learning_version, + _config.hp_primary, _config.primary, _potential_secondary_states.duration_ms(), _stub->_learn_app_concurrent_count, @@ -232,25 +234,28 @@ void replica::init_learn(uint64_t signature) request.__set_max_gced_decree(get_max_gced_decree_for_learn()); request.last_committed_decree_in_app = _app->last_committed_decree(); request.last_committed_decree_in_prepare_list = _prepare_list->last_committed_decree(); - request.learner = _stub->_primary_address; + request.learner = _stub->primary_address(); + request.__set_hp_learner(_stub->primary_host_port()); request.signature = _potential_secondary_states.learning_version; _app->prepare_get_checkpoint(request.app_specific_learn_request); - LOG_INFO_PREFIX("init_learn[{:#018x}]: learnee = {}, learn_duration = {} ms, max_gced_decree = " - "{}, local_committed_decree = {}, app_committed_decree = {}, " - "app_durable_decree = {}, current_learning_status = {}, total_copy_file_count " - "= {}, total_copy_file_size = {}, total_copy_buffer_size = {}", - request.signature, - _config.primary, 
- _potential_secondary_states.duration_ms(), - request.max_gced_decree, - last_committed_decree(), - _app->last_committed_decree(), - _app->last_durable_decree(), - enum_to_string(_potential_secondary_states.learning_status), - _potential_secondary_states.learning_copy_file_count, - _potential_secondary_states.learning_copy_file_size, - _potential_secondary_states.learning_copy_buffer_size); + LOG_INFO_PREFIX( + "init_learn[{:#018x}]: learnee = {}({}), learn_duration = {} ms, max_gced_decree = " + "{}, local_committed_decree = {}, app_committed_decree = {}, " + "app_durable_decree = {}, current_learning_status = {}, total_copy_file_count " + "= {}, total_copy_file_size = {}, total_copy_buffer_size = {}", + request.signature, + _config.hp_primary, + _config.primary, + _potential_secondary_states.duration_ms(), + request.max_gced_decree, + last_committed_decree(), + _app->last_committed_decree(), + _app->last_durable_decree(), + enum_to_string(_potential_secondary_states.learning_status), + _potential_secondary_states.learning_copy_file_count, + _potential_secondary_states.learning_copy_file_size, + _potential_secondary_states.learning_copy_buffer_size); dsn::message_ex *msg = dsn::message_ex::create_request(RPC_LEARN, 0, get_gpid().thread_hash()); dsn::marshall(msg, request); @@ -370,7 +375,10 @@ void replica::on_learn(dsn::message_ex *msg, const learn_request &request) // but just set state to partition_status::PS_POTENTIAL_SECONDARY _primary_states.get_replica_config(partition_status::PS_POTENTIAL_SECONDARY, response.config); - auto it = _primary_states.learners.find(request.learner); + host_port hp_learner; + GET_HOST_PORT(request, learner, hp_learner); + + auto it = _primary_states.learners.find(hp_learner); if (it == _primary_states.learners.end()) { response.config.status = partition_status::PS_INACTIVE; response.err = ERR_OBJECT_NOT_FOUND; @@ -392,13 +400,15 @@ void replica::on_learn(dsn::message_ex *msg, const learn_request &request) // TODO: learner machine 
has been down for a long time, and DDD MUST happened before // which leads to state lost. Now the lost state is back, what shall we do? if (request.last_committed_decree_in_app > last_prepared_decree()) { - LOG_ERROR_PREFIX("on_learn[{:#018x}]: learner = {}, learner state is newer than learnee, " - "learner_app_committed_decree = {}, local_committed_decree = {}, learn " - "from scratch", - request.signature, - request.learner, - request.last_committed_decree_in_app, - local_committed_decree); + LOG_ERROR_PREFIX( + "on_learn[{:#018x}]: learner = {}({}), learner state is newer than learnee, " + "learner_app_committed_decree = {}, local_committed_decree = {}, learn " + "from scratch", + request.signature, + hp_learner, + request.learner, + request.last_committed_decree_in_app, + local_committed_decree); *(decree *)&request.last_committed_decree_in_app = 0; } @@ -407,25 +417,29 @@ void replica::on_learn(dsn::message_ex *msg, const learn_request &request) // this happens when the new primary does not commit the previously prepared mutations // yet, which it should do, so let's help it now. 
else if (request.last_committed_decree_in_app > local_committed_decree) { - LOG_ERROR_PREFIX("on_learn[{:#018x}]: learner = {}, learner's last_committed_decree_in_app " - "is newer than learnee, learner_app_committed_decree = {}, " - "local_committed_decree = {}, commit local soft", - request.signature, - request.learner, - request.last_committed_decree_in_app, - local_committed_decree); + LOG_ERROR_PREFIX( + "on_learn[{:#018x}]: learner = {}({}), learner's last_committed_decree_in_app " + "is newer than learnee, learner_app_committed_decree = {}, " + "local_committed_decree = {}, commit local soft", + request.signature, + hp_learner, + request.learner, + request.last_committed_decree_in_app, + local_committed_decree); // we shouldn't commit mutations hard coz these mutations may preparing on another learner _prepare_list->commit(request.last_committed_decree_in_app, COMMIT_TO_DECREE_SOFT); local_committed_decree = last_committed_decree(); if (request.last_committed_decree_in_app > local_committed_decree) { - LOG_ERROR_PREFIX("on_learn[{:#018x}]: try to commit primary to {}, still less than " - "learner({})'s committed decree({}), wait mutations to be commitable", - request.signature, - local_committed_decree, - request.learner, - request.last_committed_decree_in_app); + LOG_ERROR_PREFIX( + "on_learn[{:#018x}]: try to commit primary to {}, still less than " + "learner({}({}))'s committed decree({}), wait mutations to be commitable", + request.signature, + local_committed_decree, + hp_learner, + request.learner, + request.last_committed_decree_in_app); response.err = ERR_INCONSISTENT_STATE; reply(msg, response); return; @@ -438,11 +452,12 @@ void replica::on_learn(dsn::message_ex *msg, const learn_request &request) response.state.__set_learn_start_decree(learn_start_decree); bool delayed_replay_prepare_list = false; - LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}, remote_committed_decree = {}, " + LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}({}), 
remote_committed_decree = {}, " "remote_app_committed_decree = {}, local_committed_decree = {}, " "app_committed_decree = {}, app_durable_decree = {}, " "prepare_min_decree = {}, prepare_list_count = {}, learn_start_decree = {}", request.signature, + hp_learner, request.learner, request.last_committed_decree_in_prepare_list, request.last_committed_decree_in_app, @@ -453,7 +468,8 @@ void replica::on_learn(dsn::message_ex *msg, const learn_request &request) _prepare_list->count(), learn_start_decree); - response.address = _stub->_primary_address; + response.address = _stub->primary_address(); + response.__set_hp_address(_stub->primary_host_port()); response.prepare_start_decree = invalid_decree; response.last_committed_decree = local_committed_decree; response.err = ERR_OK; @@ -467,31 +483,35 @@ void replica::on_learn(dsn::message_ex *msg, const learn_request &request) delayed_replay_prepare_list); if (!should_learn_cache) { if (learn_start_decree > _app->last_durable_decree()) { - LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}, choose to learn private logs, " + LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}({}), choose to learn private logs, " "because learn_start_decree({}) > _app->last_durable_decree({})", request.signature, + hp_learner, request.learner, learn_start_decree, _app->last_durable_decree()); _private_log->get_learn_state(get_gpid(), learn_start_decree, response.state); response.type = learn_type::LT_LOG; } else if (_private_log->get_learn_state(get_gpid(), learn_start_decree, response.state)) { - LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}, choose to learn private logs, " + LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}({}), choose to learn private logs, " "because mutation_log::get_learn_state() returns true", request.signature, + hp_learner, request.learner); response.type = learn_type::LT_LOG; } else if (learn_start_decree < request.last_committed_decree_in_app + 1) { - LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}, choose to 
learn private logs, " + LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}({}), choose to learn private logs, " "because learn_start_decree steps back for duplication", request.signature, + hp_learner, request.learner); response.type = learn_type::LT_LOG; } else { - LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}, choose to learn app, beacuse " + LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}({}), choose to learn app, beacuse " "learn_start_decree({}) <= _app->last_durable_decree({}), and " "mutation_log::get_learn_state() returns false", request.signature, + hp_learner, request.learner, learn_start_decree, _app->last_durable_decree()); @@ -504,9 +524,10 @@ void replica::on_learn(dsn::message_ex *msg, const learn_request &request) if (response.state.files.size() > 0) { auto &last_file = response.state.files.back(); if (last_file == learner_state.last_learn_log_file) { - LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}, learn the same file {} " + LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}({}), learn the same file {} " "repeatedly, hint to switch file", request.signature, + hp_learner, request.learner, last_file); _private_log->hint_switch_file(); @@ -516,10 +537,11 @@ void replica::on_learn(dsn::message_ex *msg, const learn_request &request) } // it is safe to commit to last_committed_decree() now response.state.to_decree_included = last_committed_decree(); - LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}, learn private logs succeed, " + LOG_INFO_PREFIX("on_learn[{:#018x}]: learner = {}({}), learn private logs succeed, " "learned_meta_size = {}, learned_file_count = {}, to_decree_included = " "{}", request.signature, + hp_learner, request.learner, response.state.meta.length(), response.state.files.size(), @@ -531,17 +553,19 @@ void replica::on_learn(dsn::message_ex *msg, const learn_request &request) if (err != ERR_OK) { response.err = ERR_GET_LEARN_STATE_FAILED; LOG_ERROR_PREFIX( - "on_learn[{:#018x}]: learner = {}, get app checkpoint failed, error = 
{}", + "on_learn[{:#018x}]: learner = {}({}), get app checkpoint failed, error = {}", request.signature, + hp_learner, request.learner, err); } else { response.base_local_dir = _app->data_dir(); response.__set_replica_disk_tag(_dir_node->tag); LOG_INFO_PREFIX( - "on_learn[{:#018x}]: learner = {}, get app learn state succeed, " + "on_learn[{:#018x}]: learner = {}({}), get app learn state succeed, " "learned_meta_size = {}, learned_file_count = {}, learned_to_decree = {}", request.signature, + hp_learner, request.learner, response.state.meta.length(), response.state.files.size(), @@ -575,12 +599,13 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response } LOG_INFO_PREFIX( - "on_learn_reply_start[{}]: learnee = {}, learn_duration ={} ms, response_err = " + "on_learn_reply_start[{}]: learnee = {}({}), learn_duration ={} ms, response_err = " "{}, remote_committed_decree = {}, prepare_start_decree = {}, learn_type = {} " "learned_buffer_size = {}, learned_file_count = {},to_decree_included = " "{}, learn_start_decree = {}, last_commit_decree = {}, current_learning_status = " "{} ", req.signature, + resp.config.hp_primary, resp.config.primary, _potential_secondary_states.duration_ms(), resp.err, @@ -599,10 +624,11 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response if (resp.err != ERR_OK) { if (resp.err == ERR_INACTIVE_STATE || resp.err == ERR_INCONSISTENT_STATE) { - LOG_WARNING_PREFIX("on_learn_reply[{:#018x}]: learnee = {}, learnee is updating " + LOG_WARNING_PREFIX("on_learn_reply[{:#018x}]: learnee = {}({}), learnee is updating " "ballot(inactive state) or reconciliation(inconsistent state), " "delay to start another round of learning", req.signature, + resp.config.hp_primary, resp.config.primary); _potential_secondary_states.learning_round_is_running = false; _potential_secondary_states.delay_learning_task = @@ -618,17 +644,19 @@ void replica::on_learn_reply(error_code err, learn_request &&req, 
learn_response } if (resp.config.ballot > get_ballot()) { - LOG_INFO_PREFIX("on_learn_reply[{:#018x}]: learnee = {}, update configuration because " + LOG_INFO_PREFIX("on_learn_reply[{:#018x}]: learnee = {}({}), update configuration because " "ballot have changed", req.signature, + resp.config.hp_primary, resp.config.primary); CHECK(update_local_configuration(resp.config), ""); } if (status() != partition_status::PS_POTENTIAL_SECONDARY) { LOG_ERROR_PREFIX( - "on_learn_reply[{:#018x}]: learnee = {}, current_status = {}, stop learning", + "on_learn_reply[{:#018x}]: learnee = {}({}), current_status = {}, stop learning", req.signature, + resp.config.hp_primary, resp.config.primary, enum_to_string(status())); return; @@ -636,12 +664,14 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response // local state is newer than learnee if (resp.last_committed_decree < _app->last_committed_decree()) { - LOG_WARNING_PREFIX("on_learn_reply[{:#018x}]: learnee = {}, learner state is newer than " - "learnee (primary): {} vs {}, create new app", - req.signature, - resp.config.primary, - _app->last_committed_decree(), - resp.last_committed_decree); + LOG_WARNING_PREFIX( + "on_learn_reply[{:#018x}]: learnee = {}({}), learner state is newer than " + "learnee (primary): {} vs {}, create new app", + req.signature, + resp.config.hp_primary, + resp.config.primary, + _app->last_committed_decree(), + resp.last_committed_decree); METRIC_VAR_INCREMENT(learn_resets); @@ -649,9 +679,10 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response auto err = _app->close(true); if (err != ERR_OK) { LOG_ERROR_PREFIX( - "on_learn_reply[{:#018x}]: learnee = {}, close app (with clear_state=true) " + "on_learn_reply[{:#018x}]: learnee = {}({}), close app (with clear_state=true) " "failed, err = {}", req.signature, + resp.config.hp_primary, resp.config.primary, err); } @@ -677,9 +708,10 @@ void replica::on_learn_reply(error_code err, learn_request &&req, 
learn_response err = _app->open_new_internal(this, _private_log->on_partition_reset(get_gpid(), 0)); if (err != ERR_OK) { - LOG_ERROR_PREFIX("on_learn_reply[{:#018x}]: learnee = {}, open app (with " + LOG_ERROR_PREFIX("on_learn_reply[{:#018x}]: learnee = {}({}), open app (with " "create_new=true) failed, err = {}", req.signature, + resp.config.hp_primary, resp.config.primary, err); } @@ -714,9 +746,10 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response if (++_stub->_learn_app_concurrent_count > FLAGS_learn_app_max_concurrent_count) { --_stub->_learn_app_concurrent_count; LOG_WARNING_PREFIX( - "on_learn_reply[{:#018x}]: learnee = {}, learn_app_concurrent_count({}) >= " + "on_learn_reply[{:#018x}]: learnee = {}({}), learn_app_concurrent_count({}) >= " "FLAGS_learn_app_max_concurrent_count({}), skip this round", _potential_secondary_states.learning_version, + _config.hp_primary, _config.primary, _stub->_learn_app_concurrent_count, FLAGS_learn_app_max_concurrent_count); @@ -725,8 +758,9 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response } else { _potential_secondary_states.learn_app_concurrent_count_increased = true; LOG_INFO_PREFIX( - "on_learn_reply[{:#018x}]: learnee = {}, ++learn_app_concurrent_count = {}", + "on_learn_reply[{:#018x}]: learnee = {}({}), ++learn_app_concurrent_count = {}", _potential_secondary_states.learning_version, + _config.hp_primary, _config.primary, _stub->_learn_app_concurrent_count.load()); } @@ -771,9 +805,10 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response // reset preparelist _potential_secondary_states.learning_start_prepare_decree = resp.prepare_start_decree; _prepare_list->truncate(_app->last_committed_decree()); - LOG_INFO_PREFIX("on_learn_reply[{:#018x}]: learnee = {}, truncate prepare list, " + LOG_INFO_PREFIX("on_learn_reply[{:#018x}]: learnee = {}({}), truncate prepare list, " "local_committed_decree = {}, current_learning_status = 
{}", req.signature, + resp.config.hp_primary, resp.config.primary, _app->last_committed_decree(), enum_to_string(_potential_secondary_states.learning_status)); @@ -802,12 +837,14 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response _prepare_list->get_mutation_by_decree(mu->data.header.decree); if (existing_mutation != nullptr && existing_mutation->data.header.ballot > mu->data.header.ballot) { - LOG_INFO_PREFIX("on_learn_reply[{:#018x}]: learnee = {}, mutation({}) exist on " - "the learner with larger ballot {}", - req.signature, - resp.config.primary, - mu->name(), - existing_mutation->data.header.ballot); + LOG_INFO_PREFIX( + "on_learn_reply[{:#018x}]: learnee = {}({}), mutation({}) exist on " + "the learner with larger ballot {}", + req.signature, + resp.config.hp_primary, + resp.config.primary, + mu->name(), + existing_mutation->data.header.ballot); } else { _prepare_list->prepare(mu, partition_status::PS_POTENTIAL_SECONDARY); } @@ -819,10 +856,11 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response } } - LOG_INFO_PREFIX("on_learn_reply[{:#018x}]: learnee = {}, learn_duration = {} ms, apply " + LOG_INFO_PREFIX("on_learn_reply[{:#018x}]: learnee = {}({}), learn_duration = {} ms, apply " "cache done, prepare_cache_range = <{}, {}>, local_committed_decree = {}, " "app_committed_decree = {}, current_learning_status = {}", req.signature, + resp.config.hp_primary, resp.config.primary, _potential_secondary_states.duration_ms(), cache_range.first, @@ -865,8 +903,9 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response if (!dsn::utils::filesystem::directory_exists(learn_dir)) { LOG_ERROR_PREFIX( - "on_learn_reply[{:#018x}]: learnee = {}, create replica learn dir {} failed", + "on_learn_reply[{:#018x}]: learnee = {}({}), create replica learn dir {} failed", req.signature, + resp.config.hp_primary, resp.config.primary, learn_dir); @@ -888,16 +927,18 @@ void 
replica::on_learn_reply(error_code err, learn_request &&req, learn_response } bool high_priority = (resp.type == learn_type::LT_APP ? false : true); - LOG_INFO_PREFIX("on_learn_reply[{:#018x}]: learnee = {}, learn_duration = {} ms, start to " - "copy remote files, copy_file_count = {}, priority = {}", - req.signature, - resp.config.primary, - _potential_secondary_states.duration_ms(), - resp.state.files.size(), - high_priority ? "high" : "low"); + LOG_INFO_PREFIX( + "on_learn_reply[{:#018x}]: learnee = {}({}), learn_duration = {} ms, start to " + "copy remote files, copy_file_count = {}, priority = {}", + req.signature, + resp.config.hp_primary, + resp.config.primary, + _potential_secondary_states.duration_ms(), + resp.state.files.size(), + high_priority ? "high" : "low"); _potential_secondary_states.learn_remote_files_task = _stub->_nfs->copy_remote_files( - resp.config.primary, + resp.config.hp_primary, resp.replica_disk_tag, resp.base_local_dir, resp.state.files, @@ -1003,30 +1044,33 @@ void replica::on_copy_remote_state_completed(error_code err, decree old_app_committed = _app->last_committed_decree(); decree old_app_durable = _app->last_durable_decree(); - LOG_INFO_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}, learn_duration = {} " - "ms, copy remote state done, err = {}, copy_file_count = {}, copy_file_size = " - "{}, copy_time_used = {} ms, local_committed_decree = {}, app_committed_decree " - "= {}, app_durable_decree = {}, prepare_start_decree = {}, " - "current_learning_status = {}", - req.signature, - resp.config.primary, - _potential_secondary_states.duration_ms(), - err, - resp.state.files.size(), - size, - _potential_secondary_states.duration_ms() - copy_start_time, - last_committed_decree(), - _app->last_committed_decree(), - _app->last_durable_decree(), - resp.prepare_start_decree, - enum_to_string(_potential_secondary_states.learning_status)); + LOG_INFO_PREFIX( + "on_copy_remote_state_completed[{:#018x}]: learnee = {}({}), 
learn_duration = {} " + "ms, copy remote state done, err = {}, copy_file_count = {}, copy_file_size = " + "{}, copy_time_used = {} ms, local_committed_decree = {}, app_committed_decree " + "= {}, app_durable_decree = {}, prepare_start_decree = {}, " + "current_learning_status = {}", + req.signature, + resp.config.hp_primary, + resp.config.primary, + _potential_secondary_states.duration_ms(), + err, + resp.state.files.size(), + size, + _potential_secondary_states.duration_ms() - copy_start_time, + last_committed_decree(), + _app->last_committed_decree(), + _app->last_durable_decree(), + resp.prepare_start_decree, + enum_to_string(_potential_secondary_states.learning_status)); if (resp.type == learn_type::LT_APP) { --_stub->_learn_app_concurrent_count; _potential_secondary_states.learn_app_concurrent_count_increased = false; - LOG_INFO_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}, " + LOG_INFO_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}({}), " "--learn_app_concurrent_count = {}", _potential_secondary_states.learning_version, + _config.hp_primary, _config.primary, _stub->_learn_app_concurrent_count.load()); } @@ -1071,19 +1115,21 @@ void replica::on_copy_remote_state_completed(error_code err, // the learn_start_decree will be set to 0, which makes learner to learn from // scratch CHECK_LE(_app->last_committed_decree(), resp.last_committed_decree); - LOG_INFO_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}, " + LOG_INFO_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}({}), " "learn_duration = {} ms, checkpoint duration = {} ns, apply " "checkpoint succeed, app_committed_decree = {}", req.signature, + resp.config.hp_primary, resp.config.primary, _potential_secondary_states.duration_ms(), dsn_now_ns() - start_ts, _app->last_committed_decree()); } else { - LOG_ERROR_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}, " + LOG_ERROR_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = 
{}({}), " "learn_duration = {} ms, checkpoint duration = {} ns, apply " "checkpoint failed, err = {}", req.signature, + resp.config.hp_primary, resp.config.primary, _potential_secondary_states.duration_ms(), dsn_now_ns() - start_ts, @@ -1096,19 +1142,21 @@ void replica::on_copy_remote_state_completed(error_code err, auto start_ts = dsn_now_ns(); err = apply_learned_state_from_private_log(lstate); if (err == ERR_OK) { - LOG_INFO_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}, " + LOG_INFO_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}({}), " "learn_duration = {} ms, apply_log_duration = {} ns, apply learned " "state from private log succeed, app_committed_decree = {}", req.signature, + resp.config.hp_primary, resp.config.primary, _potential_secondary_states.duration_ms(), dsn_now_ns() - start_ts, _app->last_committed_decree()); } else { - LOG_ERROR_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}, " + LOG_ERROR_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}({}), " "learn_duration = {} ms, apply_log_duration = {} ns, apply " "learned state from private log failed, err = {}", req.signature, + resp.config.hp_primary, resp.config.primary, _potential_secondary_states.duration_ms(), dsn_now_ns() - start_ts, @@ -1119,26 +1167,28 @@ void replica::on_copy_remote_state_completed(error_code err, // reset prepare list to make it catch with app _prepare_list->reset(_app->last_committed_decree()); - LOG_INFO_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}, learn_duration = " - "{} ms, apply checkpoint/log done, err = {}, last_prepared_decree = ({} => " - "{}), last_committed_decree = ({} => {}), app_committed_decree = ({} => " - "{}), app_durable_decree = ({} => {}), remote_committed_decree = {}, " - "prepare_start_decree = {}, current_learning_status = {}", - req.signature, - resp.config.primary, - _potential_secondary_states.duration_ms(), - err, - old_prepared, - last_prepared_decree(), - 
old_committed, - last_committed_decree(), - old_app_committed, - _app->last_committed_decree(), - old_app_durable, - _app->last_durable_decree(), - resp.last_committed_decree, - resp.prepare_start_decree, - enum_to_string(_potential_secondary_states.learning_status)); + LOG_INFO_PREFIX( + "on_copy_remote_state_completed[{:#018x}]: learnee = {}({}), learn_duration = " + "{} ms, apply checkpoint/log done, err = {}, last_prepared_decree = ({} => " + "{}), last_committed_decree = ({} => {}), app_committed_decree = ({} => " + "{}), app_durable_decree = ({} => {}), remote_committed_decree = {}, " + "prepare_start_decree = {}, current_learning_status = {}", + req.signature, + resp.config.hp_primary, + resp.config.primary, + _potential_secondary_states.duration_ms(), + err, + old_prepared, + last_prepared_decree(), + old_committed, + last_committed_decree(), + old_app_committed, + _app->last_committed_decree(), + old_app_durable, + _app->last_durable_decree(), + resp.last_committed_decree, + resp.prepare_start_decree, + enum_to_string(_potential_secondary_states.learning_status)); } // if catch-up done, do flush to enable all learned state is durable @@ -1148,15 +1198,17 @@ void replica::on_copy_remote_state_completed(error_code err, _app->last_committed_decree() > _app->last_durable_decree()) { err = background_sync_checkpoint(); - LOG_INFO_PREFIX("on_copy_remote_state_completed[{:#018x}]: learnee = {}, learn_duration = " - "{} ms, flush done, err = {}, app_committed_decree = {}, " - "app_durable_decree = {}", - req.signature, - resp.config.primary, - _potential_secondary_states.duration_ms(), - err, - _app->last_committed_decree(), - _app->last_durable_decree()); + LOG_INFO_PREFIX( + "on_copy_remote_state_completed[{:#018x}]: learnee = {}({}), learn_duration = " + "{} ms, flush done, err = {}, app_committed_decree = {}, " + "app_durable_decree = {}", + req.signature, + resp.config.hp_primary, + resp.config.primary, + _potential_secondary_states.duration_ms(), + err, + 
_app->last_committed_decree(), + _app->last_durable_decree()); if (err == ERR_OK) { CHECK_EQ(_app->last_committed_decree(), _app->last_durable_decree()); @@ -1183,10 +1235,11 @@ void replica::on_learn_remote_state_completed(error_code err) _checker.only_one_thread_access(); if (partition_status::PS_POTENTIAL_SECONDARY != status()) { - LOG_WARNING_PREFIX("on_learn_remote_state_completed[{:#018x}]: learnee = {}, " + LOG_WARNING_PREFIX("on_learn_remote_state_completed[{:#018x}]: learnee = {}({}), " "learn_duration = {} ms, err = {}, the learner status is not " "PS_POTENTIAL_SECONDARY, but {}, ignore", _potential_secondary_states.learning_version, + _config.hp_primary, _config.primary, _potential_secondary_states.duration_ms(), err, @@ -1194,17 +1247,19 @@ void replica::on_learn_remote_state_completed(error_code err) return; } - LOG_INFO_PREFIX("on_learn_remote_state_completed[{:#018x}]: learnee = {}, learn_duration = {} " - "ms, err = {}, local_committed_decree = {}, app_committed_decree = {}, " - "app_durable_decree = {}, current_learning_status = {}", - _potential_secondary_states.learning_version, - _config.primary, - _potential_secondary_states.duration_ms(), - err, - last_committed_decree(), - _app->last_committed_decree(), - _app->last_durable_decree(), - enum_to_string(_potential_secondary_states.learning_status)); + LOG_INFO_PREFIX( + "on_learn_remote_state_completed[{:#018x}]: learnee = {}({}), learn_duration = {} " + "ms, err = {}, local_committed_decree = {}, app_committed_decree = {}, " + "app_durable_decree = {}, current_learning_status = {}", + _potential_secondary_states.learning_version, + _config.hp_primary, + _config.primary, + _potential_secondary_states.duration_ms(), + err, + last_committed_decree(), + _app->last_committed_decree(), + _app->last_durable_decree(), + enum_to_string(_potential_secondary_states.learning_status)); _potential_secondary_states.learning_round_is_running = false; @@ -1221,8 +1276,9 @@ void 
replica::handle_learning_error(error_code err, bool is_local_error) _checker.only_one_thread_access(); LOG_ERROR_PREFIX( - "handle_learning_error[{:#018x}]: learnee = {}, learn_duration = {} ms, err = {}, {}", + "handle_learning_error[{:#018x}]: learnee = {}({}), learn_duration = {} ms, err = {}, {}", _potential_secondary_states.learning_version, + _config.hp_primary, _config.primary, _potential_secondary_states.duration_ms(), err, @@ -1242,7 +1298,7 @@ void replica::handle_learning_error(error_code err, bool is_local_error) is_local_error ? partition_status::PS_ERROR : partition_status::PS_INACTIVE); } -error_code replica::handle_learning_succeeded_on_primary(::dsn::rpc_address node, +error_code replica::handle_learning_succeeded_on_primary(::dsn::host_port node, uint64_t learn_signature) { auto it = _primary_states.learners.find(node); @@ -1277,12 +1333,14 @@ void replica::notify_learn_completion() report.last_committed_decree_in_prepare_list = last_committed_decree(); report.learner_signature = _potential_secondary_states.learning_version; report.learner_status_ = _potential_secondary_states.learning_status; - report.node = _stub->_primary_address; + report.node = _stub->primary_address(); + report.__set_hp_node(_stub->primary_host_port()); - LOG_INFO_PREFIX("notify_learn_completion[{:#018x}]: learnee = {}, learn_duration = {} ms, " + LOG_INFO_PREFIX("notify_learn_completion[{:#018x}]: learnee = {}({}), learn_duration = {} ms, " "local_committed_decree = {}, app_committed_decree = {}, app_durable_decree = " "{}, current_learning_status = {}", _potential_secondary_states.learning_version, + _config.hp_primary, _config.primary, _potential_secondary_states.duration_ms(), last_committed_decree(), @@ -1312,9 +1370,13 @@ void replica::on_learn_completion_notification(const group_check_response &repor { _checker.only_one_thread_access(); + host_port hp_node; + GET_HOST_PORT(report, node, hp_node); + LOG_INFO_PREFIX( - "on_learn_completion_notification[{:#018x}]: 
learner = {}, learning_status = {}", + "on_learn_completion_notification[{:#018x}]: learner = {}({}), learning_status = {}", report.learner_signature, + hp_node, report.node, enum_to_string(report.learner_status_)); @@ -1322,25 +1384,30 @@ void replica::on_learn_completion_notification(const group_check_response &repor response.err = (partition_status::PS_INACTIVE == status() && _inactive_is_transient) ? ERR_INACTIVE_STATE : ERR_INVALID_STATE; - LOG_ERROR_PREFIX("on_learn_completion_notification[{:#018x}]: learner = {}, this replica " - "is not primary, but {}, reply {}", - report.learner_signature, - report.node, - enum_to_string(status()), - response.err); + LOG_ERROR_PREFIX( + "on_learn_completion_notification[{:#018x}]: learner = {}({}), this replica " + "is not primary, but {}, reply {}", + report.learner_signature, + hp_node, + report.node, + enum_to_string(status()), + response.err); } else if (report.learner_status_ != learner_status::LearningSucceeded) { response.err = ERR_INVALID_STATE; - LOG_ERROR_PREFIX("on_learn_completion_notification[{:#018x}]: learner = {}, learner_status " - "is not LearningSucceeded, but {}, reply ERR_INVALID_STATE", - report.learner_signature, - report.node, - enum_to_string(report.learner_status_)); + LOG_ERROR_PREFIX( + "on_learn_completion_notification[{:#018x}]: learner = {}({}), learner_status " + "is not LearningSucceeded, but {}, reply ERR_INVALID_STATE", + report.learner_signature, + hp_node, + report.node, + enum_to_string(report.learner_status_)); } else { - response.err = handle_learning_succeeded_on_primary(report.node, report.learner_signature); + response.err = handle_learning_succeeded_on_primary(hp_node, report.learner_signature); if (response.err != ERR_OK) { - LOG_ERROR_PREFIX("on_learn_completion_notification[{:#018x}]: learner = {}, handle " + LOG_ERROR_PREFIX("on_learn_completion_notification[{:#018x}]: learner = {}({}), handle " "learning succeeded on primary failed, reply {}", report.learner_signature, + 
hp_node, report.node, response.err); } @@ -1363,10 +1430,11 @@ void replica::on_learn_completion_notification_reply(error_code err, } if (resp.signature != (int64_t)_potential_secondary_states.learning_version) { - LOG_ERROR_PREFIX("on_learn_completion_notification_reply[{:#018x}]: learnee = {}, " + LOG_ERROR_PREFIX("on_learn_completion_notification_reply[{:#018x}]: learnee = {}({}), " "learn_duration = {} ms, signature not matched, current signature on " "primary is [{:#018x}]", report.learner_signature, + _config.hp_primary, _config.primary, _potential_secondary_states.duration_ms(), resp.signature); @@ -1374,21 +1442,24 @@ void replica::on_learn_completion_notification_reply(error_code err, return; } - LOG_INFO_PREFIX("on_learn_completion_notification_reply[{:#018x}]: learnee = {}, " + LOG_INFO_PREFIX("on_learn_completion_notification_reply[{:#018x}]: learnee = {}({}), " "learn_duration = {} ms, response_err = {}", report.learner_signature, + _config.hp_primary, _config.primary, _potential_secondary_states.duration_ms(), resp.err); if (resp.err != ERR_OK) { if (resp.err == ERR_INACTIVE_STATE) { - LOG_WARNING_PREFIX("on_learn_completion_notification_reply[{:#018x}]: learnee = {}, " - "learn_duration = {} ms, learnee is updating ballot, delay to start " - "another round of learning", - report.learner_signature, - _config.primary, - _potential_secondary_states.duration_ms()); + LOG_WARNING_PREFIX( + "on_learn_completion_notification_reply[{:#018x}]: learnee = {}({}), " + "learn_duration = {} ms, learnee is updating ballot, delay to start " + "another round of learning", + report.learner_signature, + _config.hp_primary, + _config.primary, + _potential_secondary_states.duration_ms()); _potential_secondary_states.learning_round_is_running = false; _potential_secondary_states.delay_learning_task = tasking::create_task( LPC_DELAY_LEARN, @@ -1406,8 +1477,9 @@ void replica::on_learn_completion_notification_reply(error_code err, void replica::on_add_learner(const 
group_check_request &request) { - LOG_INFO_PREFIX("process add learner, primary = {}, ballot ={}, status ={}, " + LOG_INFO_PREFIX("process add learner, primary = {}({}), ballot ={}, status ={}, " "last_committed_decree = {}, duplicating = {}", + request.config.hp_primary, request.config.primary, request.config.ballot, enum_to_string(request.config.status), @@ -1548,12 +1620,13 @@ error_code replica::apply_learned_state_from_private_log(learn_state &state) LOG_INFO_PREFIX( "apply_learned_state_from_private_log[{}]: duplicating={}, step_back={}, " - "learnee = {}, learn_duration = {} ms, apply private log files done, file_count " + "learnee = {}({}), learn_duration = {} ms, apply private log files done, file_count " "={}, first_learn_start_decree ={}, learn_start_decree = {}, " "app_committed_decree = {}", _potential_secondary_states.learning_version, duplicating, step_back, + _config.hp_primary, _config.primary, _potential_secondary_states.duration_ms(), state.files.size(), @@ -1581,20 +1654,22 @@ error_code replica::apply_learned_state_from_private_log(learn_state &state) } if (state.to_decree_included > last_committed_decree()) { - LOG_INFO_PREFIX("apply_learned_state_from_private_log[{}]: learnee ={}, " + LOG_INFO_PREFIX("apply_learned_state_from_private_log[{}]: learnee ={}({}), " "learned_to_decree_included({}) > last_committed_decree({}), commit to " "to_decree_included", _potential_secondary_states.learning_version, + _config.hp_primary, _config.primary, state.to_decree_included, last_committed_decree()); plist.commit(state.to_decree_included, COMMIT_TO_DECREE_SOFT); } - LOG_INFO_PREFIX(" apply_learned_state_from_private_log[{}]: learnee ={}, " + LOG_INFO_PREFIX(" apply_learned_state_from_private_log[{}]: learnee ={}({}), " "learn_duration ={} ms, apply in-buffer private logs done, " "replay_count ={}, app_committed_decree = {}", _potential_secondary_states.learning_version, + _config.hp_primary, _config.primary, _potential_secondary_states.duration_ms(), 
replay_count, diff --git a/src/replica/replica_restore.cpp b/src/replica/replica_restore.cpp index d4e268cc7b..cfe5330200 100644 --- a/src/replica/replica_restore.cpp +++ b/src/replica/replica_restore.cpp @@ -39,6 +39,7 @@ #include "metadata_types.h" #include "replica.h" #include "replica_stub.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" @@ -403,7 +404,8 @@ void replica::tell_meta_to_restore_rollback() dsn::message_ex *msg = dsn::message_ex::create_request(RPC_CM_DROP_APP); ::dsn::marshall(msg, request); - rpc_address target(_stub->_failure_detector->get_servers()); + rpc_address target( + _stub->get_dns_resolver()->resolve_address(_stub->_failure_detector->get_servers())); rpc::call(target, msg, &_tracker, @@ -432,7 +434,8 @@ void replica::report_restore_status_to_meta() dsn::message_ex *msg = dsn::message_ex::create_request(RPC_CM_REPORT_RESTORE_STATUS); ::dsn::marshall(msg, request); - rpc_address target(_stub->_failure_detector->get_servers()); + rpc_address target( + _stub->get_dns_resolver()->resolve_address(_stub->_failure_detector->get_servers())); rpc::call(target, msg, &_tracker, diff --git a/src/replica/replica_stub.cpp b/src/replica/replica_stub.cpp index 1a54b5281b..4990165590 100644 --- a/src/replica/replica_stub.cpp +++ b/src/replica/replica_stub.cpp @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -54,6 +53,7 @@ #include "mutation_log.h" #include "nfs/nfs_node.h" #include "nfs_types.h" +#include "ranger/access_type.h" #include "replica.h" #include "replica/duplication/replica_follower.h" #include "replica/replica_context.h" @@ -62,11 +62,10 @@ #include "replica_disk_migrator.h" #include "replica_stub.h" #include "runtime/api_layer1.h" -#include "ranger/access_type.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" -#include "security/access_controller.h" #include 
"runtime/task/async_calls.h" +#include "security/access_controller.h" #include "split/replica_split_manager.h" #include "utils/command_manager.h" #include "utils/filesystem.h" @@ -331,7 +330,8 @@ replica_stub::replica_stub(replica_state_subscriber subscriber /*= nullptr*/, METRIC_VAR_INIT_server(splitting_replicas), METRIC_VAR_INIT_server(splitting_replicas_max_duration_ms), METRIC_VAR_INIT_server(splitting_replicas_async_learn_max_duration_ms), - METRIC_VAR_INIT_server(splitting_replicas_max_copy_file_bytes) + METRIC_VAR_INIT_server(splitting_replicas_max_copy_file_bytes), + _dns_resolver(new dns_resolver()) { #ifdef DSN_ENABLE_GPERF _is_releasing_memory = false; @@ -340,7 +340,6 @@ replica_stub::replica_stub(replica_state_subscriber subscriber /*= nullptr*/, _is_long_subscriber = is_long_subscriber; _failure_detector = nullptr; _state = NS_Disconnected; - _primary_address_str[0] = '\0'; } replica_stub::~replica_stub(void) { close(); } @@ -355,9 +354,9 @@ void replica_stub::initialize(bool clear /* = false*/) void replica_stub::initialize(const replication_options &opts, bool clear /* = false*/) { - _primary_address = dsn_primary_address(); - strcpy(_primary_address_str, _primary_address.to_string()); - LOG_INFO("primary_address = {}", _primary_address_str); + _primary_host_port = dsn_primary_host_port(); + _primary_host_port_cache = _primary_host_port.to_string(); + LOG_INFO("primary_host_port = {}", _primary_host_port_cache); set_options(opts); LOG_INFO("meta_servers = {}", fmt::join(_options.meta_servers, ", ")); @@ -528,7 +527,7 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f _fs_manager.add_replica(kv.first, kv.second->dir()); } - _nfs = dsn::nfs_node::create(); + _nfs = dsn::nfs_node::create(_dns_resolver); _nfs->start(); dist::cmd::register_remote_command_rpc(); @@ -592,8 +591,10 @@ void replica_stub::initialize_start() // init liveness monitor CHECK_EQ(NS_Disconnected, _state); + if (!FLAGS_fd_disabled) { 
_failure_detector = std::make_shared( + _dns_resolver, _options.meta_servers, [this]() { this->on_meta_server_disconnected(); }, [this]() { this->on_meta_server_connected(); }); @@ -674,7 +675,7 @@ void replica_stub::on_client_write(gpid id, dsn::message_ex *request) if (_verbose_client_log && request) { LOG_INFO("{}@{}: client = {}, code = {}, timeout = {}", id, - _primary_address_str, + _primary_host_port_cache, request->header->from_address, request->header->rpc_name, request->header->client.timeout_ms); @@ -696,7 +697,7 @@ void replica_stub::on_client_read(gpid id, dsn::message_ex *request) if (_verbose_client_log && request) { LOG_INFO("{}@{}: client = {}, code = {}, timeout = {}", id, - _primary_address_str, + _primary_host_port_cache, request->header->from_address, request->header->rpc_name, request->header->client.timeout_ms); @@ -714,16 +715,17 @@ void replica_stub::on_config_proposal(const configuration_update_request &propos if (!is_connected()) { LOG_WARNING("{}@{}: received config proposal {} for {}: not connected, ignore", proposal.config.pid, - _primary_address_str, + _primary_host_port_cache, enum_to_string(proposal.type), proposal.node); return; } - LOG_INFO("{}@{}: received config proposal {} for {}", + LOG_INFO("{}@{}: received config proposal {} for {}({})", proposal.config.pid, - _primary_address_str, + _primary_host_port_cache, enum_to_string(proposal.type), + proposal.hp_node, proposal.node); replica_ptr rep = get_replica(proposal.config.pid); @@ -968,14 +970,15 @@ void replica_stub::on_group_check(group_check_rpc rpc) if (!is_connected()) { LOG_WARNING("{}@{}: received group check: not connected, ignore", request.config.pid, - _primary_address_str); + _primary_host_port_cache); return; } - LOG_INFO("{}@{}: received group check, primary = {}, ballot = {}, status = {}, " + LOG_INFO("{}@{}: received group check, primary = {}({}), ballot = {}, status = {}, " "last_committed_decree = {}", request.config.pid, - _primary_address_str, + 
_primary_host_port_cache, + request.config.hp_primary, request.config.primary, request.config.ballot, enum_to_string(request.config.status), @@ -1035,17 +1038,19 @@ void replica_stub::on_learn_completion_notification(learn_completion_notificatio void replica_stub::on_add_learner(const group_check_request &request) { if (!is_connected()) { - LOG_WARNING("{}@{}: received add learner, primary = {}, not connected, ignore", + LOG_WARNING("{}@{}: received add learner, primary = {}({}), not connected, ignore", request.config.pid, - _primary_address_str, + _primary_host_port_cache, + request.config.hp_primary, request.config.primary); return; } - LOG_INFO("{}@{}: received add learner, primary = {}, ballot = {}, status = {}, " + LOG_INFO("{}@{}: received add learner, primary = {}({}), ballot = {}, status = {}, " "last_committed_decree = {}", request.config.pid, - _primary_address_str, + _primary_host_port_cache, + request.config.hp_primary, request.config.primary, request.config.ballot, enum_to_string(request.config.status), @@ -1125,7 +1130,8 @@ void replica_stub::query_configuration_by_node() dsn::message_ex *msg = dsn::message_ex::create_request(RPC_CM_CONFIG_SYNC); configuration_query_by_node_request req; - req.node = _primary_address; + req.node = primary_address(); + req.__set_hp_node(_primary_host_port); // TODO: send stored replicas may cost network, we shouldn't config the frequency get_local_replicas(req.stored_replicas); @@ -1136,7 +1142,7 @@ void replica_stub::query_configuration_by_node() LOG_INFO("send query node partitions request to meta server, stored_replicas_count = {}", req.stored_replicas.size()); - rpc_address target(_failure_detector->get_servers()); + rpc_address target(_dns_resolver->resolve_address(_failure_detector->get_servers())); _config_query_task = rpc::call(target, msg, @@ -1283,17 +1289,17 @@ void replica_stub::on_node_query_reply_scatter(replica_stub_ptr this_, req.__isset.meta_split_status ? 
req.meta_split_status : split_status::NOT_SPLIT); } else { - if (req.config.primary == _primary_address) { + if (req.config.hp_primary == _primary_host_port) { LOG_INFO("{}@{}: replica not exists on replica server, which is primary, remove it " "from meta server", req.config.pid, - _primary_address_str); + _primary_host_port_cache); remove_replica_on_meta_server(req.info, req.config); } else { LOG_INFO( "{}@{}: replica not exists on replica server, which is not primary, just ignore", req.config.pid, - _primary_address_str); + _primary_host_port_cache); } } } @@ -1331,23 +1337,23 @@ void replica_stub::remove_replica_on_meta_server(const app_info &info, request->info = info; request->config = config; request->config.ballot++; - request->node = _primary_address; + request->node = primary_address(); + request->__set_hp_node(_primary_host_port); request->type = config_type::CT_DOWNGRADE_TO_INACTIVE; - if (_primary_address == config.primary) { + if (_primary_host_port == config.hp_primary) { request->config.primary.set_invalid(); - } else if (replica_helper::remove_node(_primary_address, request->config.secondaries)) { + request->config.hp_primary.reset(); + } else if (replica_helper::remove_node(primary_address(), request->config.secondaries) && + replica_helper::remove_node(_primary_host_port, request->config.hp_secondaries)) { } else { return; } ::dsn::marshall(msg, *request); - rpc_address target(_failure_detector->get_servers()); - rpc::call(_failure_detector->get_servers(), - msg, - nullptr, - [](error_code err, dsn::message_ex *, dsn::message_ex *) {}); + rpc_address target(_dns_resolver->resolve_address(_failure_detector->get_servers())); + rpc::call(target, msg, nullptr, [](error_code err, dsn::message_ex *, dsn::message_ex *) {}); } void replica_stub::on_meta_server_disconnected() @@ -1411,7 +1417,7 @@ void replica_stub::response_client(gpid id, } LOG_ERROR("{}@{}: {} fail: client = {}, code = {}, timeout = {}, status = {}, error = {}", id, - 
_primary_address_str, + _primary_host_port_cache, is_read ? "read" : "write", request == nullptr ? "null" : request->header->from_address.to_string(), request == nullptr ? "null" : request->header->rpc_name, @@ -1646,7 +1652,7 @@ void replica_stub::open_replica( // process below LOG_INFO("{}@{}: start to load replica {} group check, dir = {}", id, - _primary_address_str, + _primary_host_port_cache, group_check ? "with" : "without", dir); rep = load_replica(dn, dir.c_str()); @@ -1690,7 +1696,7 @@ void replica_stub::open_replica( "{}@{}: cannot load replica({}.{}), ballot = {}, " "last_committed_decree = {}, but it does not existed!", id, - _primary_address_str, + _primary_host_port_cache, id, app.app_type.c_str(), configuration_update->config.ballot, @@ -1728,8 +1734,9 @@ void replica_stub::open_replica( } if (rep == nullptr) { - LOG_WARNING( - "{}@{}: open replica failed, erase from opening replicas", id, _primary_address_str); + LOG_WARNING("{}@{}: open replica failed, erase from opening replicas", + id, + _primary_host_port_cache); zauto_write_lock l(_replicas_lock); CHECK_GT_MSG(_opening_replicas.erase(id), 0, "replica {} is not in _opening_replicas", id); METRIC_VAR_DECREMENT(opening_replicas); @@ -1749,12 +1756,12 @@ void replica_stub::open_replica( } if (nullptr != group_check) { - rpc::call_one_way_typed(_primary_address, + rpc::call_one_way_typed(primary_address(), RPC_LEARN_ADD_LEARNER, *group_check, group_check->config.pid.thread_hash()); } else if (nullptr != configuration_update) { - rpc::call_one_way_typed(_primary_address, + rpc::call_one_way_typed(primary_address(), RPC_CONFIG_PROPOSAL, *configuration_update, configuration_update->config.pid.thread_hash()); @@ -1969,9 +1976,9 @@ void replica_stub::notify_replica_state_update(const replica_configuration &conf tasking::enqueue( LPC_REPLICA_STATE_CHANGE_NOTIFICATION, &_tracker, - std::bind(_replica_state_subscriber, _primary_address, config, is_closing)); + std::bind(_replica_state_subscriber, 
_primary_host_port, config, is_closing)); } else { - _replica_state_subscriber(_primary_address, config, is_closing); + _replica_state_subscriber(_primary_host_port, config, is_closing); } } } @@ -2347,7 +2354,7 @@ replica_stub::exec_command_on_replica(const std::vector &args, std::stringstream query_state; query_state << processed << " processed, " << not_found << " not found"; for (auto &kv : results) { - query_state << "\n " << kv.first << "@" << _primary_address_str; + query_state << "\n " << kv.first << "@" << _primary_host_port_cache; if (kv.second.first != partition_status::PS_INVALID) query_state << "@" << (kv.second.first == partition_status::PS_PRIMARY ? "P" : "S"); query_state << " : " << kv.second.second; @@ -2493,7 +2500,7 @@ uint64_t replica_stub::gc_tcmalloc_memory(bool release_all) // // partition split // -void replica_stub::create_child_replica(rpc_address primary_address, +void replica_stub::create_child_replica(host_port primary_address, app_info app, ballot init_ballot, gpid child_gpid, @@ -2636,7 +2643,7 @@ void replica_stub::on_bulk_load(bulk_load_rpc rpc) const bulk_load_request &request = rpc.request(); bulk_load_response &response = rpc.response(); - LOG_INFO("[{}@{}]: receive bulk load request", request.pid, _primary_address_str); + LOG_INFO("[{}@{}]: receive bulk load request", request.pid, _primary_host_port_cache); replica_ptr rep = get_replica(request.pid); if (rep != nullptr) { rep->get_bulk_loader()->on_bulk_load(request, response); @@ -2651,10 +2658,11 @@ void replica_stub::on_group_bulk_load(group_bulk_load_rpc rpc) const group_bulk_load_request &request = rpc.request(); group_bulk_load_response &response = rpc.response(); - LOG_INFO("[{}@{}]: received group bulk load request, primary = {}, ballot = {}, " + LOG_INFO("[{}@{}]: received group bulk load request, primary = {}({}), ballot = {}, " "meta_bulk_load_status = {}", request.config.pid, - _primary_address_str, + _primary_host_port_cache, + request.config.hp_primary, 
request.config.primary, request.config.ballot, enum_to_string(request.meta_bulk_load_status)); @@ -2675,7 +2683,7 @@ void replica_stub::on_detect_hotkey(detect_hotkey_rpc rpc) LOG_INFO("[{}@{}]: received detect hotkey request, hotkey_type = {}, detect_action = {}", request.pid, - _primary_address_str, + _primary_host_port_cache, enum_to_string(request.type), enum_to_string(request.action)); diff --git a/src/replica/replica_stub.h b/src/replica/replica_stub.h index 08ae770a5b..79c0d8b4a0 100644 --- a/src/replica/replica_stub.h +++ b/src/replica/replica_stub.h @@ -50,17 +50,19 @@ #include "failure_detector/failure_detector_multimaster.h" #include "metadata_types.h" #include "partition_split_types.h" +#include "ranger/access_type.h" #include "replica.h" #include "replica/mutation_log.h" #include "replica_admin_types.h" -#include "ranger/access_type.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_holder.h" -#include "security/access_controller.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/serverlet.h" #include "runtime/task/task.h" #include "runtime/task/task_code.h" #include "runtime/task/task_tracker.h" +#include "security/access_controller.h" #include "utils/autoref_ptr.h" #include "utils/error_code.h" #include "utils/flags.h" @@ -111,7 +113,7 @@ class replica_split_manager; typedef std::unordered_map replicas; typedef std::function + ::dsn::host_port /*from*/, const replica_configuration & /*new_config*/, bool /*is_closing*/)> replica_state_subscriber; class replica_stub; @@ -194,8 +196,15 @@ class replica_stub : public serverlet, public ref_counter replication_options &options() { return _options; } const replication_options &options() const { return _options; } bool is_connected() const { return NS_Connected == _state; } - virtual rpc_address get_meta_server_address() const { return _failure_detector->get_servers(); } - rpc_address primary_address() const { return _primary_address; } + 
virtual rpc_address get_meta_server_address() const + { + return _dns_resolver->resolve_address(_failure_detector->get_servers()); + } + rpc_address primary_address() const + { + return _dns_resolver->resolve_address(_primary_host_port); + } + const host_port &primary_host_port() const { return _primary_host_port; } // // helper methods @@ -215,7 +224,7 @@ class replica_stub : public serverlet, public ref_counter // // called by parent partition, executed by child partition - void create_child_replica(dsn::rpc_address primary_address, + void create_child_replica(dsn::host_port primary_address, app_info app, ballot init_ballot, gpid child_gpid, @@ -296,6 +305,8 @@ class replica_stub : public serverlet, public ref_counter void on_nfs_get_file_size(const ::dsn::service::get_file_size_request &request, ::dsn::rpc_replier<::dsn::service::get_file_size_response> &reply); + std::shared_ptr get_dns_resolver() const { return _dns_resolver; } + private: enum replica_node_state { @@ -438,8 +449,9 @@ class replica_stub : public serverlet, public ref_counter closing_replicas _closing_replicas; closed_replicas _closed_replicas; - ::dsn::rpc_address _primary_address; - char _primary_address_str[64]; + ::dsn::host_port _primary_host_port; + // The stringify of '_primary_host_port', used by logging usually. + std::string _primary_host_port_cache; std::shared_ptr _failure_detector; mutable zlock _state_lock; @@ -533,6 +545,9 @@ class replica_stub : public serverlet, public ref_counter METRIC_VAR_DECLARE_gauge_int64(splitting_replicas_max_copy_file_bytes); dsn::task_tracker _tracker; + + // Resolve host_port to address. 
+ std::shared_ptr _dns_resolver; }; } // namespace replication } // namespace dsn diff --git a/src/replica/split/replica_split_manager.cpp b/src/replica/split/replica_split_manager.cpp index 8df996e5d6..f3adee4f29 100644 --- a/src/replica/split/replica_split_manager.cpp +++ b/src/replica/split/replica_split_manager.cpp @@ -35,8 +35,10 @@ #include "replica/replica_stub.h" #include "replica/replication_app_base.h" #include "runtime/api_layer1.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_holder.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/async_calls.h" #include "runtime/task/task.h" #include "utils/autoref_ptr.h" @@ -144,17 +146,18 @@ void replica_split_manager::parent_start_split( _child_gpid = child_gpid; _child_init_ballot = get_ballot(); - LOG_INFO_PREFIX("start to add child({}), init_ballot={}, status={}, primary_address={}", + LOG_INFO_PREFIX("start to add child({}), init_ballot={}, status={}, primary_address={}({})", _child_gpid, _child_init_ballot, enum_to_string(status()), + request.config.hp_primary, request.config.primary); tasking::enqueue(LPC_CREATE_CHILD, tracker(), std::bind(&replica_stub::create_child_replica, _stub, - _replica->_config.primary, + _replica->_config.hp_primary, _replica->_app_info, _child_init_ballot, _child_gpid, @@ -165,7 +168,7 @@ void replica_split_manager::parent_start_split( // ThreadPool: THREAD_POOL_REPLICATION void replica_split_manager::child_init_replica(gpid parent_gpid, - rpc_address primary_address, + const host_port &primary_host_port, ballot init_ballot) // on child partition { FAIL_POINT_INJECT_F("replica_child_init_replica", [](absl::string_view) {}); @@ -181,7 +184,8 @@ void replica_split_manager::child_init_replica(gpid parent_gpid, // update replica config _replica->_config.ballot = init_ballot; - _replica->_config.primary = primary_address; + _replica->_config.primary = _stub->get_dns_resolver()->resolve_address(primary_host_port); + 
_replica->_config.__set_hp_primary(primary_host_port); _replica->_config.status = partition_status::PS_PARTITION_SPLIT; // initialize split context @@ -614,10 +618,12 @@ void replica_split_manager::child_notify_catch_up() // on child partition request->parent_gpid = _replica->_split_states.parent_gpid; request->child_gpid = get_gpid(); request->child_ballot = get_ballot(); - request->child_address = _stub->_primary_address; + request->child_address = _stub->primary_address(); + request->__set_hp_child_address(_stub->primary_host_port()); - LOG_INFO_PREFIX("send notification to primary parent[{}@{}], ballot={}", + LOG_INFO_PREFIX("send notification to primary parent[{}@{}({})], ballot={}", _replica->_split_states.parent_gpid, + _replica->_config.hp_primary, _replica->_config.primary, get_ballot()); @@ -647,8 +653,9 @@ void replica_split_manager::child_notify_catch_up() // on child partition child_handle_split_error("notify_primary_split_catch_up failed"); return; } - LOG_INFO_PREFIX("notify primary parent[{}@{}] catch up succeed", + LOG_INFO_PREFIX("notify primary parent[{}@{}({})] catch up succeed", _replica->_split_states.parent_gpid, + _replica->_config.hp_primary, _replica->_config.primary); }); } @@ -680,13 +687,17 @@ void replica_split_manager::parent_handle_child_catch_up( return; } + host_port hp_child_address; + GET_HOST_PORT(request, child_address, hp_child_address); + response.err = ERR_OK; - LOG_INFO_PREFIX("receive catch_up request from {}@{}, current ballot={}", + LOG_INFO_PREFIX("receive catch_up request from {}@{}({}), current ballot={}", request.child_gpid, + hp_child_address, request.child_address, request.child_ballot); - _replica->_primary_states.caught_up_children.insert(request.child_address); + _replica->_primary_states.caught_up_children.insert(hp_child_address); // _primary_states.statuses is a map structure: rpc address -> partition_status // it stores replica's rpc address and partition_status of this replica group for (auto &iter : 
_replica->_primary_states.statuses) { @@ -764,17 +775,17 @@ void replica_split_manager::update_child_group_partition_count( } if (!_replica->_primary_states.learners.empty() || - _replica->_primary_states.membership.secondaries.size() + 1 < + _replica->_primary_states.membership.hp_secondaries.size() + 1 < _replica->_primary_states.membership.max_replica_count) { LOG_ERROR_PREFIX("there are {} learners or not have enough secondaries(count is {})", _replica->_primary_states.learners.size(), - _replica->_primary_states.membership.secondaries.size()); + _replica->_primary_states.membership.hp_secondaries.size()); parent_handle_split_error( "update_child_group_partition_count failed, have learner or lack of secondary", true); return; } - auto not_replied_addresses = std::make_shared>(); + auto not_replied_addresses = std::make_shared>(); // _primary_states.statuses is a map structure: rpc address -> partition_status for (const auto &kv : _replica->_primary_states.statuses) { not_replied_addresses->insert(kv.first); @@ -787,22 +798,25 @@ void replica_split_manager::update_child_group_partition_count( // ThreadPool: THREAD_POOL_REPLICATION void replica_split_manager::parent_send_update_partition_count_request( - const rpc_address &address, + const host_port &hp, int32_t new_partition_count, - std::shared_ptr> ¬_replied_addresses) // on primary parent + std::shared_ptr> ¬_replied_addresses) // on primary parent { FAIL_POINT_INJECT_F("replica_parent_update_partition_count_request", [](absl::string_view) {}); CHECK_EQ_PREFIX(status(), partition_status::PS_PRIMARY); + auto address = _replica->get_dns_resolver()->resolve_address(hp); auto request = std::make_unique(); request->new_partition_count = new_partition_count; - request->target_address = address; + request->target = address; + request->__set_hp_target(hp); request->child_pid = _child_gpid; request->ballot = get_ballot(); LOG_INFO_PREFIX( - "send update child group partition count request to node({}), new 
partition_count = {}", + "send update child group partition count request to node({}({})), new partition_count = {}", + hp, address, new_partition_count); update_child_group_partition_count_rpc rpc(std::move(request), @@ -876,7 +890,7 @@ void replica_split_manager::on_update_child_group_partition_count_reply( error_code ec, const update_child_group_partition_count_request &request, const update_child_group_partition_count_response &response, - std::shared_ptr> ¬_replied_addresses) // on primary parent + std::shared_ptr> ¬_replied_addresses) // on primary parent { _replica->_checker.only_one_thread_access(); @@ -902,15 +916,16 @@ void replica_split_manager::on_update_child_group_partition_count_reply( error_code error = (ec == ERR_OK) ? response.err : ec; if (error == ERR_TIMEOUT) { LOG_WARNING_PREFIX( - "failed to update child node({}) partition_count, error = {}, wait and retry", - request.target_address, + "failed to update child node({}({})) partition_count, error = {}, wait and retry", + request.hp_target, + request.target, error); tasking::enqueue( LPC_PARTITION_SPLIT, tracker(), std::bind(&replica_split_manager::parent_send_update_partition_count_request, this, - request.target_address, + request.hp_target, request.new_partition_count, not_replied_addresses), get_gpid().thread_hash(), @@ -919,21 +934,23 @@ void replica_split_manager::on_update_child_group_partition_count_reply( } if (error != ERR_OK) { - LOG_ERROR_PREFIX("failed to update child node({}) partition_count({}), error = {}", - request.target_address, + LOG_ERROR_PREFIX("failed to update child node({}({})) partition_count({}), error = {}", + request.hp_target, + request.target, request.new_partition_count, error); parent_handle_split_error("on_update_child_group_partition_count_reply error", true); return; } - LOG_INFO_PREFIX("update node({}) child({}) partition_count({}) succeed", - request.target_address, + LOG_INFO_PREFIX("update node({}({})) child({}) partition_count({}) succeed", + 
request.hp_target, + request.target, request.child_pid, request.new_partition_count); // update group partition_count succeed - not_replied_addresses->erase(request.target_address); + not_replied_addresses->erase(request.hp_target); if (not_replied_addresses->empty()) { LOG_INFO_PREFIX("update child({}) group partition_count, new_partition_count = {}", request.child_pid, @@ -975,6 +992,7 @@ void replica_split_manager::register_child_on_meta(ballot b) // on primary paren child_config.ballot++; child_config.last_committed_decree = 0; child_config.last_drops.clear(); + child_config.hp_last_drops.clear(); child_config.pid.set_partition_index(_replica->_app_info.partition_count + get_gpid().get_partition_index()); @@ -982,7 +1000,8 @@ void replica_split_manager::register_child_on_meta(ballot b) // on primary paren request.app = _replica->_app_info; request.child_config = child_config; request.parent_config = _replica->_primary_states.membership; - request.primary_address = _stub->_primary_address; + request.primary_address = _stub->primary_address(); + request.__set_hp_primary(_stub->primary_host_port()); // reject client request _replica->update_local_configuration_with_no_ballot_change(partition_status::PS_INACTIVE); @@ -1006,7 +1025,8 @@ void replica_split_manager::parent_send_register_request( request.parent_config.ballot, request.child_config.ballot); - rpc_address meta_address(_stub->_failure_detector->get_servers()); + rpc_address meta_address( + _stub->get_dns_resolver()->resolve_address(_stub->_failure_detector->get_servers())); std::unique_ptr req = std::make_unique(request); register_child_rpc rpc(std::move(req), RPC_CM_REGISTER_CHILD_REPLICA, @@ -1206,13 +1226,13 @@ void replica_split_manager::trigger_primary_parent_split( _meta_split_status = meta_split_status; if (meta_split_status == split_status::SPLITTING) { if (!_replica->_primary_states.learners.empty() || - _replica->_primary_states.membership.secondaries.size() + 1 < + 
_replica->_primary_states.membership.hp_secondaries.size() + 1 < _replica->_primary_states.membership.max_replica_count) { LOG_WARNING_PREFIX( "there are {} learners or not have enough secondaries(count is {}), wait for " "next round", _replica->_primary_states.learners.size(), - _replica->_primary_states.membership.secondaries.size()); + _replica->_primary_states.membership.hp_secondaries.size()); return; } @@ -1479,7 +1499,8 @@ void replica_split_manager::primary_parent_handle_stop_split( return; } - _replica->_primary_states.split_stopped_secondary.insert(req->node); + _replica->_primary_states.split_stopped_secondary.insert( + req->__isset.hp_node ? req->hp_node : host_port(req->node)); auto count = 0; for (auto &iter : _replica->_primary_states.statuses) { if (iter.second == partition_status::PS_SECONDARY && @@ -1500,7 +1521,8 @@ void replica_split_manager::parent_send_notify_stop_request( split_status::type meta_split_status) // on primary parent { FAIL_POINT_INJECT_F("replica_parent_send_notify_stop_request", [](absl::string_view) {}); - rpc_address meta_address(_stub->_failure_detector->get_servers()); + rpc_address meta_address( + _stub->get_dns_resolver()->resolve_address(_stub->_failure_detector->get_servers())); std::unique_ptr req = std::make_unique(); req->app_name = _replica->_app_info.app_name; req->parent_gpid = get_gpid(); @@ -1531,7 +1553,8 @@ void replica_split_manager::query_child_state() // on primary parent request->pid = get_gpid(); request->partition_count = _replica->_app_info.partition_count; - rpc_address meta_address(_stub->_failure_detector->get_servers()); + rpc_address meta_address( + _stub->get_dns_resolver()->resolve_address(_stub->_failure_detector->get_servers())); LOG_INFO_PREFIX("send query child partition state request to meta server({})", meta_address); query_child_state_rpc rpc( std::move(request), RPC_CM_QUERY_CHILD_STATE, 0_ms, 0, get_gpid().thread_hash()); diff --git a/src/replica/split/replica_split_manager.h 
b/src/replica/split/replica_split_manager.h index 7435e1705d..e3b963f6fc 100644 --- a/src/replica/split/replica_split_manager.h +++ b/src/replica/split/replica_split_manager.h @@ -37,7 +37,7 @@ namespace dsn { class partition_configuration; -class rpc_address; +class host_port; class task_tracker; namespace replication { @@ -76,7 +76,7 @@ class replica_split_manager : replica_base void parent_start_split(const group_check_request &request); // child replica initialize config and state info - void child_init_replica(gpid parent_gpid, rpc_address primary_address, ballot init_ballot); + void child_init_replica(gpid parent_gpid, const host_port &primary_address, ballot init_ballot); void parent_prepare_states(const std::string &dir); @@ -123,9 +123,9 @@ class replica_split_manager : replica_base void update_child_group_partition_count(int32_t new_partition_count); void parent_send_update_partition_count_request( - const rpc_address &address, + const host_port &hp, int32_t new_partition_count, - std::shared_ptr> ¬_replied_addresses); + std::shared_ptr> ¬_replied_addresses); // child update its partition_count void @@ -136,7 +136,7 @@ class replica_split_manager : replica_base error_code ec, const update_child_group_partition_count_request &request, const update_child_group_partition_count_response &response, - std::shared_ptr> ¬_replied_addresses); + std::shared_ptr> ¬_replied_addresses); // all replicas update partition_count in memory and disk void update_local_partition_count(int32_t new_partition_count); diff --git a/src/replica/split/test/replica_split_test.cpp b/src/replica/split/test/replica_split_test.cpp index 4d7acae743..eee80cd81b 100644 --- a/src/replica/split/test/replica_split_test.cpp +++ b/src/replica/split/test/replica_split_test.cpp @@ -41,6 +41,7 @@ #include "replica/test/mock_utils.h" #include "replica/test/replica_test_base.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include 
"runtime/task/task_tracker.h" #include "utils/autoref_ptr.h" @@ -189,10 +190,13 @@ class replica_split_test : public replica_test_base config.max_replica_count = 3; config.pid = PARENT_GPID; config.ballot = INIT_BALLOT; - config.primary = PRIMARY; - config.secondaries.emplace_back(SECONDARY); + config.hp_primary = PRIMARY; + config.primary = PRIMARY_ADDR; + config.__set_hp_secondaries({SECONDARY}); + config.secondaries.emplace_back(SECONDARY_ADDR); if (!lack_of_secondary) { - config.secondaries.emplace_back(SECONDARY2); + config.secondaries.emplace_back(SECONDARY_ADDR2); + config.hp_secondaries.emplace_back(SECONDARY2); } _parent_replica->set_primary_partition_configuration(config); } @@ -202,7 +206,8 @@ class replica_split_test : public replica_test_base { req.child_pid = CHILD_GPID; req.ballot = b; - req.target_address = PRIMARY; + req.target = PRIMARY_ADDR; + req.__set_hp_target(PRIMARY); req.new_partition_count = NEW_PARTITION_COUNT; } @@ -293,7 +298,8 @@ class replica_split_test : public replica_test_base req.child_gpid = CHILD_GPID; req.parent_gpid = PARENT_GPID; req.child_ballot = child_ballot; - req.child_address = PRIMARY; + req.child_address = PRIMARY_ADDR; + req.__set_hp_child_address(PRIMARY); notify_cacth_up_response resp; _parent_split_mgr->parent_handle_child_catch_up(req, resp); @@ -325,7 +331,7 @@ class replica_split_test : public replica_test_base mock_update_child_partition_count_request(req, INIT_BALLOT); update_child_group_partition_count_response resp; resp.err = resp_err; - auto not_replied_addresses = std::make_shared>(); + auto not_replied_addresses = std::make_shared>(); not_replied_addresses->insert(PRIMARY); _parent_split_mgr->on_update_child_group_partition_count_reply( @@ -345,7 +351,7 @@ class replica_split_test : public replica_test_base void test_on_register_child_reply(partition_status::type status, dsn::error_code resp_err) { stub->set_state_connected(); - stub->set_rpc_address(PRIMARY); + stub->set_host_port(PRIMARY); 
mock_parent_split_context(status); _parent_replica->_primary_states.sync_send_write_request = true; _parent_split_mgr->_partition_version = -1; @@ -356,11 +362,13 @@ class replica_split_test : public replica_test_base req.parent_config.pid = PARENT_GPID; req.parent_config.ballot = INIT_BALLOT; req.parent_config.last_committed_decree = DECREE; - req.parent_config.primary = PRIMARY; + req.parent_config.primary = PRIMARY_ADDR; + req.parent_config.__set_hp_primary(PRIMARY); req.child_config.pid = CHILD_GPID; req.child_config.ballot = INIT_BALLOT + 1; req.child_config.last_committed_decree = 0; - req.primary_address = PRIMARY; + req.primary_address = PRIMARY_ADDR; + req.__set_hp_primary(PRIMARY); register_child_response resp; resp.err = resp_err; @@ -394,7 +402,8 @@ class replica_split_test : public replica_test_base req.app = _parent_replica->_app_info; req.config.ballot = INIT_BALLOT; req.config.status = partition_status::PS_SECONDARY; - req.node = SECONDARY; + req.node = SECONDARY_ADDR; + req.__set_hp_node(SECONDARY); if (meta_split_status == split_status::PAUSING || meta_split_status == split_status::CANCELING) { req.__set_meta_split_status(meta_split_status); @@ -426,7 +435,8 @@ class replica_split_test : public replica_test_base std::shared_ptr req = std::make_shared(); std::shared_ptr resp = std::make_shared(); - req->node = SECONDARY; + req->node = SECONDARY_ADDR; + req->__set_hp_node(SECONDARY); if (meta_split_status != split_status::NOT_SPLIT) { req->__set_meta_split_status(meta_split_status); } @@ -525,9 +535,12 @@ class replica_split_test : public replica_test_base const int32_t APP_ID = 2; const int32_t OLD_PARTITION_COUNT = 8; const int32_t NEW_PARTITION_COUNT = 16; - const rpc_address PRIMARY = rpc_address("127.0.0.1", 18230); - const rpc_address SECONDARY = rpc_address("127.0.0.2", 10058); - const rpc_address SECONDARY2 = rpc_address("127.0.0.3", 10805); + const host_port PRIMARY = host_port("localhost", 18230); + const rpc_address PRIMARY_ADDR = 
rpc_address("127.0.0.1", 18230); + const host_port SECONDARY = host_port("localhost", 10058); + const rpc_address SECONDARY_ADDR = rpc_address("127.0.0.1", 10058); + const host_port SECONDARY2 = host_port("localhost", 10805); + const rpc_address SECONDARY_ADDR2 = rpc_address("127.0.0.1", 10805); const gpid PARENT_GPID = gpid(APP_ID, 1); const gpid CHILD_GPID = gpid(APP_ID, 9); const ballot INIT_BALLOT = 3; diff --git a/src/replica/storage/simple_kv/simple_kv.app.example.h b/src/replica/storage/simple_kv/simple_kv.app.example.h index 578a623ad3..edd7707d80 100644 --- a/src/replica/storage/simple_kv/simple_kv.app.example.h +++ b/src/replica/storage/simple_kv/simple_kv.app.example.h @@ -45,9 +45,9 @@ class simple_kv_client_app : public ::dsn::service_app return ::dsn::ERR_INVALID_PARAMETERS; printf("%s %s %s\n", args[1].c_str(), args[2].c_str(), args[3].c_str()); - dsn::rpc_address meta; - meta.from_string_ipv4(args[2].c_str()); - _simple_kv_client.reset(new simple_kv_client(args[1].c_str(), {meta}, args[3].c_str())); + dsn::host_port hp; + hp.from_string(args[2].c_str()); + _simple_kv_client.reset(new simple_kv_client(args[1].c_str(), {hp}, args[3].c_str())); _timer = ::dsn::tasking::enqueue_timer(LPC_SIMPLE_KV_TEST_TIMER, &_tracker, diff --git a/src/replica/storage/simple_kv/simple_kv.client.h b/src/replica/storage/simple_kv/simple_kv.client.h index 0825f712f5..89d2a49ce4 100644 --- a/src/replica/storage/simple_kv/simple_kv.client.h +++ b/src/replica/storage/simple_kv/simple_kv.client.h @@ -27,6 +27,7 @@ #pragma once #include #include "utils/optional.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/task/async_calls.h" #include "client/partition_resolver.h" #include "simple_kv.code.definition.h" @@ -39,10 +40,12 @@ class simple_kv_client { public: simple_kv_client(const char *cluster_name, - const std::vector &meta_list, + const std::vector &meta_list, const char *app_name) { - _resolver = partition_resolver::get_resolver(cluster_name, meta_list, 
app_name); + _dns_resolver = std::make_shared(); + _resolver = + partition_resolver::get_resolver(cluster_name, meta_list, app_name, _dns_resolver); } simple_kv_client() {} @@ -152,6 +155,7 @@ class simple_kv_client private: dsn::replication::partition_resolver_ptr _resolver; + std::shared_ptr _dns_resolver; }; } // namespace application } // namespace replication diff --git a/src/replica/storage/simple_kv/test/case.cpp b/src/replica/storage/simple_kv/test/case.cpp index 6446381373..99f338f3ec 100644 --- a/src/replica/storage/simple_kv/test/case.cpp +++ b/src/replica/storage/simple_kv/test/case.cpp @@ -534,8 +534,8 @@ void event_on_rpc::init(message_ex *msg, task *tsk) if (msg != nullptr) { _trace_id = fmt::sprintf("%016llx", msg->header->trace_id); _rpc_name = msg->header->rpc_name; - _from = address_to_node(msg->header->from_address); - _to = address_to_node(msg->to_address); + _from = address_to_node(host_port(msg->header->from_address)); + _to = address_to_node(msg->to_host_port); } } @@ -914,9 +914,9 @@ void client_case_line::get_read_params(int &id, std::string &key, int &timeout_m timeout_ms = _timeout; } -void client_case_line::get_replica_config_params(rpc_address &receiver, +void client_case_line::get_replica_config_params(host_port &receiver, dsn::replication::config_type::type &type, - rpc_address &node) const + host_port &node) const { CHECK_EQ(_type, replica_config); receiver = _config_receiver; @@ -1166,9 +1166,9 @@ bool test_case::check_client_write(int &id, std::string &key, std::string &value return true; } -bool test_case::check_replica_config(rpc_address &receiver, +bool test_case::check_replica_config(host_port &receiver, dsn::replication::config_type::type &type, - rpc_address &node) + host_port &node) { if (!check_client_instruction(client_case_line::replica_config)) return false; diff --git a/src/replica/storage/simple_kv/test/case.h b/src/replica/storage/simple_kv/test/case.h index 3f549ebd16..8bfd623989 100644 --- 
a/src/replica/storage/simple_kv/test/case.h +++ b/src/replica/storage/simple_kv/test/case.h @@ -35,7 +35,7 @@ #include "common.h" #include "meta_admin_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/error_code.h" #include "utils/fmt_utils.h" #include "utils/singleton.h" @@ -415,9 +415,9 @@ class client_case_line : public case_line bool parse_type_name(const std::string &name); void get_write_params(int &id, std::string &key, std::string &value, int &timeout_ms) const; void get_read_params(int &id, std::string &key, int &timeout_ms) const; - void get_replica_config_params(rpc_address &receiver, + void get_replica_config_params(host_port &receiver, dsn::replication::config_type::type &type, - rpc_address &node) const; + host_port &node) const; bool check_write_result(int id, ::dsn::error_code err, int32_t resp); bool check_read_result(int id, ::dsn::error_code err, const std::string &resp); @@ -434,9 +434,9 @@ class client_case_line : public case_line int _write_resp; std::string _read_resp; - rpc_address _config_receiver; + host_port _config_receiver; dsn::replication::config_type::type _config_type; - rpc_address _config_node; + host_port _config_node; }; USER_DEFINED_ENUM_FORMATTER(client_case_line::client_type) @@ -465,9 +465,9 @@ class test_case : public dsn::utils::singleton void wait_check_client(); void notify_check_client(); bool check_client_write(int &id, std::string &key, std::string &value, int &timeout_ms); - bool check_replica_config(rpc_address &receiver, + bool check_replica_config(host_port &receiver, dsn::replication::config_type::type &type, - rpc_address &node); + host_port &node); bool check_client_read(int &id, std::string &key, int &timeout_ms); void on_end_write(int id, ::dsn::error_code err, int32_t resp); void on_end_read(int id, ::dsn::error_code err, const std::string &resp); diff --git a/src/replica/storage/simple_kv/test/checker.cpp b/src/replica/storage/simple_kv/test/checker.cpp 
index 427126ad3c..9415962cd0 100644 --- a/src/replica/storage/simple_kv/test/checker.cpp +++ b/src/replica/storage/simple_kv/test/checker.cpp @@ -49,6 +49,8 @@ #include "replica/replica_stub.h" #include "replica/replication_service_app.h" #include "replica/storage/simple_kv/test/common.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_engine.h" #include "runtime/service_app.h" #include "runtime/service_engine.h" @@ -72,7 +74,7 @@ class checker_partition_guardian : public partition_guardian static bool s_disable_balancer; public: - checker_partition_guardian(meta_service *svc) : partition_guardian(svc) {} + checker_partition_guardian(meta_service *svc) : partition_guardian(svc), _svc(svc) {} pc_status cure(meta_view view, const dsn::gpid &gpid, configuration_proposal_action &action) override { @@ -82,22 +84,26 @@ class checker_partition_guardian : public partition_guardian return pc_status::healthy; pc_status result; - if (pc.primary.is_invalid()) { - if (pc.secondaries.size() > 0) { + if (pc.hp_primary.is_invalid()) { + if (pc.hp_secondaries.size() > 0) { action.node = pc.secondaries[0]; - for (unsigned int i = 1; i < pc.secondaries.size(); ++i) - if (pc.secondaries[i] < action.node) + action.__set_hp_node(pc.hp_secondaries[0]); + for (unsigned int i = 1; i < pc.hp_secondaries.size(); ++i) + if (pc.hp_secondaries[i] < action.hp_node) { action.node = pc.secondaries[i]; + action.hp_node = pc.hp_secondaries[i]; + } action.type = config_type::CT_UPGRADE_TO_PRIMARY; result = pc_status::ill; } - else if (pc.last_drops.size() == 0) { - std::vector sort_result; + else if (pc.hp_last_drops.size() == 0) { + std::vector sort_result; sort_alive_nodes(*view.nodes, server_load_balancer::primary_comparator(*view.nodes), sort_result); - action.node = sort_result[0]; + action.node = _svc->get_dns_resolver()->resolve_address(sort_result[0]); + action.__set_hp_node(sort_result[0]); action.type = config_type::CT_ASSIGN_PRIMARY; 
result = pc_status::ill; } @@ -105,28 +111,33 @@ class checker_partition_guardian : public partition_guardian // DDD else { action.node = *pc.last_drops.rbegin(); + action.__set_hp_node(*pc.hp_last_drops.rbegin()); action.type = config_type::CT_ASSIGN_PRIMARY; - LOG_ERROR("{} enters DDD state, we are waiting for its last primary node {} to " + LOG_ERROR("{} enters DDD state, we are waiting for its last primary node {}({}) to " "come back ...", pc.pid, + action.hp_node, action.node); result = pc_status::dead; } action.target = action.node; + action.__set_hp_target(action.hp_node); } - else if (static_cast(pc.secondaries.size()) + 1 < pc.max_replica_count) { - std::vector sort_result; + else if (static_cast(pc.hp_secondaries.size()) + 1 < pc.max_replica_count) { + std::vector sort_result; sort_alive_nodes( *view.nodes, server_load_balancer::partition_comparator(*view.nodes), sort_result); for (auto &node : sort_result) { if (!is_member(pc, node)) { - action.node = node; + action.node = _svc->get_dns_resolver()->resolve_address(node); + action.__set_hp_node(node); break; } } action.target = pc.primary; + action.__set_hp_target(pc.hp_primary); action.type = config_type::CT_ADD_SECONDARY; result = pc_status::ill; } else { @@ -135,10 +146,10 @@ class checker_partition_guardian : public partition_guardian return result; } - typedef std::function node_comparator; + typedef std::function node_comparator; static void sort_alive_nodes(const node_mapper &nodes, const node_comparator &cmp, - std::vector &sorted_node) + std::vector &sorted_node) { sorted_node.clear(); sorted_node.reserve(nodes.size()); @@ -149,6 +160,8 @@ class checker_partition_guardian : public partition_guardian } std::sort(sorted_node.begin(), sorted_node.end(), cmp); } + + meta_service *_svc; }; bool test_checker::s_inited = false; @@ -203,7 +216,7 @@ bool test_checker::init(const std::string &name, const std::vectorid(); std::string name = node.second->full_name(); - rpc_address paddr = 
node.second->rpc()->primary_address(); + host_port paddr = node.second->rpc()->primary_host_port(); int port = paddr.port(); _node_to_address[name] = paddr; LOG_INFO("=== node_to_address[{}]={}", name, paddr); @@ -265,7 +278,7 @@ void test_checker::check() } } -void test_checker::on_replica_state_change(::dsn::rpc_address from, +void test_checker::on_replica_state_change(::dsn::host_port from, const replica_configuration &new_config, bool is_closing) { @@ -374,7 +387,7 @@ bool test_checker::check_replica_state(int primary_count, int secondary_count, i return p == primary_count && s == secondary_count && i == inactive_count; } -std::string test_checker::address_to_node_name(rpc_address addr) +std::string test_checker::address_to_node_name(host_port addr) { auto find = _address_to_node.find(addr.port()); if (find != _address_to_node.end()) @@ -382,12 +395,12 @@ std::string test_checker::address_to_node_name(rpc_address addr) return "node@" + boost::lexical_cast(addr.port()); } -rpc_address test_checker::node_name_to_address(const std::string &name) +host_port test_checker::node_name_to_address(const std::string &name) { auto find = _node_to_address.find(name); if (find != _node_to_address.end()) return find->second; - return rpc_address(); + return host_port(); } void install_checkers() diff --git a/src/replica/storage/simple_kv/test/checker.h b/src/replica/storage/simple_kv/test/checker.h index 0578ac1330..509efcb01d 100644 --- a/src/replica/storage/simple_kv/test/checker.h +++ b/src/replica/storage/simple_kv/test/checker.h @@ -33,7 +33,7 @@ #include "common.h" #include "meta/meta_data.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/simulator.h" #include "utils/singleton.h" @@ -73,10 +73,10 @@ class test_checker : public dsn::utils::singleton bool check_replica_state(int primary_count, int secondary_count, int inactive_count); - std::string address_to_node_name(rpc_address addr); - rpc_address 
node_name_to_address(const std::string &name); + std::string address_to_node_name(host_port addr); + host_port node_name_to_address(const std::string &name); - void on_replica_state_change(::dsn::rpc_address from, + void on_replica_state_change(::dsn::host_port from, const replica_configuration &new_config, bool is_closing); void on_config_change(const app_mapper &new_config); @@ -92,8 +92,8 @@ class test_checker : public dsn::utils::singleton parti_config _last_config; state_snapshot _last_states; - std::map _node_to_address; // address is primary_address() - std::map _address_to_node; // port is enough for key + std::map _node_to_address; // address is primary_address() + std::map _address_to_node; // port is enough for key }; class wrap_checker : public dsn::tools::checker diff --git a/src/replica/storage/simple_kv/test/client.cpp b/src/replica/storage/simple_kv/test/client.cpp index f93dffe1b0..1de8421420 100644 --- a/src/replica/storage/simple_kv/test/client.cpp +++ b/src/replica/storage/simple_kv/test/client.cpp @@ -38,7 +38,9 @@ #include "replica/storage/simple_kv/simple_kv.client.h" #include "replica/storage/simple_kv/test/common.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/group_address.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/group_host_port.h" +#include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -55,7 +57,7 @@ using namespace dsn::replication::application; DEFINE_TASK_CODE(LPC_SIMPLE_KV_TEST, TASK_PRIORITY_COMMON, dsn::THREAD_POOL_DEFAULT) simple_kv_client_app::simple_kv_client_app(const service_app_info *info) - : ::dsn::service_app(info), _simple_kv_client(nullptr) + : ::dsn::service_app(info), _simple_kv_client(nullptr), _resolver(new dns_resolver()) { } @@ -66,10 +68,12 @@ ::dsn::error_code simple_kv_client_app::start(const std::vector &ar if (args.size() < 2) return ::dsn::ERR_INVALID_PARAMETERS; - std::vector 
meta_servers; + std::vector meta_servers; replica_helper::load_meta_servers(meta_servers); _meta_server_group.assign_group("meta_servers"); - _meta_server_group.group_address()->add_list(meta_servers); + for (const auto &hp : meta_servers) { + LOG_WARNING_IF(!_meta_server_group.group_host_port()->add(hp), "duplicate adress {}", hp); + } _simple_kv_client.reset( new application::simple_kv_client("mycluster", meta_servers, "simple_kv.instance0")); @@ -94,9 +98,9 @@ void simple_kv_client_app::run() std::string value; int timeout_ms; - rpc_address receiver; + host_port receiver; dsn::replication::config_type::type type; - rpc_address node; + host_port node; while (!g_done) { if (test_case::instance().check_client_write(id, key, value, timeout_ms)) { @@ -141,9 +145,9 @@ void simple_kv_client_app::begin_write(int id, std::chrono::milliseconds(timeout_ms)); } -void simple_kv_client_app::send_config_to_meta(const rpc_address &receiver, +void simple_kv_client_app::send_config_to_meta(const host_port &receiver, dsn::replication::config_type::type type, - const rpc_address &node) + const host_port &node) { dsn::message_ex *req = dsn::message_ex::create_request(RPC_CM_PROPOSE_BALANCER, 30000); @@ -151,15 +155,17 @@ void simple_kv_client_app::send_config_to_meta(const rpc_address &receiver, request.gpid = g_default_gpid; configuration_proposal_action act; - act.__set_target(receiver); - act.__set_node(node); + act.target = _resolver->resolve_address(receiver); + act.node = _resolver->resolve_address(node); + act.__set_hp_target(receiver); + act.__set_hp_node(node); act.__set_type(type); request.action_list.emplace_back(std::move(act)); request.__set_force(true); dsn::marshall(req, request); - dsn_rpc_call_one_way(_meta_server_group, req); + dsn_rpc_call_one_way(_resolver->resolve_address(_meta_server_group), req); } struct read_context diff --git a/src/replica/storage/simple_kv/test/client.h b/src/replica/storage/simple_kv/test/client.h index 66f71d5aae..37119dfe69 100644 --- 
a/src/replica/storage/simple_kv/test/client.h +++ b/src/replica/storage/simple_kv/test/client.h @@ -31,12 +31,14 @@ #include #include "meta_admin_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/service_app.h" #include "runtime/task/task_tracker.h" #include "utils/error_code.h" namespace dsn { +class dns_resolver; + namespace replication { namespace application { class simple_kv_client; @@ -57,15 +59,16 @@ class simple_kv_client_app : public ::dsn::service_app void begin_read(int id, const std::string &key, int timeout_ms); void begin_write(int id, const std::string &key, const std::string &value, int timeout_ms); - void send_config_to_meta(const rpc_address &receiver, + void send_config_to_meta(const host_port &receiver, dsn::replication::config_type::type type, - const rpc_address &node); + const host_port &node); private: std::unique_ptr _simple_kv_client; - rpc_address _meta_server_group; - rpc_address _service_addr; + host_port _meta_server_group; + host_port _service_addr; dsn::task_tracker _tracker; + std::unique_ptr _resolver; }; } } diff --git a/src/replica/storage/simple_kv/test/common.cpp b/src/replica/storage/simple_kv/test/common.cpp index 621218faf0..cc121d9904 100644 --- a/src/replica/storage/simple_kv/test/common.cpp +++ b/src/replica/storage/simple_kv/test/common.cpp @@ -86,7 +86,7 @@ partition_status::type partition_status_from_short_string(const std::string &str return partition_status::PS_INVALID; } -std::string address_to_node(rpc_address addr) +std::string address_to_node(host_port addr) { if (addr.is_invalid()) return "-"; @@ -94,10 +94,10 @@ std::string address_to_node(rpc_address addr) return test_checker::instance().address_to_node_name(addr); } -rpc_address node_to_address(const std::string &name) +host_port node_to_address(const std::string &name) { if (name == "-") - return rpc_address(); + return host_port(); CHECK(test_checker::s_inited, ""); return 
test_checker::instance().node_name_to_address(name); } @@ -325,8 +325,8 @@ void parti_config::convert_from(const partition_configuration &c) { pid = c.pid; ballot = c.ballot; - primary = address_to_node(c.primary); - for (auto &s : c.secondaries) + primary = address_to_node(c.hp_primary); + for (auto &s : c.hp_secondaries) secondaries.push_back(address_to_node(s)); std::sort(secondaries.begin(), secondaries.end()); } diff --git a/src/replica/storage/simple_kv/test/common.h b/src/replica/storage/simple_kv/test/common.h index b1ad6c6669..b5e60baf89 100644 --- a/src/replica/storage/simple_kv/test/common.h +++ b/src/replica/storage/simple_kv/test/common.h @@ -37,8 +37,8 @@ #include "common/gpid.h" #include "common/replication_other_types.h" #include "metadata_types.h" -#include "runtime/rpc/rpc_address.h" #include "utils/fmt_utils.h" +#include "runtime/rpc/rpc_host_port.h" namespace dsn { class partition_configuration; @@ -57,10 +57,10 @@ partition_status::type partition_status_from_short_string(const std::string &str // transfer primary_address to node_name // return "-" if addr.is_invalid() // return "node@port" if not found -std::string address_to_node(rpc_address addr); +std::string address_to_node(host_port addr); // transfer node_name to primary_address // return invalid addr if not found -rpc_address node_to_address(const std::string &name); +host_port node_to_address(const std::string &name); std::string gpid_to_string(gpid gpid); bool gpid_from_string(const std::string &str, gpid &gpid); diff --git a/src/replica/test/mock_utils.h b/src/replica/test/mock_utils.h index bf2e894002..23b9bb94fb 100644 --- a/src/replica/test/mock_utils.h +++ b/src/replica/test/mock_utils.h @@ -34,6 +34,7 @@ #include "replica/replica.h" #include "replica/replica_stub.h" #include "replica/backup/cold_backup_context.h" +#include "runtime/rpc/rpc_host_port.h" namespace dsn { namespace replication { @@ -179,11 +180,11 @@ class mock_replica : public replica { _primary_states.membership = 
pconfig; } - partition_bulk_load_state get_secondary_bulk_load_state(const rpc_address &node) + partition_bulk_load_state get_secondary_bulk_load_state(const host_port &node) { return _primary_states.secondary_bulk_load_states[node]; } - void set_secondary_bulk_load_state(const rpc_address &node, + void set_secondary_bulk_load_state(const host_port &node, const partition_bulk_load_state &state) { _primary_states.secondary_bulk_load_states[node] = state; @@ -275,7 +276,7 @@ class mock_replica_stub : public replica_stub void set_state_connected() { _state = replica_node_state::NS_Connected; } - rpc_address get_meta_server_address() const override { return rpc_address("127.0.0.2", 12321); } + rpc_address get_meta_server_address() const override { return rpc_address("127.0.0.1", 12321); } std::map mock_replicas; @@ -371,7 +372,7 @@ class mock_replica_stub : public replica_stub _bulk_load_downloading_count.store(count); } - void set_rpc_address(const rpc_address &address) { _primary_address = address; } + void set_host_port(const host_port &address) { _primary_host_port = address; } }; class mock_log_file : public log_file diff --git a/src/replica/test/replica_test.cpp b/src/replica/test/replica_test.cpp index eafbe9517a..1ef407dd00 100644 --- a/src/replica/test/replica_test.cpp +++ b/src/replica/test/replica_test.cpp @@ -269,6 +269,7 @@ TEST_P(replica_test, write_size_limited) auto write_request = dsn::message_ex::create_request(RPC_TEST); auto cleanup = dsn::defer([=]() { delete write_request; }); + header.context.u.is_forwarded = false; write_request->header = &header; std::unique_ptr sim_net( new tools::sim_network_provider(nullptr, nullptr)); diff --git a/src/runtime/api_layer1.h b/src/runtime/api_layer1.h index dc5e317032..6c2d675a13 100644 --- a/src/runtime/api_layer1.h +++ b/src/runtime/api_layer1.h @@ -33,6 +33,7 @@ #include "runtime/api_task.h" #include "common/gpid.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include 
"runtime/task/task_tracker.h" /*! @@ -215,6 +216,8 @@ replace the underneath implementation of the network (e.g., RDMA, simulated netw extern dsn::rpc_address dsn_primary_address(); +extern dsn::host_port dsn_primary_host_port(); + /*! @defgroup rpc-server Server-Side RPC Primitives diff --git a/src/runtime/rpc/asio_net_provider.cpp b/src/runtime/rpc/asio_net_provider.cpp index f06166d5ec..831b89525c 100644 --- a/src/runtime/rpc/asio_net_provider.cpp +++ b/src/runtime/rpc/asio_net_provider.cpp @@ -146,6 +146,7 @@ error_code asio_network_provider::start(rpc_channel channel, int port, bool clie channel); _address.assign_ipv4(get_local_ipv4(), port); + _hp = ::dsn::host_port(_address); if (!client_only) { auto v4_addr = boost::asio::ip::address_v4::any(); //(ntohl(_address.ip)); @@ -333,6 +334,19 @@ void asio_udp_provider::do_receive() return; } + // Get the remote endpoint of the socket. + boost::system::error_code ec; + auto remote = _socket->remote_endpoint(ec); + if (ec) { + LOG_ERROR("failed to get the remote endpoint: {}", ec.message()); + do_receive(); + return; + } + + auto ip = remote.address().to_v4().to_ulong(); + auto port = remote.port(); + ::dsn::rpc_address remote_addr = ::dsn::rpc_address(ip, port); + auto hdr_format = message_parser::get_header_type(_recv_reader._buffer.data()); if (NET_HDR_INVALID == hdr_format) { LOG_ERROR("{}: asio udp read failed: invalid header type '{}'", @@ -356,7 +370,25 @@ void asio_udp_provider::do_receive() return; } + if (msg->header->from_address != remote_addr) { + if (!msg->header->context.u.is_forwarded) { + msg->header->from_address = remote_addr; + LOG_DEBUG("{}: message's from_address {} is not equal to socket's remote_addr " + "{}, assign it to remote_addr.", + _address, + msg->header->from_address, + remote_addr); + } else { + LOG_DEBUG("{}: message's from_address {} is not equal to socket's remote_addr " + "{}, but it's forwarded message, ignore it!.", + _address, + msg->header->from_address, + remote_addr); + } 
+ } + msg->to_address = _address; + msg->to_host_port = _hp; if (msg->header->context.u.is_request) { on_recv_request(msg, 0); } else { @@ -423,6 +455,8 @@ error_code asio_udp_provider::start(rpc_channel channel, int port, bool client_o } } + _hp = ::dsn::host_port(_address); + for (int i = 0; i < FLAGS_io_service_worker_count; i++) { _workers.push_back(std::make_shared([this, i]() { task::set_tls_dsn_context(node(), nullptr); diff --git a/src/runtime/rpc/asio_net_provider.h b/src/runtime/rpc/asio_net_provider.h index bbd200c1c5..774a6b5759 100644 --- a/src/runtime/rpc/asio_net_provider.h +++ b/src/runtime/rpc/asio_net_provider.h @@ -37,6 +37,7 @@ #include "runtime/rpc/message_parser.h" #include "runtime/rpc/network.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/task/task_spec.h" #include "utils/error_code.h" @@ -79,6 +80,7 @@ class asio_network_provider : public connection_oriented_network virtual error_code start(rpc_channel channel, int port, bool client_only) override; virtual ::dsn::rpc_address address() override { return _address; } + virtual ::dsn::host_port host_port() override { return _hp; } virtual rpc_session_ptr create_client_session(::dsn::rpc_address server_addr) override; private: @@ -93,6 +95,7 @@ class asio_network_provider : public connection_oriented_network std::vector> _io_services; std::vector> _workers; ::dsn::rpc_address _address; + ::dsn::host_port _hp; }; // TODO(Tangyanzhao): change the network model like asio_network_provider @@ -109,6 +112,8 @@ class asio_udp_provider : public network virtual ::dsn::rpc_address address() override { return _address; } + virtual ::dsn::host_port host_port() override { return _hp; } + virtual void inject_drop_message(message_ex *msg, bool is_send) override { // nothing to do for UDP @@ -125,6 +130,7 @@ class asio_udp_provider : public network std::shared_ptr _socket; std::vector> _workers; ::dsn::rpc_address _address; 
+ ::dsn::host_port _hp; message_reader _recv_reader; ::dsn::utils::ex_lock_nr _lock; // [ diff --git a/src/runtime/rpc/dns_resolver.cpp b/src/runtime/rpc/dns_resolver.cpp index 06bf4aca17..c293b7798d 100644 --- a/src/runtime/rpc/dns_resolver.cpp +++ b/src/runtime/rpc/dns_resolver.cpp @@ -53,11 +53,6 @@ dns_resolver::dns_resolver() METRIC_VAR_INIT_server(dns_resolver_resolve_duration_ns), METRIC_VAR_INIT_server(dns_resolver_resolve_by_dns_duration_ns) { -#ifndef MOCK_TEST - static int only_one_instance = 0; - only_one_instance++; - CHECK_EQ_MSG(1, only_one_instance, "dns_resolver should only created once!"); -#endif } bool dns_resolver::get_cached_addresses(const host_port &hp, std::vector &addresses) diff --git a/src/runtime/rpc/group_host_port.h b/src/runtime/rpc/group_host_port.h index 1bc8989ea7..086887ccc2 100644 --- a/src/runtime/rpc/group_host_port.h +++ b/src/runtime/rpc/group_host_port.h @@ -23,7 +23,6 @@ #include #include "runtime/rpc/group_address.h" -#include "runtime/rpc/group_host_port.h" #include "runtime/rpc/rpc_host_port.h" #include "utils/autoref_ptr.h" #include "utils/fmt_logging.h" @@ -131,7 +130,10 @@ inline rpc_group_host_port::rpc_group_host_port(const rpc_group_address *g_addr) CHECK_TRUE(add(host_port(addr))); } _update_leader_automatically = g_addr->is_update_leader_automatically(); - set_leader(host_port(g_addr->leader())); + auto leader_addr = g_addr->leader(); + if (rpc_address::s_invalid_address != leader_addr) { + set_leader(host_port(leader_addr)); + } } inline rpc_group_host_port &rpc_group_host_port::operator=(const rpc_group_host_port &other) diff --git a/src/runtime/rpc/network.cpp b/src/runtime/rpc/network.cpp index c55826449c..403f3e1f69 100644 --- a/src/runtime/rpc/network.cpp +++ b/src/runtime/rpc/network.cpp @@ -387,10 +387,10 @@ rpc_session::rpc_session(connection_oriented_network &net, _message_sent(0), _net(net), _remote_addr(remote_addr), + _remote_host_port(host_port(remote_addr)), 
_max_buffer_block_count_per_send(net.max_buffer_block_count_per_send()), _reader(net.message_buffer_block_size()), _parser(parser), - _is_client(is_client), _matcher(_net.engine()->matcher()), _delay_server_receive_ms(0) @@ -432,9 +432,12 @@ void rpc_session::on_failure(bool is_write) bool rpc_session::on_recv_message(message_ex *msg, int delay_ms) { - if (msg->header->from_address.is_invalid()) + if (msg->header->from_address.is_invalid()) { msg->header->from_address = _remote_addr; + } + msg->to_address = _net.address(); + msg->to_host_port = _net.host_port(); msg->io_session = this; // ignore msg if join point return false diff --git a/src/runtime/rpc/network.h b/src/runtime/rpc/network.h index 05bfcda453..3ec3ef42fd 100644 --- a/src/runtime/rpc/network.h +++ b/src/runtime/rpc/network.h @@ -34,6 +34,7 @@ #include "rpc_address.h" #include "runtime/rpc/message_parser.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/task/task_spec.h" #include "utils/autoref_ptr.h" @@ -91,6 +92,7 @@ class network // the named address // virtual ::dsn::rpc_address address() = 0; + virtual ::dsn::host_port host_port() = 0; // // this is where the upper rpc engine calls down for a RPC call @@ -227,6 +229,7 @@ class rpc_session : public ref_counter bool is_client() const { return _is_client; } dsn::rpc_address remote_address() const { return _remote_addr; } + dsn::host_port remote_host_port() const { return _remote_host_port; } connection_oriented_network &net() const { return _net; } message_parser_ptr parser() const { return _parser; } @@ -328,6 +331,7 @@ class rpc_session : public ref_counter // constant info connection_oriented_network &_net; dsn::rpc_address _remote_addr; + dsn::host_port _remote_host_port; int _max_buffer_block_count_per_send; message_reader _reader; message_parser_ptr _parser; diff --git a/src/runtime/rpc/network.sim.cpp b/src/runtime/rpc/network.sim.cpp index a2c185dbc4..1dee19c0e5 100644 --- 
a/src/runtime/rpc/network.sim.cpp +++ b/src/runtime/rpc/network.sim.cpp @@ -86,6 +86,7 @@ static message_ex *virtual_send_message(message_ex *msg) blob bb(buffer, 0, msg->header->body_length + sizeof(message_header)); message_ex *recv_msg = message_ex::create_receive_message(bb); recv_msg->to_address = msg->to_address; + recv_msg->to_host_port = msg->to_host_port; msg->copy_to(*recv_msg); // extensible object state move @@ -160,6 +161,7 @@ sim_network_provider::sim_network_provider(rpc_engine *rpc, network *inner_provi : connection_oriented_network(rpc, inner_provider) { _address.assign_ipv4("localhost", 1); + _hp = ::dsn::host_port(_address); } error_code sim_network_provider::start(rpc_channel channel, int port, bool client_only) @@ -169,6 +171,7 @@ error_code sim_network_provider::start(rpc_channel channel, int port, bool clien channel); _address = ::dsn::rpc_address("localhost", port); + _hp = ::dsn::host_port(_address); auto hostname = boost::asio::ip::host_name(); if (!client_only) { for (int i = NET_HDR_INVALID + 1; i <= network_header_format::max_value(); i++) { diff --git a/src/runtime/rpc/network.sim.h b/src/runtime/rpc/network.sim.h index 279eafe742..ed91f1f09f 100644 --- a/src/runtime/rpc/network.sim.h +++ b/src/runtime/rpc/network.sim.h @@ -31,6 +31,7 @@ #include "runtime/rpc/message_parser.h" #include "runtime/rpc/network.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/task/task_spec.h" #include "utils/error_code.h" @@ -91,6 +92,7 @@ class sim_network_provider : public connection_oriented_network virtual error_code start(rpc_channel channel, int port, bool client_only); virtual ::dsn::rpc_address address() { return _address; } + virtual ::dsn::host_port host_port() { return _hp; } virtual rpc_session_ptr create_client_session(::dsn::rpc_address server_addr) { @@ -109,6 +111,7 @@ class sim_network_provider : public connection_oriented_network private: 
::dsn::rpc_address _address; + ::dsn::host_port _hp; }; //------------- inline implementations ------------- diff --git a/src/runtime/rpc/rpc_address.h b/src/runtime/rpc/rpc_address.h index a2d002a5d0..c59df58b69 100644 --- a/src/runtime/rpc/rpc_address.h +++ b/src/runtime/rpc/rpc_address.h @@ -164,7 +164,7 @@ class rpc_address switch (type()) { case HOST_TYPE_IPV4: - return ip() == r.ip() && _addr.v4.port == r.port(); + return ip() == r.ip() && port() == r.port(); case HOST_TYPE_GROUP: return _addr.group.group == r._addr.group.group; default: diff --git a/src/runtime/rpc/rpc_engine.cpp b/src/runtime/rpc/rpc_engine.cpp index 97394d7f32..f97f00c763 100644 --- a/src/runtime/rpc/rpc_engine.cpp +++ b/src/runtime/rpc/rpc_engine.cpp @@ -36,6 +36,7 @@ #include "runtime/api_layer1.h" #include "runtime/global_config.h" #include "runtime/rpc/group_address.h" +#include "runtime/rpc/group_host_port.h" #include "runtime/rpc/network.h" #include "runtime/rpc/serialization.h" #include "runtime/service_engine.h" @@ -149,6 +150,7 @@ bool rpc_client_matcher::on_recv_reply(network *net, uint64_t key, message_ex *r case GRPC_TO_LEADER: if (req->server_address.group_address()->is_update_leader_automatically()) { req->server_address.group_address()->set_leader(addr); + req->server_host_port.group_host_port()->set_leader(host_port(addr)); } break; default: @@ -177,6 +179,8 @@ bool rpc_client_matcher::on_recv_reply(network *net, uint64_t key, message_ex *r req->server_address.group_address()->is_update_leader_automatically()) { req->server_address.group_address()->set_leader( reply->header->from_address); + req->server_host_port.group_host_port()->set_leader( + host_port(reply->header->from_address)); } break; default: @@ -517,9 +521,11 @@ error_code rpc_engine::start(const service_app_spec &aspec) _local_primary_address = _client_nets[NET_HDR_DSN][0]->address(); _local_primary_address.set_port(aspec.ports.size() > 0 ? 
*aspec.ports.begin() : aspec.id); + _local_primary_host_port = host_port(_local_primary_address); - LOG_INFO("=== service_node=[{}], primary_address=[{}] ===", + LOG_INFO("=== service_node=[{}], primary_address=[{}({})] ===", _node->full_name(), + _local_primary_host_port, _local_primary_address); _is_running = true; @@ -616,7 +622,7 @@ void rpc_engine::on_recv_request(network *net, message_ex *msg, int delay_ms) void rpc_engine::call(message_ex *request, const rpc_response_task_ptr &call) { auto &hdr = *request->header; - hdr.from_address = primary_address(); + hdr.from_address = _local_primary_address; hdr.trace_id = rand::next_u64(std::numeric_limits::min(), std::numeric_limits::max()); @@ -668,6 +674,7 @@ void rpc_engine::call_ip(rpc_address addr, } request->to_address = addr; + request->to_host_port = host_port(addr); auto sp = task_spec::get(request->local_rpc_code); auto &hdr = *request->header; @@ -833,7 +840,7 @@ void rpc_engine::forward(message_ex *request, rpc_address address) task_spec::get(request->local_rpc_code)->name, request->header->trace_id); CHECK_NE_MSG(address, - primary_address(), + _local_primary_address, "cannot forward msg {} (trace_id = {:#018x}) to the local node", task_spec::get(request->local_rpc_code)->name, request->header->trace_id); diff --git a/src/runtime/rpc/rpc_engine.h b/src/runtime/rpc/rpc_engine.h index 0613b721aa..93f128f457 100644 --- a/src/runtime/rpc/rpc_engine.h +++ b/src/runtime/rpc/rpc_engine.h @@ -36,6 +36,7 @@ #include "network.h" #include "runtime/api_task.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/task/task.h" #include "runtime/task/task_code.h" @@ -57,14 +58,13 @@ struct service_app_spec; // // client matcher for matching RPC request and RPC response, and handling timeout // (1) the whole network may share a single client matcher, -// (2) or we usually prefere each pair use a client matcher to have better inquery +// (2) 
or we usually prefer each pair use a client matcher to have better query // performance // (3) or we have certain cases we want RPC responses from node which is not the initial target node // the RPC request message is sent to. In this case, a shared rpc_engine level matcher is used. // // WE NOW USE option (3) so as to enable more features and the performance should not be degraded -// (due to -// less std::shared_ptr operations in rpc_timeout_task +// (due to less std::shared_ptr operations in rpc_timeout_task) // #define MATCHER_BUCKET_NR 13 class rpc_client_matcher : public ref_counter @@ -175,6 +175,7 @@ class rpc_engine // service_node *node() const { return _node; } ::dsn::rpc_address primary_address() const { return _local_primary_address; } + host_port primary_host_port() const { return _local_primary_host_port; } rpc_client_matcher *matcher() { return &_rpc_matcher; } // call with group address only @@ -202,6 +203,7 @@ class rpc_engine std::unordered_map>> _server_nets; // > ::dsn::rpc_address _local_primary_address; + host_port _local_primary_host_port; rpc_client_matcher _rpc_matcher; rpc_server_dispatcher _rpc_dispatcher; diff --git a/src/runtime/rpc/rpc_host_port.cpp b/src/runtime/rpc/rpc_host_port.cpp index 359cf5a3b1..28abc194e9 100644 --- a/src/runtime/rpc/rpc_host_port.cpp +++ b/src/runtime/rpc/rpc_host_port.cpp @@ -40,11 +40,14 @@ const host_port host_port::s_invalid_host_port; host_port::host_port(std::string host, uint16_t port) : _host(std::move(host)), _port(port), _type(HOST_TYPE_IPV4) { - CHECK_NE_MSG(rpc_address::ipv4_from_host(_host.c_str()), 0, "invalid hostname: {}", _host); + if (_host != "0.0.0.0") { + CHECK_NE_MSG(rpc_address::ipv4_from_host(_host.c_str()), 0, "invalid hostname: {}", _host); + } } host_port::host_port(rpc_address addr) { + reset(); switch (addr.type()) { case HOST_TYPE_IPV4: { CHECK(utils::hostname_from_ip(htonl(addr.ip()), &_host), @@ -194,4 +197,13 @@ error_s host_port::resolve_addresses(std::vector &addresses) 
const return error_s::ok(); } +void host_port::fill_host_ports_from_addresses(const std::vector &addr_v, + std::vector &hp_v) +{ + CHECK(hp_v.empty(), "optional host_port should be empty!"); + for (const auto &addr : addr_v) { + hp_v.emplace_back(host_port(addr)); + } +} + } // namespace dsn diff --git a/src/runtime/rpc/rpc_host_port.h b/src/runtime/rpc/rpc_host_port.h index 4cda4180bb..fd5502ddcb 100644 --- a/src/runtime/rpc/rpc_host_port.h +++ b/src/runtime/rpc/rpc_host_port.h @@ -42,6 +42,16 @@ class TProtocol; } // namespace thrift } // namespace apache +#define GET_HOST_PORT(obj, field, target) \ + do { \ + const auto &_obj = (obj); \ + if (_obj.__isset.hp_##field) { \ + target = _obj.hp_##field; \ + } else { \ + target = std::move(dsn::host_port(_obj.field)); \ + } \ + } while (0) + namespace dsn { class rpc_group_host_port; @@ -90,6 +100,9 @@ class host_port uint32_t read(::apache::thrift::protocol::TProtocol *iprot); uint32_t write(::apache::thrift::protocol::TProtocol *oprot) const; + static void fill_host_ports_from_addresses(const std::vector &addr_v, + /*output*/ std::vector &hp_v); + private: std::string _host; uint16_t _port = 0; diff --git a/src/runtime/rpc/rpc_message.cpp b/src/runtime/rpc/rpc_message.cpp index 7f34b6b857..33f2452e98 100644 --- a/src/runtime/rpc/rpc_message.cpp +++ b/src/runtime/rpc/rpc_message.cpp @@ -35,6 +35,7 @@ #include "network.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "utils/crc.h" #include "utils/flags.h" @@ -220,6 +221,7 @@ message_ex *message_ex::copy(bool clone_content, bool copy_for_receive) message_ex *msg = new message_ex(); msg->to_address = to_address; + msg->to_host_port = to_host_port; msg->local_rpc_code = local_rpc_code; msg->hdr_format = hdr_format; @@ -354,6 +356,7 @@ message_ex *message_ex::create_response() // the primary address. 
msg->header->from_address = to_address; msg->to_address = header->from_address; + msg->to_host_port = host_port(header->from_address); msg->io_session = io_session; msg->hdr_format = hdr_format; diff --git a/src/runtime/rpc/rpc_message.h b/src/runtime/rpc/rpc_message.h index d07cb0c5b6..63e4ecb5be 100644 --- a/src/runtime/rpc/rpc_message.h +++ b/src/runtime/rpc/rpc_message.h @@ -33,6 +33,7 @@ #include "common/gpid.h" #include "rpc_address.h" +#include "rpc_host_port.h" #include "runtime/task/task_code.h" #include "runtime/task/task_spec.h" #include "utils/autoref_ptr.h" @@ -95,13 +96,13 @@ typedef struct message_header // Attention: // here, from_address must be IPv4 address, namely we can regard from_address as a - // POD-type structure, so no memory-leak will occur even if we don't call it's + // POD-type structure, so no memory-leak will occur even if we don't call its // destructor. // // generally, it is the from_node's primary address, except the // case described in message_ex::create_response()'s ATTENTION comment. 
// - // in the forwarding case, the from_address is always the orignal client's address + // in the forwarding case, the from_address is always the original client's address rpc_address from_address; struct @@ -137,6 +138,8 @@ class message_ex : public ref_counter, public extensible_object rpc_session_ptr io_session; // send/recv session rpc_address to_address; // always ipv4/v6 address, it is the to_node's net address rpc_address server_address; // used by requests, and may be of uri/group address + host_port to_host_port; // fqdn from 'to_address' + host_port server_host_port; // fqdn from 'server_address' dsn::task_code local_rpc_code; network_header_format hdr_format; int send_retry_count; diff --git a/src/runtime/rpc/serialization.h b/src/runtime/rpc/serialization.h index 6d9eec3d5a..598dd0163a 100644 --- a/src/runtime/rpc/serialization.h +++ b/src/runtime/rpc/serialization.h @@ -28,10 +28,13 @@ #include "utils/utils.h" #include "rpc_address.h" +#include "rpc_host_port.h" #include "runtime/rpc/rpc_stream.h" #include "common/serialization_helper/thrift_helper.h" namespace dsn { +class partition_configuration; + namespace serialization { template @@ -101,4 +104,7 @@ inline void unmarshall(dsn::message_ex *msg, /*out*/ T &val) unmarshall(reader, val, (dsn_msg_serialize_format)msg->header->context.u.serialize_format); } +template <> +inline void unmarshall(dsn::message_ex *msg, /*out*/ partition_configuration &val); + } // namespace dsn diff --git a/src/runtime/service_api_c.cpp b/src/runtime/service_api_c.cpp index 271037aaf3..6bb392e90d 100644 --- a/src/runtime/service_api_c.cpp +++ b/src/runtime/service_api_c.cpp @@ -51,6 +51,7 @@ #include "runtime/global_config.h" #include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_engine.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "security/init.h" #include "security/negotiation_manager.h" @@ -140,6 +141,11 @@ void dsn_coredump() // rpc calls dsn::rpc_address 
dsn_primary_address() { return ::dsn::task::get_current_rpc()->primary_address(); } +dsn::host_port dsn_primary_host_port() +{ + return ::dsn::task::get_current_rpc()->primary_host_port(); +} + bool dsn_rpc_register_handler(dsn::task_code code, const char *extra_name, const dsn::rpc_request_handler &cb) @@ -158,6 +164,7 @@ void dsn_rpc_call(dsn::rpc_address server, dsn::rpc_response_task *rpc_call) auto msg = rpc_call->get_request(); msg->server_address = server; + msg->server_host_port = dsn::host_port(server); ::dsn::task::get_current_rpc()->call(msg, dsn::rpc_response_task_ptr(rpc_call)); } @@ -165,6 +172,7 @@ dsn::message_ex *dsn_rpc_call_wait(dsn::rpc_address server, dsn::message_ex *req { auto msg = ((::dsn::message_ex *)request); msg->server_address = server; + msg->server_host_port = dsn::host_port(server); ::dsn::rpc_response_task *rtask = new ::dsn::rpc_response_task(msg, nullptr, 0); rtask->add_ref(); @@ -185,6 +193,7 @@ void dsn_rpc_call_one_way(dsn::rpc_address server, dsn::message_ex *request) { auto msg = ((::dsn::message_ex *)request); msg->server_address = server; + msg->server_host_port = dsn::host_port(server); ::dsn::task::get_current_rpc()->call(msg, nullptr); } diff --git a/src/runtime/service_app.h b/src/runtime/service_app.h index 0e6e2cfc86..71e23fc6d7 100644 --- a/src/runtime/service_app.h +++ b/src/runtime/service_app.h @@ -34,7 +34,7 @@ #include "utils/threadpool_code.h" #include "runtime/task/task_code.h" #include "common/gpid.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "common/gpid.h" #include "utils/factory_store.h" #include @@ -88,14 +88,14 @@ class service_app } bool is_started() const { return _started; } - rpc_address primary_address() const { return _address; } - void set_address(const rpc_address &addr) { _address = addr; } + host_port primary_host_port() const { return _host_port; } + void set_host_port(const host_port &hp) { _host_port = hp; } void set_started(bool start_flag) { 
_started = start_flag; } const service_app_info &info() const; protected: const service_app_info *const _info; - rpc_address _address; + host_port _host_port; bool _started; }; diff --git a/src/runtime/service_engine.cpp b/src/runtime/service_engine.cpp index ee0c742d81..acfca18ab3 100644 --- a/src/runtime/service_engine.cpp +++ b/src/runtime/service_engine.cpp @@ -76,7 +76,7 @@ error_code service_node::init_rpc_engine() dsn::error_code service_node::start_app() { CHECK(_entity, "entity hasn't initialized"); - _entity->set_address(rpc()->primary_address()); + _entity->set_host_port(rpc()->primary_host_port()); std::vector args; utils::split_args(spec().arguments.c_str(), args); diff --git a/src/runtime/test/host_port_test.cpp b/src/runtime/test/host_port_test.cpp index 05d6982b2d..76f9e104a4 100644 --- a/src/runtime/test/host_port_test.cpp +++ b/src/runtime/test/host_port_test.cpp @@ -285,5 +285,4 @@ TEST(host_port_test, thrift_parser) send_and_check_host_port_by_serialize(hp2, DSF_THRIFT_BINARY); send_and_check_host_port_by_serialize(hp2, DSF_THRIFT_JSON); } - } // namespace dsn diff --git a/src/runtime/test/rpc.cpp b/src/runtime/test/rpc.cpp index 17c95fb5fe..1cfa27c63d 100644 --- a/src/runtime/test/rpc.cpp +++ b/src/runtime/test/rpc.cpp @@ -37,6 +37,7 @@ #include "gtest/gtest.h" #include "runtime/rpc/group_address.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/rpc/rpc_message.h" #include "runtime/rpc/serialization.h" #include "runtime/task/async_calls.h" @@ -94,10 +95,10 @@ TEST(core, group_address_talk_to_others) auto typed_callback = [addr](error_code err_code, const std::string &result) { EXPECT_EQ(ERR_OK, err_code); - dsn::rpc_address addr_got; + dsn::host_port hp_got; LOG_INFO("talk to others callback, result: {}", result); - EXPECT_TRUE(addr_got.from_string_ipv4(result.c_str())); - EXPECT_EQ(TEST_PORT_END, addr_got.port()); + EXPECT_TRUE(hp_got.from_string(result)); + EXPECT_EQ(TEST_PORT_END, 
hp_got.port()); }; ::dsn::task_ptr resp = ::dsn::rpc::call(addr, RPC_TEST_STRING_COMMAND, @@ -115,10 +116,10 @@ TEST(core, group_address_change_leader) auto typed_callback = [addr, &rpc_err](error_code err_code, const std::string &result) -> void { rpc_err = err_code; if (ERR_OK == err_code) { - ::dsn::rpc_address addr_got; + dsn::host_port hp_got; LOG_INFO("talk to others callback, result: {}", result); - EXPECT_TRUE(addr_got.from_string_ipv4(result.c_str())); - EXPECT_EQ(TEST_PORT_END, addr_got.port()); + EXPECT_TRUE(hp_got.from_string(result)); + EXPECT_EQ(TEST_PORT_END, hp_got.port()); } }; @@ -237,8 +238,7 @@ TEST(core, group_address_no_response_2) TEST(core, send_to_invalid_address) { ::dsn::rpc_address group = build_group(); - /* here we assume 10.255.254.253:32766 is not assigned */ - group.group_address()->set_leader(dsn::rpc_address("10.255.254.253", 32766)); + group.group_address()->set_leader(dsn::rpc_address("127.0.0.1", 32766)); rpc_reply_handler action_on_succeed = [](error_code err, dsn::message_ex *, dsn::message_ex *resp) { diff --git a/src/runtime/test_utils.h b/src/runtime/test_utils.h index 29520eee97..d43088a7e7 100644 --- a/src/runtime/test_utils.h +++ b/src/runtime/test_utils.h @@ -34,11 +34,11 @@ #include "utils/threadpool_code.h" #include "runtime/task/task_code.h" #include "common/gpid.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/serialization.h" #include "runtime/rpc/rpc_stream.h" #include "runtime/serverlet.h" #include "runtime/service_app.h" -#include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "runtime/task/task_worker.h" @@ -78,7 +78,9 @@ class test_client : public ::dsn::serverlet, public ::dsn::service_ { public: test_client(const service_app_info *info) - : ::dsn::serverlet("test-server"), ::dsn::service_app(info) + : ::dsn::serverlet("test-server"), + ::dsn::service_app(info), + _dns_resolver(new dns_resolver()) { } @@ -94,20 +96,20 @@ class 
test_client : public ::dsn::serverlet, public ::dsn::service_ ::dsn::unmarshall(message, command); if (command == "expect_talk_to_others") { - dsn::rpc_address next_addr = dsn::service_app::primary_address(); - if (next_addr.port() != TEST_PORT_END) { - next_addr.assign_ipv4(next_addr.ip(), next_addr.port() + 1); - LOG_INFO("test_client_server, talk_to_others: {}", next_addr); - dsn_rpc_forward(message, next_addr); + dsn::host_port next_hp = dsn::service_app::primary_host_port(); + if (next_hp.port() != TEST_PORT_END) { + next_hp = dsn::host_port(next_hp.host(), next_hp.port() + 1); + LOG_INFO("test_client_server, talk_to_others: {}", next_hp); + dsn_rpc_forward(message, _dns_resolver->resolve_address(next_hp)); } else { - LOG_INFO("test_client_server, talk_to_me: {}", next_addr); - reply(message, next_addr.to_std_string()); + LOG_INFO("test_client_server, talk_to_me: {}", next_hp); + reply(message, next_hp.to_string()); } } else if (command == "expect_no_reply") { - if (dsn::service_app::primary_address().port() == TEST_PORT_END) { + if (dsn::service_app::primary_host_port().port() == TEST_PORT_END) { LOG_INFO("test_client_server, talk_with_reply: {}", - dsn::service_app::primary_address()); - reply(message, dsn::service_app::primary_address().to_std_string()); + dsn::service_app::primary_host_port()); + reply(message, dsn::service_app::primary_host_port().to_string()); } } else if (command.substr(0, 5) == "echo ") { reply(message, command.substr(5)); @@ -157,4 +159,6 @@ class test_client : public ::dsn::serverlet, public ::dsn::service_ } ::dsn::error_code stop(bool cleanup = false) { return ERR_OK; } + + std::unique_ptr _dns_resolver; }; diff --git a/src/server/available_detector.h b/src/server/available_detector.h index 74a394406c..075c271dd2 100644 --- a/src/server/available_detector.h +++ b/src/server/available_detector.h @@ -27,7 +27,7 @@ #include "dsn.layer2_types.h" #include "perf_counter/perf_counter_wrapper.h" -#include "runtime/rpc/rpc_address.h" 
+#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "runtime/task/task_tracker.h" #include "utils/synchronize.h" @@ -76,7 +76,7 @@ class available_detector // client to access server. pegasus_client *_client; std::shared_ptr _ddl_client; - std::vector _meta_list; + std::vector _meta_list; ::dsn::utils::ex_lock_nr _alert_lock; // for record partition fail times. std::vector>> _fail_count; diff --git a/src/server/config.min.ini b/src/server/config.min.ini index 77480a8d68..8ae1e9bbba 100644 --- a/src/server/config.min.ini +++ b/src/server/config.min.ini @@ -43,7 +43,7 @@ logging_start_level = LOG_LEVEL_INFO [network] - primary_interface = lo + primary_interface = @LOCAL_HOSTNAME@ [block_service.local_service] type = local_service @@ -164,7 +164,7 @@ [pegasus.clusters] onebox = @LOCAL_HOSTNAME@:34601,@LOCAL_HOSTNAME@:34602,@LOCAL_HOSTNAME@:34603 - onebox2 = 0.0.0.0:35601 + onebox2 = 127.0.0.1:35601 # The group of clusters participating in duplication. # Each cluster is assigned with a unique cluster id [1, 127] to identify which cluster diff --git a/src/server/hotspot_partition_calculator.cpp b/src/server/hotspot_partition_calculator.cpp index 2d028c828c..f163f8200f 100644 --- a/src/server/hotspot_partition_calculator.cpp +++ b/src/server/hotspot_partition_calculator.cpp @@ -27,6 +27,7 @@ #include "common/gpid.h" #include "common/serialization_helper/dsn.layer2_types.h" #include "perf_counter/perf_counter.h" +#include "runtime/rpc/rpc_host_port.h" #include "server/hotspot_partition_stat.h" #include "shell/command_executor.h" #include "utils/error_code.h" @@ -218,20 +219,20 @@ void hotspot_partition_calculator::send_detect_hotkey_request( std::vector partitions; _shell_context->ddl_client->list_app(app_name, app_id, partition_count, partitions); - auto target_address = partitions[partition_index].primary; + auto target_hp = partitions[partition_index].hp_primary; dsn::replication::detect_hotkey_response resp; 
dsn::replication::detect_hotkey_request req; req.type = hotkey_type; req.action = action; req.pid = dsn::gpid(app_id, partition_index); - auto error = _shell_context->ddl_client->detect_hotkey(target_address, req, resp); + auto error = _shell_context->ddl_client->detect_hotkey(target_hp, req, resp); - LOG_INFO("{} {} hotkey detection in {}.{}, server address: {}", + LOG_INFO("{} {} hotkey detection in {}.{}, server host_port: {}", (action == dsn::replication::detect_action::STOP) ? "Stop" : "Start", (hotkey_type == dsn::replication::hotkey_type::WRITE) ? "write" : "read", app_name, partition_index, - target_address); + target_hp); if (error != dsn::ERR_OK) { LOG_ERROR("Hotkey detect rpc sending failed, in {}.{}, error_hint:{}", diff --git a/src/server/info_collector.cpp b/src/server/info_collector.cpp index 1c7f362288..fd51643f4f 100644 --- a/src/server/info_collector.cpp +++ b/src/server/info_collector.cpp @@ -32,7 +32,7 @@ #include "hotspot_partition_calculator.h" #include "pegasus/client.h" #include "result_writer.h" -#include "runtime/rpc/group_address.h" +#include "runtime/rpc/group_host_port.h" #include "runtime/task/async_calls.h" #include "runtime/task/task_code.h" #include "shell/command_executor.h" @@ -71,12 +71,12 @@ DSN_DEFINE_validator(usage_stat_app, info_collector::info_collector() { - std::vector<::dsn::rpc_address> meta_servers; + std::vector<::dsn::host_port> meta_servers; replica_helper::load_meta_servers(meta_servers); _meta_servers.assign_group("meta-servers"); for (auto &ms : meta_servers) { - CHECK(_meta_servers.group_address()->add(ms), ""); + CHECK(_meta_servers.group_host_port()->add(ms), ""); } _cluster_name = dsn::get_current_cluster_name(); diff --git a/src/server/info_collector.h b/src/server/info_collector.h index 649d0dee09..fe168ad7f1 100644 --- a/src/server/info_collector.h +++ b/src/server/info_collector.h @@ -29,7 +29,7 @@ #include "perf_counter/perf_counter.h" #include "perf_counter/perf_counter_wrapper.h" -#include 
"runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" #include "runtime/task/task_tracker.h" #include "shell/command_helper.h" @@ -213,7 +213,7 @@ class info_collector private: dsn::task_tracker _tracker; - ::dsn::rpc_address _meta_servers; + ::dsn::host_port _meta_servers; std::string _cluster_name; std::shared_ptr _shell_context; ::dsn::task_ptr _app_stat_timer_task; diff --git a/src/server/pegasus_server_impl_init.cpp b/src/server/pegasus_server_impl_init.cpp index 8cbd2214c3..a77d67da40 100644 --- a/src/server/pegasus_server_impl_init.cpp +++ b/src/server/pegasus_server_impl_init.cpp @@ -45,7 +45,7 @@ #include "pegasus_value_schema.h" #include "replica_admin_types.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "server/capacity_unit_calculator.h" // IWYU pragma: keep #include "server/key_ttl_compaction_filter.h" #include "server/meta_store.h" // IWYU pragma: keep @@ -626,7 +626,7 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r) METRIC_VAR_INIT_replica(rdb_bloom_filter_point_lookup_positives), METRIC_VAR_INIT_replica(rdb_bloom_filter_point_lookup_true_positives) { - _primary_address = dsn::rpc_address(dsn_primary_address()).to_string(); + _primary_address = dsn_primary_host_port().to_string(); _gpid = get_gpid(); _read_hotkey_collector = diff --git a/src/shell/command_executor.h b/src/shell/command_executor.h index cba3b0a5cf..1732724eaa 100644 --- a/src/shell/command_executor.h +++ b/src/shell/command_executor.h @@ -22,9 +22,11 @@ #include #include #include -#include "client/replication_ddl_client.h" #include +#include "client/replication_ddl_client.h" +#include "runtime/rpc/dns_resolver.h" + #include "sds/sds.h" struct command_executor; @@ -32,12 +34,16 @@ struct shell_context { std::string current_cluster_name; std::string current_app_name; - std::vector meta_list; + std::vector meta_list; std::unique_ptr ddl_client; 
pegasus::pegasus_client *pg_client; + std::unique_ptr resolver; bool escape_all; int timeout_ms; - shell_context() : pg_client(nullptr), escape_all(false), timeout_ms(5000) {} + shell_context() + : pg_client(nullptr), resolver(new dsn::dns_resolver()), escape_all(false), timeout_ms(5000) + { + } }; struct arguments diff --git a/src/shell/command_helper.h b/src/shell/command_helper.h index 3b030a1fc8..fa8cbb7e63 100644 --- a/src/shell/command_helper.h +++ b/src/shell/command_helper.h @@ -609,20 +609,21 @@ inline void scan_data_next(scan_data_context *context) struct node_desc { std::string desc; - dsn::rpc_address address; - node_desc(const std::string &s, const dsn::rpc_address &n) : desc(s), address(n) {} + dsn::host_port hp; + node_desc(const std::string &s, const dsn::host_port &n) : desc(s), hp(n) {} }; + // type: all | replica-server | meta-server inline bool fill_nodes(shell_context *sc, const std::string &type, std::vector &nodes) { if (type == "all" || type == "meta-server") { - for (auto &addr : sc->meta_list) { - nodes.emplace_back("meta-server", addr); + for (auto &hp : sc->meta_list) { + nodes.emplace_back("meta-server", hp); } } if (type == "all" || type == "replica-server") { - std::map rs_nodes; + std::map rs_nodes; ::dsn::error_code err = sc->ddl_client->list_nodes(dsn::replication::node_status::NS_ALIVE, rs_nodes); if (err != ::dsn::ERR_OK) { @@ -657,8 +658,11 @@ call_remote_command(shell_context *sc, results[i].second = err.to_string(); } }; - tasks[i] = dsn::dist::cmd::async_call_remote( - nodes[i].address, cmd, arguments, callback, std::chrono::milliseconds(5000)); + tasks[i] = dsn::dist::cmd::async_call_remote(sc->resolver->resolve_address(nodes[i].hp), + cmd, + arguments, + callback, + std::chrono::milliseconds(5000)); } for (int i = 0; i < nodes.size(); ++i) { tasks[i]->wait(); @@ -1026,24 +1030,22 @@ get_app_partitions(shell_context *sc, return true; } -inline bool decode_node_perf_counter_info(const dsn::rpc_address &node_addr, +inline bool 
decode_node_perf_counter_info(const dsn::host_port &hp, const std::pair &result, dsn::perf_counter_info &info) { if (!result.first) { - LOG_ERROR("query perf counter info from node {} failed", node_addr); + LOG_ERROR("query perf counter info from node {} failed", hp); return false; } dsn::blob bb(result.second.data(), 0, result.second.size()); if (!dsn::json::json_forwarder::decode(bb, info)) { - LOG_ERROR( - "decode perf counter info from node {} failed, result = {}", node_addr, result.second); + LOG_ERROR("decode perf counter info from node {} failed, result = {}", hp, result.second); return false; } if (info.result != "OK") { - LOG_ERROR("query perf counter info from node {} returns error, error = {}", - node_addr, - info.result); + LOG_ERROR( + "query perf counter info from node {} returns error, error = {}", hp, info.result); return false; } return true; @@ -1082,7 +1084,7 @@ inline bool get_app_partition_stat(shell_context *sc, for (int i = 0; i < nodes.size(); ++i) { // decode info of perf-counters on node i dsn::perf_counter_info info; - if (!decode_node_perf_counter_info(nodes[i].address, results[i], info)) { + if (!decode_node_perf_counter_info(nodes[i].hp, results[i], info)) { return false; } @@ -1097,7 +1099,7 @@ inline bool get_app_partition_stat(shell_context *sc, // only primary partition will be counted auto find = app_partitions.find(app_id_x); if (find != app_partitions.end() && - find->second[partition_index_x].primary == nodes[i].address) { + find->second[partition_index_x].hp_primary == nodes[i].hp) { row_data &row = rows[app_id_name[app_id_x]][partition_index_x]; row.row_name = std::to_string(partition_index_x); row.app_id = app_id_x; @@ -1169,9 +1171,9 @@ get_app_stat(shell_context *sc, const std::string &app_name, std::vectorsecond[partition_index_x]; - if (pc.primary != node_addr) + if (pc.hp_primary != hp) continue; update_app_pegasus_perf_counter(rows[app_row_idx[app_id_x]], counter_name, m.value); } @@ -1206,9 +1208,9 @@ 
get_app_stat(shell_context *sc, const std::string &app_name, std::vectorpartition_count); for (int i = 0; i < nodes.size(); ++i) { - dsn::rpc_address node_addr = nodes[i].address; + auto hp = nodes[i].hp; dsn::perf_counter_info info; - if (!decode_node_perf_counter_info(node_addr, results[i], info)) + if (!decode_node_perf_counter_info(hp, results[i], info)) return false; for (dsn::perf_counter_metric &m : info.counters) { int32_t app_id_x, partition_index_x; @@ -1218,7 +1220,7 @@ get_app_stat(shell_context *sc, const std::string &app_name, std::vectorresolver->resolve_address(hp).to_string(); for (dsn::perf_counter_metric &m : info.counters) { int32_t app_id, pidx; std::string counter_name; @@ -1332,10 +1334,10 @@ inline bool get_storage_size_stat(shell_context *sc, app_storage_size_stat &st_s sc, nodes, "perf-counters-by-prefix", {"replica*app.pegasus*disk.storage.sst(MB)"}); for (int i = 0; i < nodes.size(); ++i) { - dsn::rpc_address node_addr = nodes[i].address; + auto hp = nodes[i].hp; dsn::perf_counter_info info; - if (!decode_node_perf_counter_info(node_addr, results[i], info)) { - LOG_WARNING("decode perf counter from node({}) failed, just ignore it", node_addr); + if (!decode_node_perf_counter_info(hp, results[i], info)) { + LOG_WARNING("decode perf counter from node({}) failed, just ignore it", hp); continue; } for (dsn::perf_counter_metric &m : info.counters) { @@ -1350,7 +1352,7 @@ inline bool get_storage_size_stat(shell_context *sc, app_storage_size_stat &st_s if (find == app_partitions.end()) // app id not found continue; dsn::partition_configuration &pc = find->second[partition_index_x]; - if (pc.primary != node_addr) // not primary replica + if (pc.hp_primary != hp) // not primary replica continue; if (pc.partition_flags != 0) // already calculated continue; @@ -1372,11 +1374,15 @@ inline bool get_storage_size_stat(shell_context *sc, app_storage_size_stat &st_s inline configuration_proposal_action new_proposal_action(const dsn::rpc_address &target, 
const dsn::rpc_address &node, + const dsn::host_port &hp_target, + const dsn::host_port &hp_node, config_type::type type) { configuration_proposal_action act; act.__set_target(target); act.__set_node(node); + act.__set_hp_target(hp_target); + act.__set_hp_node(hp_node); act.__set_type(type); return act; } diff --git a/src/shell/command_utils.cpp b/src/shell/command_utils.cpp index acc5604f42..9888ce74b4 100644 --- a/src/shell/command_utils.cpp +++ b/src/shell/command_utils.cpp @@ -22,20 +22,20 @@ #include "client/replication_ddl_client.h" #include "command_executor.h" #include "meta_admin_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/error_code.h" bool validate_ip(shell_context *sc, const std::string &ip_str, - dsn::rpc_address &target_address, + dsn::host_port &target_hp, std::string &err_info) { - if (!target_address.from_string_ipv4(ip_str.c_str())) { - err_info = fmt::format("invalid ip:port={}, can't transform it into rpc_address", ip_str); + if (!target_hp.from_string(ip_str)) { + err_info = fmt::format("invalid ip:port={}, can't transform it into host_port", ip_str); return false; } - std::map nodes; + std::map nodes; auto error = sc->ddl_client->list_nodes(dsn::replication::node_status::NS_INVALID, nodes); if (error != dsn::ERR_OK) { err_info = fmt::format("list nodes failed, error={}", error.to_string()); @@ -43,7 +43,7 @@ bool validate_ip(shell_context *sc, } for (const auto &node : nodes) { - if (target_address == node.first) { + if (target_hp == node.first) { return true; } } diff --git a/src/shell/command_utils.h b/src/shell/command_utils.h index 441a228e00..717f517052 100644 --- a/src/shell/command_utils.h +++ b/src/shell/command_utils.h @@ -30,7 +30,7 @@ #include "utils/strings.h" namespace dsn { -class rpc_address; +class host_port; } struct shell_context; @@ -67,7 +67,7 @@ inline bool validate_cmd(const argh::parser &cmd, bool validate_ip(shell_context *sc, const std::string &ip_str, - 
/*out*/ dsn::rpc_address &target_address, + /*out*/ dsn::host_port &target_hp, /*out*/ std::string &err_info); #define verify_logged(exp, ...) \ diff --git a/src/shell/commands/data_operations.cpp b/src/shell/commands/data_operations.cpp index 7343e95cdb..926b799816 100644 --- a/src/shell/commands/data_operations.cpp +++ b/src/shell/commands/data_operations.cpp @@ -49,7 +49,7 @@ #include "pegasus_key_schema.h" #include "pegasus_utils.h" #include "rrdb/rrdb_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/task/async_calls.h" #include "shell/args.h" #include "shell/command_executor.h" @@ -2795,13 +2795,13 @@ bool calculate_hash_value(command_executor *e, shell_context *sc, arguments args tp.add_row_name_and_data("partition_index", partition_index); if (partitions.size() > partition_index) { ::dsn::partition_configuration &pc = partitions[partition_index]; - tp.add_row_name_and_data("primary", pc.primary.to_string()); + tp.add_row_name_and_data("primary", pc.hp_primary.to_string()); std::ostringstream oss; - for (int i = 0; i < pc.secondaries.size(); ++i) { + for (int i = 0; i < pc.hp_secondaries.size(); ++i) { if (i != 0) oss << ","; - oss << pc.secondaries[i].to_string(); + oss << pc.hp_secondaries[i]; } tp.add_row_name_and_data("secondaries", oss.str()); } diff --git a/src/shell/commands/detect_hotkey.cpp b/src/shell/commands/detect_hotkey.cpp index 6d300d31d3..c78f906c67 100644 --- a/src/shell/commands/detect_hotkey.cpp +++ b/src/shell/commands/detect_hotkey.cpp @@ -24,7 +24,7 @@ #include "client/replication_ddl_client.h" #include "common/gpid.h" #include "replica_admin_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "shell/argh.h" #include "shell/command_executor.h" #include "shell/command_utils.h" @@ -100,10 +100,10 @@ bool detect_hotkey(command_executor *e, shell_context *sc, arguments args) return false; } - dsn::rpc_address target_address; + dsn::host_port 
target_hp; std::string err_info; std::string ip_str = cmd({"-d", "--address"}).str(); - if (!validate_ip(sc, ip_str, target_address, err_info)) { + if (!validate_ip(sc, ip_str, target_hp, err_info)) { fmt::print(stderr, "{}\n", err_info); return false; } @@ -118,7 +118,7 @@ bool detect_hotkey(command_executor *e, shell_context *sc, arguments args) } detect_hotkey_response resp; - auto err = sc->ddl_client->detect_hotkey(dsn::rpc_address(target_address), req, resp); + auto err = sc->ddl_client->detect_hotkey(target_hp, req, resp); if (err != dsn::ERR_OK) { fmt::print(stderr, "Hotkey detection rpc sending failed, in {}.{}, error_hint:{}\n", diff --git a/src/shell/commands/node_management.cpp b/src/shell/commands/node_management.cpp index fd9369b5c0..05d897c280 100644 --- a/src/shell/commands/node_management.cpp +++ b/src/shell/commands/node_management.cpp @@ -37,7 +37,9 @@ #include "dsn.layer2_types.h" #include "meta_admin_types.h" #include "perf_counter/perf_counter_utils.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "shell/command_executor.h" #include "shell/command_helper.h" #include "shell/command_utils.h" @@ -47,7 +49,6 @@ #include "utils/error_code.h" #include "utils/output_utils.h" #include "utils/strings.h" -#include "utils/utils.h" bool query_cluster_info(command_executor *e, shell_context *sc, arguments args) { @@ -83,7 +84,7 @@ bool query_cluster_info(command_executor *e, shell_context *sc, arguments args) ::dsn::error_code err = sc->ddl_client->cluster_info(out_file, resolve_ip, json); if (err != ::dsn::ERR_OK) { - std::cout << "get cluster info failed, error=" << err.to_string() << std::endl; + std::cout << "get cluster info failed, error=" << err << std::endl; } return true; } @@ -162,24 +163,23 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) status.c_str()); } - std::map nodes; + std::map nodes; auto r = sc->ddl_client->list_nodes(s, nodes); if (r 
!= dsn::ERR_OK) { - std::cout << "list nodes failed, error=" << r.to_string() << std::endl; + std::cout << "list nodes failed, error=" << r << std::endl; return true; } - std::map tmp_map; + std::map tmp_map; int alive_node_count = 0; for (auto &kv : nodes) { if (kv.second == dsn::replication::node_status::NS_ALIVE) alive_node_count++; std::string status_str = dsn::enum_to_string(kv.second); status_str = status_str.substr(status_str.find("NS_") + 3); - std::string node_name = kv.first.to_std_string(); + std::string node_name = kv.first.to_string(); if (resolve_ip) { - // TODO: put hostname_from_ip_port into common utils - dsn::utils::hostname_from_ip_port(node_name.c_str(), &node_name); + node_name = sc->resolver->resolve_address(kv.first).to_string(); } tmp_map.emplace(kv.first, list_nodes_helper(node_name, status_str)); } @@ -188,7 +188,7 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) std::vector<::dsn::app_info> apps; r = sc->ddl_client->list_apps(dsn::app_status::AS_AVAILABLE, apps); if (r != dsn::ERR_OK) { - std::cout << "list apps failed, error=" << r.to_string() << std::endl; + std::cout << "list apps failed, error=" << r << std::endl; return true; } @@ -198,20 +198,19 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) std::vector partitions; r = sc->ddl_client->list_app(app.app_name, app_id, partition_count, partitions); if (r != dsn::ERR_OK) { - std::cout << "list app " << app.app_name << " failed, error=" << r.to_string() - << std::endl; + std::cout << "list app " << app.app_name << " failed, error=" << r << std::endl; return true; } for (const dsn::partition_configuration &p : partitions) { - if (!p.primary.is_invalid()) { - auto find = tmp_map.find(p.primary); + if (!p.hp_primary.is_invalid()) { + auto find = tmp_map.find(p.hp_primary); if (find != tmp_map.end()) { find->second.primary_count++; } } - for (const dsn::rpc_address &addr : p.secondaries) { - auto find = tmp_map.find(addr); + for (const auto 
&hp : p.hp_secondaries) { + auto find = tmp_map.find(hp); if (find != tmp_map.end()) { find->second.secondary_count++; } @@ -239,24 +238,23 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) "replica*app.pegasus*rdb.index_and_filter_blocks.memory_usage"}); for (int i = 0; i < nodes.size(); ++i) { - dsn::rpc_address node_addr = nodes[i].address; - auto tmp_it = tmp_map.find(node_addr); + const auto &hp = nodes[i].hp; + auto tmp_it = tmp_map.find(hp); if (tmp_it == tmp_map.end()) continue; if (!results[i].first) { - std::cout << "query perf counter info from node " << node_addr.to_string() - << " failed" << std::endl; + std::cout << "query perf counter info from node " << hp << " failed" << std::endl; return true; } dsn::perf_counter_info info; dsn::blob bb(results[i].second.data(), 0, results[i].second.size()); if (!dsn::json::json_forwarder::decode(bb, info)) { - std::cout << "decode perf counter info from node " << node_addr.to_string() + std::cout << "decode perf counter info from node " << hp << " failed, result = " << results[i].second << std::endl; return true; } if (info.result != "OK") { - std::cout << "query perf counter info from node " << node_addr.to_string() + std::cout << "query perf counter info from node " << hp << " returns error, error = " << info.result << std::endl; return true; } @@ -299,24 +297,23 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) "replica*app.pegasus*recent.write.cu"}); for (int i = 0; i < nodes.size(); ++i) { - dsn::rpc_address node_addr = nodes[i].address; - auto tmp_it = tmp_map.find(node_addr); + const auto &hp = nodes[i].hp; + auto tmp_it = tmp_map.find(hp); if (tmp_it == tmp_map.end()) continue; if (!results[i].first) { - std::cout << "query perf counter info from node " << node_addr.to_string() - << " failed" << std::endl; + std::cout << "query perf counter info from node " << hp << " failed" << std::endl; return true; } dsn::perf_counter_info info; dsn::blob 
bb(results[i].second.data(), 0, results[i].second.size()); if (!dsn::json::json_forwarder::decode(bb, info)) { - std::cout << "decode perf counter info from node " << node_addr.to_string() + std::cout << "decode perf counter info from node " << hp << " failed, result = " << results[i].second << std::endl; return true; } if (info.result != "OK") { - std::cout << "query perf counter info from node " << node_addr.to_string() + std::cout << "query perf counter info from node " << hp << " returns error, error = " << info.result << std::endl; return true; } @@ -358,24 +355,23 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) "zion*profiler*RPC_RRDB_RRDB_MULTI_PUT.latency.server"}); for (int i = 0; i < nodes.size(); ++i) { - dsn::rpc_address node_addr = nodes[i].address; - auto tmp_it = tmp_map.find(node_addr); + const auto &hp = nodes[i].hp; + auto tmp_it = tmp_map.find(hp); if (tmp_it == tmp_map.end()) continue; if (!results[i].first) { - std::cout << "query perf counter info from node " << node_addr.to_string() - << " failed" << std::endl; + std::cout << "query perf counter info from node " << hp << " failed" << std::endl; return true; } dsn::perf_counter_info info; dsn::blob bb(results[i].second.data(), 0, results[i].second.size()); if (!dsn::json::json_forwarder::decode(bb, info)) { - std::cout << "decode perf counter info from node " << node_addr.to_string() + std::cout << "decode perf counter info from node " << hp << " failed, result = " << results[i].second << std::endl; return true; } if (info.result != "OK") { - std::cout << "query perf counter info from node " << node_addr.to_string() + std::cout << "query perf counter info from node " << hp << " returns error, error = " << info.result << std::endl; return true; } @@ -579,8 +575,8 @@ bool remote_command(command_executor *e, shell_context *sc, arguments args) } for (std::string &token : tokens) { - dsn::rpc_address node; - if (!node.from_string_ipv4(token.c_str())) { + dsn::host_port 
node; + if (!node.from_string(token)) { fprintf(stderr, "parse %s as a ip:port node failed\n", token.c_str()); return true; } @@ -604,9 +600,9 @@ bool remote_command(command_executor *e, shell_context *sc, arguments args) node_desc &n = node_list[i]; std::string hostname; if (resolve_ip) { - dsn::utils::hostname_from_ip_port(n.address.to_string(), &hostname); + hostname = sc->resolver->resolve_address(n.hp).to_string(); } else { - hostname = n.address.to_string(); + hostname = n.hp.to_string(); } fprintf(stderr, "CALL [%s] [%s] ", n.desc.c_str(), hostname.c_str()); if (results[i].first) { diff --git a/src/shell/commands/rebalance.cpp b/src/shell/commands/rebalance.cpp index 43a591cfc4..25bbbbc1b2 100644 --- a/src/shell/commands/rebalance.cpp +++ b/src/shell/commands/rebalance.cpp @@ -31,7 +31,9 @@ #include "client/replication_ddl_client.h" #include "common/gpid.h" #include "meta_admin_types.h" +#include "runtime/rpc/dns_resolver.h" #include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "shell/command_executor.h" #include "shell/command_helper.h" #include "shell/command_utils.h" @@ -88,7 +90,7 @@ bool propose(command_executor *e, shell_context *sc, arguments args) dverify(args.argc >= 9); dsn::replication::configuration_balancer_request request; request.gpid.set_app_id(-1); - dsn::rpc_address target, node; + dsn::host_port target, node; std::string proposal_type = "CT_"; request.force = false; bool ans; @@ -113,10 +115,10 @@ bool propose(command_executor *e, shell_context *sc, arguments args) break; case 't': verify_logged( - target.from_string_ipv4(optarg), "parse %s as target_address failed\n", optarg); + target.from_string(optarg), "parse %s as target_address failed\n", optarg); break; case 'n': - verify_logged(node.from_string_ipv4(optarg), "parse %s as node failed\n", optarg); + verify_logged(node.from_string(optarg), "parse %s as node failed\n", optarg); break; default: return false; @@ -131,7 +133,11 @@ bool 
propose(command_executor *e, shell_context *sc, arguments args) type_from_string(_config_type_VALUES_TO_NAMES, proposal_type, config_type::CT_INVALID); verify_logged( tp != config_type::CT_INVALID, "parse %s as config_type failed.\n", proposal_type.c_str()); - request.action_list = {new_proposal_action(target, node, tp)}; + request.action_list = {new_proposal_action(sc->resolver->resolve_address(target), + sc->resolver->resolve_address(node), + target, + node, + tp)}; dsn::error_code err = sc->ddl_client->send_balancer_proposal(request); std::cout << "send proposal response: " << err.to_string() << std::endl; return true; @@ -151,7 +157,7 @@ bool balance(command_executor *e, shell_context *sc, arguments args) dsn::replication::configuration_balancer_request request; request.gpid.set_app_id(-1); std::string balance_type; - dsn::rpc_address from, to; + dsn::host_port from, to; bool ans; optind = 0; @@ -173,13 +179,13 @@ bool balance(command_executor *e, shell_context *sc, arguments args) balance_type = optarg; break; case 'f': - if (!from.from_string_ipv4(optarg)) { + if (!from.from_string(optarg)) { fprintf(stderr, "parse %s as from_address failed\n", optarg); return false; } break; case 't': - if (!to.from_string_ipv4(optarg)) { + if (!to.from_string(optarg)) { fprintf(stderr, "parse %s as target_address failed\n", optarg); return false; } @@ -191,20 +197,31 @@ bool balance(command_executor *e, shell_context *sc, arguments args) std::vector &actions = request.action_list; actions.reserve(4); + auto from_addr = sc->resolver->resolve_address(from); + auto to_addr = sc->resolver->resolve_address(to); if (balance_type == "move_pri") { + actions.emplace_back(new_proposal_action( + from_addr, from_addr, from, from, config_type::CT_DOWNGRADE_TO_SECONDARY)); actions.emplace_back( - new_proposal_action(from, from, config_type::CT_DOWNGRADE_TO_SECONDARY)); - actions.emplace_back(new_proposal_action(to, to, config_type::CT_UPGRADE_TO_PRIMARY)); + new_proposal_action(to_addr, 
to_addr, to, to, config_type::CT_UPGRADE_TO_PRIMARY)); } else if (balance_type == "copy_pri") { - actions.emplace_back(new_proposal_action(from, to, config_type::CT_ADD_SECONDARY_FOR_LB)); + actions.emplace_back(new_proposal_action( + from_addr, to_addr, from, to, config_type::CT_ADD_SECONDARY_FOR_LB)); + actions.emplace_back(new_proposal_action( + from_addr, from_addr, from, from, config_type::CT_DOWNGRADE_TO_SECONDARY)); actions.emplace_back( - new_proposal_action(from, from, config_type::CT_DOWNGRADE_TO_SECONDARY)); - actions.emplace_back(new_proposal_action(to, to, config_type::CT_UPGRADE_TO_PRIMARY)); + new_proposal_action(to_addr, to_addr, to, to, config_type::CT_UPGRADE_TO_PRIMARY)); } else if (balance_type == "copy_sec") { - actions.emplace_back( - new_proposal_action(dsn::rpc_address(), to, config_type::CT_ADD_SECONDARY_FOR_LB)); - actions.emplace_back( - new_proposal_action(dsn::rpc_address(), from, config_type::CT_DOWNGRADE_TO_INACTIVE)); + actions.emplace_back(new_proposal_action(dsn::rpc_address(), + to_addr, + dsn::host_port(), + to, + config_type::CT_ADD_SECONDARY_FOR_LB)); + actions.emplace_back(new_proposal_action(dsn::rpc_address(), + from_addr, + dsn::host_port(), + from, + config_type::CT_DOWNGRADE_TO_INACTIVE)); } else { fprintf(stderr, "parse %s as a balance type failed\n", balance_type.c_str()); return false; diff --git a/src/shell/commands/recovery.cpp b/src/shell/commands/recovery.cpp index 8df98616c6..3875bb947d 100644 --- a/src/shell/commands/recovery.cpp +++ b/src/shell/commands/recovery.cpp @@ -32,7 +32,8 @@ #include "common/gpid.h" #include "dsn.layer2_types.h" #include "meta_admin_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/dns_resolver.h" +#include "runtime/rpc/rpc_host_port.h" #include "shell/command_executor.h" #include "shell/command_helper.h" #include "shell/commands.h" @@ -106,7 +107,7 @@ bool recover(command_executor *e, shell_context *sc, arguments args) return false; } - std::vector node_list; + 
std::vector node_list; if (!node_list_str.empty()) { std::vector tokens; dsn::utils::split_args(node_list_str.c_str(), tokens, ','); @@ -116,8 +117,8 @@ bool recover(command_executor *e, shell_context *sc, arguments args) } for (std::string &token : tokens) { - dsn::rpc_address node; - if (!node.from_string_ipv4(token.c_str())) { + dsn::host_port node; + if (!node.from_string(token)) { fprintf(stderr, "parse %s as a ip:port node failed\n", token.c_str()); return true; } @@ -137,8 +138,8 @@ bool recover(command_executor *e, shell_context *sc, arguments args) boost::trim(str); if (str.empty() || str[0] == '#' || str[0] == ';') continue; - dsn::rpc_address node; - if (!node.from_string_ipv4(str.c_str())) { + dsn::host_port node; + if (!node.from_string(str)) { fprintf(stderr, "parse %s at file %s line %d as ip:port failed\n", str.c_str(), @@ -163,20 +164,18 @@ bool recover(command_executor *e, shell_context *sc, arguments args) return true; } -dsn::rpc_address diagnose_recommend(const ddd_partition_info &pinfo); - -dsn::rpc_address diagnose_recommend(const ddd_partition_info &pinfo) +dsn::host_port diagnose_recommend(const ddd_partition_info &pinfo) { - if (pinfo.config.last_drops.size() < 2) - return dsn::rpc_address(); + if (pinfo.config.hp_last_drops.size() < 2) + return dsn::host_port(); - std::vector last_two_nodes(pinfo.config.last_drops.end() - 2, - pinfo.config.last_drops.end()); + std::vector last_two_nodes(pinfo.config.hp_last_drops.end() - 2, + pinfo.config.hp_last_drops.end()); std::vector last_dropped; for (auto &node : last_two_nodes) { auto it = std::find_if(pinfo.dropped.begin(), pinfo.dropped.end(), - [&node](const ddd_node_info &r) { return r.node == node; }); + [&node](const ddd_node_info &r) { return r.hp_node == node; }); if (it->is_alive && it->is_collected) last_dropped.push_back(*it); } @@ -184,7 +183,7 @@ dsn::rpc_address diagnose_recommend(const ddd_partition_info &pinfo) if (last_dropped.size() == 1) { const ddd_node_info &ninfo = 
last_dropped.back(); if (ninfo.last_committed_decree >= pinfo.config.last_committed_decree) - return ninfo.node; + return ninfo.hp_node; } else if (last_dropped.size() == 2) { const ddd_node_info &secondary = last_dropped.front(); const ddd_node_info &latest = last_dropped.back(); @@ -195,18 +194,18 @@ dsn::rpc_address diagnose_recommend(const ddd_partition_info &pinfo) if (latest.last_committed_decree == secondary.last_committed_decree && latest.last_committed_decree >= pinfo.config.last_committed_decree) - return latest.ballot >= secondary.ballot ? latest.node : secondary.node; + return latest.ballot >= secondary.ballot ? latest.hp_node : secondary.hp_node; if (latest.last_committed_decree > secondary.last_committed_decree && latest.last_committed_decree >= pinfo.config.last_committed_decree) - return latest.node; + return latest.hp_node; if (secondary.last_committed_decree > latest.last_committed_decree && secondary.last_committed_decree >= pinfo.config.last_committed_decree) - return secondary.node; + return secondary.hp_node; } - return dsn::rpc_address(); + return dsn::host_port(); } bool ddd_diagnose(command_executor *e, shell_context *sc, arguments args) @@ -287,37 +286,40 @@ bool ddd_diagnose(command_executor *e, shell_context *sc, arguments args) out << " config: ballot(" << pinfo.config.ballot << "), " << "last_committed(" << pinfo.config.last_committed_decree << ")" << std::endl; out << " ----" << std::endl; - dsn::rpc_address latest_dropped, secondary_latest_dropped; - if (pinfo.config.last_drops.size() > 0) - latest_dropped = pinfo.config.last_drops[pinfo.config.last_drops.size() - 1]; - if (pinfo.config.last_drops.size() > 1) - secondary_latest_dropped = pinfo.config.last_drops[pinfo.config.last_drops.size() - 2]; + dsn::host_port latest_dropped, secondary_latest_dropped; + if (pinfo.config.hp_last_drops.size() > 0) + latest_dropped = pinfo.config.hp_last_drops[pinfo.config.hp_last_drops.size() - 1]; + if (pinfo.config.hp_last_drops.size() > 1) + 
secondary_latest_dropped = + pinfo.config.hp_last_drops[pinfo.config.hp_last_drops.size() - 2]; int j = 0; for (const ddd_node_info &n : pinfo.dropped) { + dsn::host_port hp_node; + GET_HOST_PORT(n, node, hp_node); char time_buf[30] = {0}; ::dsn::utils::time_ms_to_string(n.drop_time_ms, time_buf); out << " dropped[" << j++ << "]: " - << "node(" << n.node.to_string() << "), " + << "node(" << hp_node << "), " << "drop_time(" << time_buf << "), " << "alive(" << (n.is_alive ? "true" : "false") << "), " << "collected(" << (n.is_collected ? "true" : "false") << "), " << "ballot(" << n.ballot << "), " << "last_committed(" << n.last_committed_decree << "), " << "last_prepared(" << n.last_prepared_decree << ")"; - if (n.node == latest_dropped) + if (hp_node == latest_dropped) out << " <== the latest"; - else if (n.node == secondary_latest_dropped) + else if (hp_node == secondary_latest_dropped) out << " <== the secondary latest"; out << std::endl; } out << " ----" << std::endl; j = 0; - for (const ::dsn::rpc_address &r : pinfo.config.last_drops) { + for (const ::dsn::host_port &r : pinfo.config.hp_last_drops) { out << " last_drops[" << j++ << "]: " << "node(" << r.to_string() << ")"; - if (j == (int)pinfo.config.last_drops.size() - 1) + if (j == (int)pinfo.config.hp_last_drops.size() - 1) out << " <== the secondary latest"; - else if (j == (int)pinfo.config.last_drops.size()) + else if (j == (int)pinfo.config.hp_last_drops.size()) out << " <== the latest"; out << std::endl; } @@ -326,7 +328,7 @@ bool ddd_diagnose(command_executor *e, shell_context *sc, arguments args) if (diagnose) { out << " ----" << std::endl; - dsn::rpc_address primary = diagnose_recommend(pinfo); + dsn::host_port primary = diagnose_recommend(pinfo); out << " recommend_primary: " << (primary.is_invalid() ? 
"none" : primary.to_string()); if (primary == latest_dropped) @@ -344,7 +346,7 @@ bool ddd_diagnose(command_executor *e, shell_context *sc, arguments args) if (c == 'y') { break; } else if (c == 'n') { - primary.set_invalid(); + primary.reset(); break; } else if (c == 's') { skip_this = true; @@ -360,7 +362,7 @@ bool ddd_diagnose(command_executor *e, shell_context *sc, arguments args) std::cout << " > Please input the primary node: "; std::string addr; std::cin >> addr; - if (primary.from_string_ipv4(addr.c_str())) { + if (primary.from_string(addr)) { break; } else { std::cout << " > Sorry, you have input an invalid node address." @@ -372,8 +374,9 @@ bool ddd_diagnose(command_executor *e, shell_context *sc, arguments args) if (!primary.is_invalid() && !skip_this) { dsn::replication::configuration_balancer_request request; request.gpid = pinfo.config.pid; - request.action_list = { - new_proposal_action(primary, primary, config_type::CT_ASSIGN_PRIMARY)}; + auto primary_hp = sc->resolver->resolve_address(primary); + request.action_list = {new_proposal_action( + primary_hp, primary_hp, primary, primary, config_type::CT_ASSIGN_PRIMARY)}; request.force = false; dsn::error_code err = sc->ddl_client->send_balancer_proposal(request); out << " propose_request: propose -g " << request.gpid.to_string() diff --git a/src/shell/commands/table_management.cpp b/src/shell/commands/table_management.cpp index 806a7a13dc..01c3d21ff7 100644 --- a/src/shell/commands/table_management.cpp +++ b/src/shell/commands/table_management.cpp @@ -40,7 +40,7 @@ #include "meta_admin_types.h" #include "pegasus_utils.h" #include "perf_counter/perf_counter_utils.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "shell/command_executor.h" #include "shell/command_helper.h" #include "shell/command_utils.h" @@ -269,24 +269,23 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) {fmt::format("replica*app.pegasus*disk.storage.sst(MB)@{}.", app_id), 
fmt::format("replica*app.pegasus*disk.storage.sst.count@{}.", app_id)}); - std::map> disk_map; - std::map> count_map; + std::map> disk_map; + std::map> count_map; for (int i = 0; i < nodes.size(); ++i) { if (!results[i].first) { - std::cout << "ERROR: query perf counter from node " << nodes[i].address.to_string() + std::cout << "ERROR: query perf counter from node " << nodes[i].hp << " failed" << std::endl; return true; } dsn::perf_counter_info info; dsn::blob bb(results[i].second.data(), 0, results[i].second.size()); if (!dsn::json::json_forwarder::decode(bb, info)) { - std::cout << "ERROR: decode perf counter info from node " - << nodes[i].address.to_string() << " failed, result = " << results[i].second - << std::endl; + std::cout << "ERROR: decode perf counter info from node " << nodes[i].hp + << " failed, result = " << results[i].second << std::endl; return true; } if (info.result != "OK") { - std::cout << "ERROR: query perf counter info from node " << nodes[i].address.to_string() + std::cout << "ERROR: query perf counter info from node " << nodes[i].hp << " returns error, error = " << info.result << std::endl; return true; } @@ -297,9 +296,9 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) m.name, app_id_x, partition_index_x, counter_name); CHECK(parse_ret, "name = {}", m.name); if (m.name.find("sst(MB)") != std::string::npos) { - disk_map[nodes[i].address][partition_index_x] = m.value; + disk_map[nodes[i].hp][partition_index_x] = m.value; } else if (m.name.find("sst.count") != std::string::npos) { - count_map[nodes[i].address][partition_index_x] = m.value; + count_map[nodes[i].hp][partition_index_x] = m.value; } } } @@ -325,10 +324,10 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) for (int i = 0; i < partitions.size(); i++) { const dsn::partition_configuration &p = partitions[i]; int replica_count = 0; - if (!p.primary.is_invalid()) { + if (!p.hp_primary.is_invalid()) { replica_count++; } - replica_count += 
p.secondaries.size(); + replica_count += p.hp_secondaries.size(); std::string replica_count_str; { std::stringstream oss; @@ -336,10 +335,10 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) replica_count_str = oss.str(); } std::string primary_str("-"); - if (!p.primary.is_invalid()) { + if (!p.hp_primary.is_invalid()) { bool disk_found = false; double disk_value = 0; - auto f1 = disk_map.find(p.primary); + auto f1 = disk_map.find(p.hp_primary); if (f1 != disk_map.end()) { auto &sub_map = f1->second; auto f2 = sub_map.find(p.pid.get_partition_index()); @@ -354,7 +353,7 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) } bool count_found = false; double count_value = 0; - auto f3 = count_map.find(p.primary); + auto f3 = count_map.find(p.hp_primary); if (f3 != count_map.end()) { auto &sub_map = f3->second; auto f3 = sub_map.find(p.pid.get_partition_index()); @@ -365,11 +364,11 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) } std::stringstream oss; std::string hostname; - std::string ip = p.primary.to_string(); + std::string ip = p.hp_primary.to_string(); if (resolve_ip && dsn::utils::hostname_from_ip_port(ip.c_str(), &hostname)) { oss << hostname << "("; } else { - oss << p.primary.to_string() << "("; + oss << p.hp_primary << "("; }; if (disk_found) oss << disk_value; @@ -387,12 +386,12 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) { std::stringstream oss; oss << "["; - for (int j = 0; j < p.secondaries.size(); j++) { + for (int j = 0; j < p.hp_secondaries.size(); j++) { if (j != 0) oss << ","; bool found = false; double value = 0; - auto f1 = disk_map.find(p.secondaries[j]); + auto f1 = disk_map.find(p.hp_secondaries[j]); if (f1 != disk_map.end()) { auto &sub_map = f1->second; auto f2 = sub_map.find(p.pid.get_partition_index()); @@ -405,7 +404,7 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) } bool count_found = false; double count_value = 0; 
- auto f3 = count_map.find(p.secondaries[j]); + auto f3 = count_map.find(p.hp_secondaries[j]); if (f3 != count_map.end()) { auto &sub_map = f3->second; auto f3 = sub_map.find(p.pid.get_partition_index()); @@ -416,11 +415,11 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) } std::string hostname; - std::string ip = p.secondaries[j].to_string(); + std::string ip = p.hp_secondaries[j].to_string(); if (resolve_ip && dsn::utils::hostname_from_ip_port(ip.c_str(), &hostname)) { oss << hostname << "("; } else { - oss << p.secondaries[j].to_string() << "("; + oss << p.hp_secondaries[j] << "("; }; if (found) oss << value; diff --git a/src/test/function_test/base_api/test_batch_get.cpp b/src/test/function_test/base_api/test_batch_get.cpp index da4f9bf3ca..85feb06587 100644 --- a/src/test/function_test/base_api/test_batch_get.cpp +++ b/src/test/function_test/base_api/test_batch_get.cpp @@ -33,6 +33,7 @@ #include "client/partition_resolver.h" #include "gtest/gtest.h" #include "include/rrdb/rrdb.client.h" +#include "runtime/rpc/dns_resolver.h" #include "test/function_test/utils/test_util.h" #include "utils/blob.h" #include "utils/error_code.h" @@ -48,8 +49,9 @@ class batch_get : public test_util TEST_F(batch_get, set_and_then_batch_get) { - auto rrdb_client = - new ::dsn::apps::rrdb_client(kClusterName.c_str(), meta_list_, table_name_.c_str()); + auto dns_resolver = std::make_shared(); + auto rrdb_client = new ::dsn::apps::rrdb_client( + kClusterName.c_str(), meta_list_, table_name_.c_str(), dns_resolver); int test_data_count = 100; int test_timeout_milliseconds = 3000; diff --git a/src/test/function_test/config.ini b/src/test/function_test/config.ini index 21ede3ff24..34fb4a9383 100644 --- a/src/test/function_test/config.ini +++ b/src/test/function_test/config.ini @@ -75,8 +75,8 @@ rpc_timeout_milliseconds = 5000 lb_interval_ms = 3000 [pegasus.clusters] -onebox = 127.0.0.1:34601,127.0.0.1:34602,127.0.0.1:34603 -single_master_cluster = 127.0.0.1:34601 
+onebox = @LOCAL_HOSTNAME@:34601,@LOCAL_HOSTNAME@:34602,@LOCAL_HOSTNAME@:34603 +single_master_cluster = @LOCAL_HOSTNAME@:34601 [pegasus.server] encrypt_data_at_rest = false diff --git a/src/test/function_test/detect_hotspot/test_detect_hotspot.cpp b/src/test/function_test/detect_hotspot/test_detect_hotspot.cpp index 659d379053..56fbc75391 100644 --- a/src/test/function_test/detect_hotspot/test_detect_hotspot.cpp +++ b/src/test/function_test/detect_hotspot/test_detect_hotspot.cpp @@ -98,8 +98,9 @@ class detect_hotspot_test : public test_util dsn::replication::detect_hotkey_response resp; for (int partition_index = 0; partition_index < partitions_.size(); partition_index++) { req.pid = dsn::gpid(table_id_, partition_index); - ASSERT_EQ(dsn::ERR_OK, - ddl_client_->detect_hotkey(partitions_[partition_index].primary, req, resp)); + ASSERT_EQ( + dsn::ERR_OK, + ddl_client_->detect_hotkey(partitions_[partition_index].hp_primary, req, resp)); if (!resp.hotkey_result.empty()) { find_hotkey = true; break; @@ -118,16 +119,18 @@ class detect_hotspot_test : public test_util req.action = dsn::replication::detect_action::STOP; for (int partition_index = 0; partition_index < partitions_.size(); partition_index++) { - ASSERT_EQ(dsn::ERR_OK, - ddl_client_->detect_hotkey(partitions_[partition_index].primary, req, resp)); + ASSERT_EQ( + dsn::ERR_OK, + ddl_client_->detect_hotkey(partitions_[partition_index].hp_primary, req, resp)); ASSERT_EQ(dsn::ERR_OK, resp.err); } req.action = dsn::replication::detect_action::QUERY; for (int partition_index = 0; partition_index < partitions_.size(); partition_index++) { req.pid = dsn::gpid(table_id_, partition_index); - ASSERT_EQ(dsn::ERR_OK, - ddl_client_->detect_hotkey(partitions_[partition_index].primary, req, resp)); + ASSERT_EQ( + dsn::ERR_OK, + ddl_client_->detect_hotkey(partitions_[partition_index].hp_primary, req, resp)); ASSERT_EQ("Can't get hotkey now, now state: hotkey_collector_state::STOPPED", resp.err_hint); } @@ -159,12 +162,12 @@ 
class detect_hotspot_test : public test_util dsn::replication::detect_hotkey_response resp; ASSERT_EQ(dsn::ERR_OK, - ddl_client_->detect_hotkey(partitions_[target_partition].primary, req, resp)); + ddl_client_->detect_hotkey(partitions_[target_partition].hp_primary, req, resp)); ASSERT_EQ(dsn::ERR_OK, resp.err); req.action = dsn::replication::detect_action::QUERY; ASSERT_EQ(dsn::ERR_OK, - ddl_client_->detect_hotkey(partitions_[target_partition].primary, req, resp)); + ddl_client_->detect_hotkey(partitions_[target_partition].hp_primary, req, resp)); ASSERT_EQ("Can't get hotkey now, now state: hotkey_collector_state::COARSE_DETECTING", resp.err_hint); @@ -175,7 +178,7 @@ class detect_hotspot_test : public test_util req.action = dsn::replication::detect_action::QUERY; ASSERT_EQ(dsn::ERR_OK, - ddl_client_->detect_hotkey(partitions_[target_partition].primary, req, resp)); + ddl_client_->detect_hotkey(partitions_[target_partition].hp_primary, req, resp)); ASSERT_EQ("Can't get hotkey now, now state: hotkey_collector_state::STOPPED", resp.err_hint); } diff --git a/src/test/function_test/recovery/test_recovery.cpp b/src/test/function_test/recovery/test_recovery.cpp index 4794498841..0bd6f4a4e4 100644 --- a/src/test/function_test/recovery/test_recovery.cpp +++ b/src/test/function_test/recovery/test_recovery.cpp @@ -18,6 +18,7 @@ */ #include +#include #include #include #include @@ -31,8 +32,7 @@ #include "gtest/gtest.h" #include "include/pegasus/client.h" #include "pegasus/error.h" -#include "runtime/rpc/rpc_address.h" -#include "test/function_test/utils/global_env.h" +#include "runtime/rpc/rpc_host_port.h" #include "test/function_test/utils/test_util.h" #include "utils/error_code.h" #include "utils/rand.h" @@ -70,13 +70,15 @@ class recovery_test : public test_util // cluster has only one meta server, while "onebox" means the cluster has 3 meta servers. 
recovery_test() : test_util(std::map(), "single_master_cluster") {} - std::vector get_rpc_address_list(const std::vector ports) + std::vector get_rpc_host_port_list(const std::vector ports) { - std::vector result; + std::vector result; result.reserve(ports.size()); for (const int &p : ports) { - dsn::rpc_address address(global_env::instance()._host_ip.c_str(), p); - result.push_back(address); + char hostname[1024]; + gethostname(hostname, 1024); + dsn::host_port hp(hostname, p); + result.push_back(hp); } return result; } @@ -187,7 +189,7 @@ TEST_F(recovery_test, recovery) std::this_thread::sleep_for(std::chrono::seconds(10)); // then do recovery - auto nodes = get_rpc_address_list({34801, 34802, 34803}); + auto nodes = get_rpc_host_port_list({34801, 34802, 34803}); ASSERT_EQ(dsn::ERR_OK, ddl_client_->do_recovery(nodes, 30, false, false, std::string())); // send another recovery command @@ -216,7 +218,7 @@ TEST_F(recovery_test, recovery) std::this_thread::sleep_for(std::chrono::seconds(10)); // recovery only from 1 & 2 - std::vector nodes = get_rpc_address_list({34801, 34802}); + auto nodes = get_rpc_host_port_list({34801, 34802}); ASSERT_EQ(dsn::ERR_OK, ddl_client_->do_recovery(nodes, 30, false, false, std::string())); // then wait the app to ready @@ -247,7 +249,7 @@ TEST_F(recovery_test, recovery) std::this_thread::sleep_for(std::chrono::seconds(10)); // then do recovery - auto nodes = get_rpc_address_list({34801, 34802, 34803}); + auto nodes = get_rpc_host_port_list({34801, 34802, 34803}); ASSERT_EQ(dsn::ERR_OK, ddl_client_->do_recovery(nodes, 30, false, false, std::string())); // then wait the apps to ready @@ -277,7 +279,7 @@ TEST_F(recovery_test, recovery) std::this_thread::sleep_for(std::chrono::seconds(10)); // then do recovery - auto nodes = get_rpc_address_list({34801, 34802, 34803}); + auto nodes = get_rpc_host_port_list({34801, 34802, 34803}); ASSERT_EQ(dsn::ERR_OK, ddl_client_->do_recovery(nodes, 30, false, false, std::string())); // then wait the apps 
to ready diff --git a/src/test/function_test/utils/test_util.cpp b/src/test/function_test/utils/test_util.cpp index 5d061582c6..b1e429beec 100644 --- a/src/test/function_test/utils/test_util.cpp +++ b/src/test/function_test/utils/test_util.cpp @@ -89,6 +89,7 @@ void test_util::SetUp() ddl_client_ = std::make_shared(meta_list_); ASSERT_TRUE(ddl_client_ != nullptr); ddl_client_->set_max_wait_app_ready_secs(120); + ddl_client_->set_meta_servers_leader(); dsn::error_code ret = ddl_client_->create_app(table_name_, "pegasus", partition_count_, 3, kCreateEnvs, false); diff --git a/src/test/function_test/utils/test_util.h b/src/test/function_test/utils/test_util.h index a0d98ce19a..8e3a1663ce 100644 --- a/src/test/function_test/utils/test_util.h +++ b/src/test/function_test/utils/test_util.h @@ -28,7 +28,7 @@ #include #include "dsn.layer2_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" // TODO(yingchun): it's too tricky, but I don't know how does it happen, we can fix it later. 
#define TRICKY_CODE_TO_AVOID_LINK_ERROR \ @@ -117,7 +117,7 @@ class test_util : public ::testing::Test int32_t partition_count_ = 8; std::vector partitions_; pegasus_client *client_ = nullptr; - std::vector meta_list_; + std::vector meta_list_; std::shared_ptr ddl_client_; }; } // namespace pegasus diff --git a/src/test/kill_test/kill_testor.cpp b/src/test/kill_test/kill_testor.cpp index 6c1019aef1..7bd1e0c632 100644 --- a/src/test/kill_test/kill_testor.cpp +++ b/src/test/kill_test/kill_testor.cpp @@ -32,7 +32,6 @@ #include "common/gpid.h" #include "common/replication_other_types.h" #include "kill_testor.h" -#include "runtime/rpc/rpc_address.h" #include "utils/error_code.h" #include "utils/flags.h" #include "utils/fmt_logging.h" @@ -112,22 +111,22 @@ dsn::error_code kill_testor::get_partition_info(bool debug_unhealthy, for (int i = 0; i < partitions.size(); i++) { const dsn::partition_configuration &p = partitions[i]; int replica_count = 0; - if (!p.primary.is_invalid()) { + if (!p.hp_primary.is_invalid()) { replica_count++; } - replica_count += p.secondaries.size(); + replica_count += p.hp_secondaries.size(); if (replica_count == p.max_replica_count) { healthy_partition_cnt++; } else { std::stringstream info; info << "gpid=" << p.pid.get_app_id() << "." 
<< p.pid.get_partition_index() << ", "; - info << "primay=" << p.primary.to_std_string() << ", "; + info << "primary=" << p.hp_primary << ", "; info << "secondaries=["; - for (int idx = 0; idx < p.secondaries.size(); idx++) { + for (int idx = 0; idx < p.hp_secondaries.size(); idx++) { if (idx != 0) - info << "," << p.secondaries[idx].to_std_string(); + info << "," << p.hp_secondaries[idx]; else - info << p.secondaries[idx].to_std_string(); + info << p.hp_secondaries[idx]; } info << "], "; info << "last_committed_decree=" << p.last_committed_decree; diff --git a/src/test/kill_test/kill_testor.h b/src/test/kill_test/kill_testor.h index ed1a873518..f66f31540f 100644 --- a/src/test/kill_test/kill_testor.h +++ b/src/test/kill_test/kill_testor.h @@ -24,7 +24,7 @@ #include #include "dsn.layer2_types.h" -#include "runtime/rpc/rpc_address.h" +#include "runtime/rpc/rpc_host_port.h" #include "utils/error_code.h" namespace dsn { @@ -64,7 +64,7 @@ class kill_testor protected: shared_ptr ddl_client; - vector meta_list; + vector meta_list; std::vector partitions; }; diff --git a/src/utils/metrics.cpp b/src/utils/metrics.cpp index 681b6555ca..fabf8c0a51 100644 --- a/src/utils/metrics.cpp +++ b/src/utils/metrics.cpp @@ -29,8 +29,8 @@ #include "http/http_method.h" #include "http/http_status_code.h" #include "runtime/api_layer1.h" -#include "runtime/rpc/rpc_address.h" #include "runtime/rpc/rpc_engine.h" +#include "runtime/rpc/rpc_host_port.h" #include "runtime/service_app.h" #include "runtime/service_engine.h" #include "runtime/task/task.h" @@ -517,7 +517,7 @@ void encode_port(dsn::metric_json_writer &writer) writer.Key(dsn::kMetricPortField.c_str()); const auto *const rpc = dsn::task::get_current_rpc2(); - ENCODE_OBJ_VAL(rpc != nullptr, rpc->primary_address().port()); + ENCODE_OBJ_VAL(rpc != nullptr, rpc->primary_host_port().port()); } #undef ENCODE_OBJ_VAL